From b058aae1b690ad2a1a8dddc9b9421b51266c7859 Mon Sep 17 00:00:00 2001
From: ojwb
Date: Fri, 29 Sep 2023 05:04:13 +0000
Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20snowball?=
=?UTF-8?q?stem/snowball-website@49453106689db279f61acb80f61da99deb9cdc27?=
=?UTF-8?q?=20=F0=9F=9A=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
algorithms/armenian/stemmer.tt | 54 -
algorithms/basque/stemmer.tt | 24 -
algorithms/catalan/stemmer.tt | 33 -
algorithms/czech/stemmer.tt | 16 -
algorithms/danish/stemmer.tt | 132 -
algorithms/dutch/stemmer.tt | 103 -
algorithms/english/stemmer.tt | 583 ----
algorithms/estonian/stemmer.tt | 485 ---
algorithms/finnish/stemmer.tt | 212 --
algorithms/french/stemmer.tt | 301 --
algorithms/german/stemmer.tt | 146 -
algorithms/german2/stemmer.tt | 22 -
algorithms/germanic.tt | 113 -
algorithms/greek/stemmer.tt | 27 -
algorithms/hindi/stemmer.tt | 83 -
algorithms/hungarian/stemmer.tt | 243 --
algorithms/index.tt | 92 -
algorithms/indonesian/stemmer.tt | 124 -
algorithms/irish/stemmer.tt | 52 -
algorithms/italian/stemmer.tt | 185 -
algorithms/kraaij_pohlmann/stemmer.tt | 69 -
algorithms/lithuanian/stemmer.tt | 22 -
algorithms/lovins/festschrift.tt | 1740 ----------
algorithms/lovins/stemmer.tt | 332 --
algorithms/norwegian/stemmer.tt | 109 -
algorithms/porter/stemmer.tt | 656 ----
algorithms/portuguese/stemmer.tt | 175 -
algorithms/romance.tt | 106 -
algorithms/romanian/stemmer.tt | 224 --
algorithms/russian/stemmer.tt | 327 --
algorithms/scandinavian.tt | 34 -
algorithms/serbian/stemmer.tt | 273 --
algorithms/spanish/stemmer.tt | 205 --
algorithms/swedish/stemmer.tt | 107 -
algorithms/turkish/stemmer.tt | 47 -
algorithms/yiddish/stemmer.tt | 202 --
codesets/guide.tt | 93 -
compiler/snowman.tt | 1558 ---------
js/arabic-stemmer.js | 1613 ---------
js/armenian-stemmer.js | 391 ---
js/base-stemmer.js | 296 --
js/basque-stemmer.js | 849 -----
js/catalan-stemmer.js | 927 -----
js/danish-stemmer.js | 312 --
js/dutch-stemmer.js | 671 ----
js/english-stemmer.js | 1086 ------
js/estonian-stemmer.js | 1112 ------
js/finnish-stemmer.js | 788 -----
js/french-stemmer.js | 1311 -------
js/german-stemmer.js | 591 ----
js/greek-stemmer.js | 2870 ----------------
js/hindi-stemmer.js | 183 -
js/hungarian-stemmer.js | 709 ----
js/indonesian-stemmer.js | 422 ---
js/irish-stemmer.js | 419 ---
js/italian-stemmer.js | 1008 ------
js/lithuanian-stemmer.js | 558 ---
js/nepali-stemmer.js | 319 --
js/norwegian-stemmer.js | 263 --
js/porter-stemmer.js | 739 ----
js/portuguese-stemmer.js | 896 -----
js/romanian-stemmer.js | 918 -----
js/russian-stemmer.js | 622 ----
js/serbian-stemmer.js | 4557 -------------------------
js/spanish-stemmer.js | 976 ------
js/swedish-stemmer.js | 267 --
js/tamil-stemmer.js | 1190 -------
js/turkish-stemmer.js | 2361 -------------
js/yiddish-stemmer.js | 1173 -------
otherapps/romanian/index.tt | 180 -
otherapps/schinke/index.tt | 168 -
runtime/use.tt | 381 ---
texts/apostrophe.tt | 75 -
texts/earlyenglish.tt | 135 -
texts/glossary.tt | 106 -
texts/howtohelp.tt | 65 -
texts/introduction.tt | 923 -----
texts/r1r2.tt | 78 -
texts/vowelmarking.tt | 74 -
79 files changed, 41891 deletions(-)
delete mode 100644 algorithms/armenian/stemmer.tt
delete mode 100644 algorithms/basque/stemmer.tt
delete mode 100644 algorithms/catalan/stemmer.tt
delete mode 100644 algorithms/czech/stemmer.tt
delete mode 100644 algorithms/danish/stemmer.tt
delete mode 100644 algorithms/dutch/stemmer.tt
delete mode 100644 algorithms/english/stemmer.tt
delete mode 100644 algorithms/estonian/stemmer.tt
delete mode 100644 algorithms/finnish/stemmer.tt
delete mode 100644 algorithms/french/stemmer.tt
delete mode 100644 algorithms/german/stemmer.tt
delete mode 100644 algorithms/german2/stemmer.tt
delete mode 100644 algorithms/germanic.tt
delete mode 100644 algorithms/greek/stemmer.tt
delete mode 100644 algorithms/hindi/stemmer.tt
delete mode 100644 algorithms/hungarian/stemmer.tt
delete mode 100644 algorithms/index.tt
delete mode 100644 algorithms/indonesian/stemmer.tt
delete mode 100644 algorithms/irish/stemmer.tt
delete mode 100644 algorithms/italian/stemmer.tt
delete mode 100644 algorithms/kraaij_pohlmann/stemmer.tt
delete mode 100644 algorithms/lithuanian/stemmer.tt
delete mode 100644 algorithms/lovins/festschrift.tt
delete mode 100644 algorithms/lovins/stemmer.tt
delete mode 100644 algorithms/norwegian/stemmer.tt
delete mode 100644 algorithms/porter/stemmer.tt
delete mode 100644 algorithms/portuguese/stemmer.tt
delete mode 100644 algorithms/romance.tt
delete mode 100644 algorithms/romanian/stemmer.tt
delete mode 100644 algorithms/russian/stemmer.tt
delete mode 100644 algorithms/scandinavian.tt
delete mode 100644 algorithms/serbian/stemmer.tt
delete mode 100644 algorithms/spanish/stemmer.tt
delete mode 100644 algorithms/swedish/stemmer.tt
delete mode 100644 algorithms/turkish/stemmer.tt
delete mode 100644 algorithms/yiddish/stemmer.tt
delete mode 100644 codesets/guide.tt
delete mode 100644 compiler/snowman.tt
delete mode 100644 js/arabic-stemmer.js
delete mode 100644 js/armenian-stemmer.js
delete mode 100644 js/base-stemmer.js
delete mode 100644 js/basque-stemmer.js
delete mode 100644 js/catalan-stemmer.js
delete mode 100644 js/danish-stemmer.js
delete mode 100644 js/dutch-stemmer.js
delete mode 100644 js/english-stemmer.js
delete mode 100644 js/estonian-stemmer.js
delete mode 100644 js/finnish-stemmer.js
delete mode 100644 js/french-stemmer.js
delete mode 100644 js/german-stemmer.js
delete mode 100644 js/greek-stemmer.js
delete mode 100644 js/hindi-stemmer.js
delete mode 100644 js/hungarian-stemmer.js
delete mode 100644 js/indonesian-stemmer.js
delete mode 100644 js/irish-stemmer.js
delete mode 100644 js/italian-stemmer.js
delete mode 100644 js/lithuanian-stemmer.js
delete mode 100644 js/nepali-stemmer.js
delete mode 100644 js/norwegian-stemmer.js
delete mode 100644 js/porter-stemmer.js
delete mode 100644 js/portuguese-stemmer.js
delete mode 100644 js/romanian-stemmer.js
delete mode 100644 js/russian-stemmer.js
delete mode 100644 js/serbian-stemmer.js
delete mode 100644 js/spanish-stemmer.js
delete mode 100644 js/swedish-stemmer.js
delete mode 100644 js/tamil-stemmer.js
delete mode 100644 js/turkish-stemmer.js
delete mode 100644 js/yiddish-stemmer.js
delete mode 100644 otherapps/romanian/index.tt
delete mode 100644 otherapps/schinke/index.tt
delete mode 100644 runtime/use.tt
delete mode 100644 texts/apostrophe.tt
delete mode 100644 texts/earlyenglish.tt
delete mode 100644 texts/glossary.tt
delete mode 100644 texts/howtohelp.tt
delete mode 100644 texts/introduction.tt
delete mode 100644 texts/r1r2.tt
delete mode 100644 texts/vowelmarking.tt
diff --git a/algorithms/armenian/stemmer.tt b/algorithms/armenian/stemmer.tt
deleted file mode 100644
index b57a5d2..0000000
--- a/algorithms/armenian/stemmer.tt
+++ /dev/null
@@ -1,54 +0,0 @@
-[% header('Armenian stemming algorithm') %]
-
-Links to resources
-
-
-[% algorithm_lis('armenian', 'Armenian') %]
-
-
-[% algorithm_vocab([11, 'աղոթում', 'բանաձևեր']) %]
-
-
-This stemmer for Armenian was developed and contributed by Astghik Mkrtchyan.
-
-
-
-The following characters are vowels for the purposes of this algorithm:
-
-
-
- ա է ի օ ւ ե ո ը
-
-
-
-R2 is the region after the first non-vowel following a vowel after the
-first non-vowel following a vowel, or the end of the word if there is no such
-non-vowel.
-
-
-
-RV has the same definition as in the
- Spanish stemmer.
-
-
-
-The algorithm has a fairly simple structure which only removes suffixes. There
-are four steps, applied in turn:
-
-
-
-- an "ending" is removed, if one is found in R2,
-- a verb suffix is removed, if one is found in RV,
-- an adjective suffix is removed, if one is found in RV,
-- a noun suffix is removed, if one is found in RV.
-
-
-
-See the Snowball implementation of the stemmer below for the lists of suffixes
-each step checks for.
-
-
-The full algorithm in Snowball
-
-[% highlight_file('armenian') %]
-
-[% footer %]
diff --git a/algorithms/basque/stemmer.tt b/algorithms/basque/stemmer.tt
deleted file mode 100644
index 171932c..0000000
--- a/algorithms/basque/stemmer.tt
+++ /dev/null
@@ -1,24 +0,0 @@
-[% header('Basque stemming algorithm') %]
-
-Links to resources
-
-
-[% algorithm_lis('basque', 'Basque') %]
-
-
-[% algorithm_vocab([12, 'barrutiaren', 'museoak']) %]
-
-
-The following letters are vowels:
-
-
--
- a e i o u
-
-
-
-The stemming algorithm
-
-[% highlight_file('basque') %]
-
-[% footer %]
diff --git a/algorithms/catalan/stemmer.tt b/algorithms/catalan/stemmer.tt
deleted file mode 100644
index d38f8e4..0000000
--- a/algorithms/catalan/stemmer.tt
+++ /dev/null
@@ -1,33 +0,0 @@
-[% header('Catalan stemming algorithm') %]
-
-Links to resources
-
-
-
-[% algorithm_vocab([12, 'abatuda', 'gore']) %]
-
-
-Letters in Catalan include the following accented forms,
-
-
--
- á é í ó ú à è ì ò ù ü ï
-
-
-
-The following letters are vowels:
-
-
--
- a e i o u á é í ó ú ü ï
-
-
-
-The stemming algorithm
-
-[% highlight_file('catalan') %]
-
-[% footer %]
diff --git a/algorithms/czech/stemmer.tt b/algorithms/czech/stemmer.tt
deleted file mode 100644
index 92d050e..0000000
--- a/algorithms/czech/stemmer.tt
+++ /dev/null
@@ -1,16 +0,0 @@
-[% header('Czech stemming algorithm') %]
-
-Links to resources
-
-
-[% algorithm_lis('czech', 'Czech') %]
-
-
-
-In March 2012 Jim O’Regan sent us an implementation of Ljiljana
-Dolamic's Czech stemmer.
-
-
-[% highlight_file('czech') %]
-
-[% footer %]
diff --git a/algorithms/danish/stemmer.tt b/algorithms/danish/stemmer.tt
deleted file mode 100644
index 3491cb1..0000000
--- a/algorithms/danish/stemmer.tt
+++ /dev/null
@@ -1,132 +0,0 @@
-[% header('Danish stemming algorithm') %]
-Links to resources
-
-
-
-[% algorithm_vocab([40, 'indtage', 'underste']) %]
-
-The stemming algorithm
-
-
-The Danish alphabet includes the following additional letters,
-
-
--
- æ å ø
-
-
-
-The following letters are vowels:
-
-
--
- a e i o u y æ å ø
-
-
-
-A consonant is defined as a character from ASCII a-z which isn't a vowel
-(originally this was "A consonant is defined as a non-vowel" but since
-2018-11-15 we've changed this definition to avoid the stemmer altering
-alphanumeric codes which end with a repeated digit).
-
-
-
-R2 is not used: R1 is defined in the same way as in the
-German stemmer.
-(See the note on R1 and R2.)
-
-
-
-Define a valid s-ending as one of
-
-
--
-a b c d f g h j k l m n o p r
- t v y z å
-
-
-
-Do each of steps 1, 2, 3 and 4.
-
-
-
-Step 1:
-
--
-
- Search for the longest among the following suffixes in R1, and
- perform the action indicated.
-
-
- - (a)
- hed ethed ered e erede ende erende ene
- erne ere en heden eren er heder erer
- heds es endes erendes enes ernes eres
- ens hedens erens ers ets erets et eret
-
- delete
-
- (b)
- s
-
- delete if preceded by a valid s-ending
-
-
- (Of course the letter of the valid s-ending is
- not necessarily in R1)
-
-
-
-
-Step 2:
-
--
-
- Search for one of the following suffixes in R1, and if found
- delete the last letter.
-
- -
- gd dt gt kt
-
- (For example, friskt → frisk)
-
-
-Step 3:
--
-
- If the word ends igst, remove the final st.
-
-
-
- Search for the longest among the following suffixes in R1, and
- perform the action indicated.
-
-
- - (a)
- ig lig elig els
-
- delete, and then repeat step 2
-
- (b)
- løst
-
- replace with løs
-
-
-
-Step 4: undouble
--
-
- If the word ends with a double consonant in R1, remove one of the
- consonants.
-
-
-
- (For example, bestemmelse → bestemmels (step 1)
- → bestemm (step 3a)
- → bestem in this step.)
-
-
-
-The same algorithm in Snowball
-
-[% highlight_file('danish') %]
-
-[% footer %]
diff --git a/algorithms/dutch/stemmer.tt b/algorithms/dutch/stemmer.tt
deleted file mode 100644
index 76d9f33..0000000
--- a/algorithms/dutch/stemmer.tt
+++ /dev/null
@@ -1,103 +0,0 @@
-[% header('Dutch stemming algorithm') %]
-
-Links to resources
-
-
-
-[% algorithm_vocab([40, 'lichaamsziek', 'opgingen']) %]
-
-The stemming algorithm
-
-Dutch includes the following accented forms
--
- ä ë ï ö ü á é í ó ú è
-
-First, remove all umlaut and acute accents. A vowel is then one of,
--
- a e i o u y è
-
-Put initial y, y after a vowel, and
-i between vowels into upper case. R1 and
-R2
-(see the note on R1 and R2)
-are then defined as in German.
-
-Define a valid s-ending as a non-vowel other than j.
-
-Define a valid en-ending as a non-vowel, and not gem.
-
-Define undoubling the ending as removing the last letter if the word ends
-kk, dd or tt.
-
-Do each of steps 1, 2, 3 and 4.
-
-Step 1:
--
- Search for the longest among the following suffixes, and perform the
- action indicated
-
-
- - (a) heden
-
- replace with heid if in R1
-
- - (b) en ene
-
- delete if in R1 and preceded by a valid en-ending, and then
- undouble the ending
-
- - (c) s se
-
- delete if in R1 and preceded by a valid s-ending
-
-
-Step 2:
--
- Delete suffix e if in R1 and preceded by a non-vowel, and then undouble
- the ending
-
-Step 3a: heid
--
- delete heid if in R2 and not preceded by c, and treat a preceding
- en as in step 1(b)
-
-Step 3b: d-suffixes (*)
--
- Search for the longest among the following suffixes, and perform the
- action indicated.
-
-
- - end ing
-
- delete if in R2
-
- if preceded by ig, delete if in R2 and not preceded by e, otherwise
- undouble the ending
-
- - ig
-
- delete if in R2 and not preceded by e
-
- - lijk
-
- delete if in R2, and then repeat step 2
-
- - baar
-
- delete if in R2
-
- - bar
-
- delete if in R2 and if step 2 actually removed an e
-
-
-Step 4: undouble vowel
--
- If the word ends CVD, where C is a non-vowel, D is a non-vowel other
- than I, and V is double a, e, o or u, remove one of the vowels from
- V (for example, maan → man, brood → brod).
-
-Finally,
--
- Turn I and Y back into lower case.
-
-
-The same algorithm in Snowball
-
-[% highlight_file('dutch') %]
-
-[% footer %]
diff --git a/algorithms/english/stemmer.tt b/algorithms/english/stemmer.tt
deleted file mode 100644
index 8b35c7a..0000000
--- a/algorithms/english/stemmer.tt
+++ /dev/null
@@ -1,583 +0,0 @@
-[% header('The English (Porter2) stemming algorithm') %]
-
-Links to resources
-
-
-[% algorithm_lis('english', 'English') %]
-
-
-[% algorithm_vocab([40, 'consign', 'knack']) %]
-
-Developing the English stemmer
-
-
-(Revised slightly, December 2001)
-(Further revised, September 2002)
-
-
-
-I have made more than one attempt to improve the structure of the Porter
-algorithm by making it follow the pattern of ending removal of the Romance
-language stemmers. It is not hard to see why one should want to do this:
-step 1b of the Porter stemmer removes ed and ing, which are
-i-suffixes (*) attached to verbs. If these suffixes are removed, there
-should be no need to remove d-suffixes which are not verbal, although
-it will try to do so. This seems to be a deficiency in the Porter stemmer,
-not shared by the Romance stemmers. Again, the divisions between steps
-2, 3 and 4 seem rather arbitrary, and are not found in the Romance stemmers.
-
-
-
-Nevertheless, these attempts at improvement have been abandoned. They seem
-to lead to a more complicated algorithm with no very obvious improvements.
-A reason for not taking note of the outcome of step 1b may be that
-English endings do not determine word categories quite as strongly as
-endings in the Romance languages. For example, condition and
-position in French have to be nouns, but in English they can be verbs
-as well as nouns,
-
-
- We are all conditioned by advertising
- They are positioning themselves differently today
-
-A possible reason for having separate steps 2, 3 and 4 is that
-d-suffix combinations in English are quite complex, a point which has
-been made
-elsewhere.
-
-
-
-But it is hardly surprising that after twenty years of use of the Porter
-stemmer, certain improvements did suggest themselves, and a new algorithm
-for English is therefore offered here. (It could be called the ‘Porter2’
-stemmer to distinguish it from the Porter stemmer, from which it derives.)
-The changes are not so very extensive: (1) terminating y is changed to
-i rather less often, (2) suffix us does not lose its s, (3) a
-few additional suffixes are included for removal, including (4) suffix
-ly. In addition, a small list of exceptional forms is included. In
-December 2001 there were two further adjustments: (5) Steps 5a and 5b
-of the old Porter stemmer were combined into a single step. This means
-that undoubling final ll is not done with removal of final e. (6)
-In Step 3 ative is removed only when in region R2.
-(7)
-In July
-2005 a small adjustment was made (including a new step 0) to handle
-apostrophe.
-
-
-
-To begin with, here is the basic algorithm without reference to the
-exceptional forms. An exact comparison with the Porter algorithm needs to
-be done quite carefully if done at all. Here we indicate by * points
-of departure, and by + additional features. In the sample vocabulary,
-Porter and Porter2 stem slightly under 5% of words to different forms.
-
-
-Definition of the English stemmer
-
-
-Define a vowel as one of
-
-
- a e i o u y
-
-Define a double as one of
--
- bb dd ff gg mm nn pp rr tt
-
-Define a valid li-ending as one of
--
- c d e g h k m n r t
-
-
-R1 is the region after the first non-vowel following a vowel, or the end of
-the word if there is no such non-vowel. (This definition may be modified for certain exceptional
-words — see below.)
-
-
-
-R2 is the region after the first non-vowel following a vowel in R1, or the
-end of the word if there is no such non-vowel.
-(See note on R1 and R2.)
-
-
-
-Define a short syllable in a word as either (a) a vowel followed by a
-non-vowel other than w, x or Y and preceded by a non-vowel, or
-*
-(b) a vowel at the beginning of the word followed by a non-vowel.
-
-
-
-So rap,
-trap, entrap end with a short syllable, and ow, on, at are
-classed as short syllables. But uproot, bestow, disturb do not end with a
-short syllable.
-
-
-
-A word is called short if it ends in a short syllable, and if R1 is null.
-
-
-
-So bed, shed and shred are short words, bead, embed, beds are
-not short words.
-
-
-
-An apostrophe (') may be regarded as a letter.
-(See note on apostrophes in English.)
-
-
-
-If the word has two letters or less, leave it as it is.
-
-
-
-Otherwise, do each of the following operations,
-
-
-
-Remove initial ', if present. + Then,
-
-
-
-Set initial y, or y after a vowel, to Y, and then establish the regions
-R1 and R2.
-(See note on vowel marking.)
-
-
-
-Step 0: +
-
-
-
- Search for the longest among the suffixes,
-
-
- - '
-
- 's
-
- 's'
-
- and remove if found.
-
-
-Step 1a:
--
-
- Search for the longest among the following suffixes, and perform the
- action indicated.
-
-
- - sses
-
- replace by ss
-
- ied+ ies*
-
- replace by i if preceded by more than one letter, otherwise by ie
- (so ties → tie, cries → cri)
-
- s
-
- delete if the preceding word part contains a vowel not immediately before the
-s (so gas and this retain the s, gaps and kiwis lose it)
-
- us+ ss
-
- do nothing
-
-
-
-Step 1b:
--
-
- Search for the longest among the following suffixes, and perform the
- action indicated.
-
-
- - eed eedly+
-
- replace by ee if in R1
-
- ed edly+ ing ingly+
-
- delete if the preceding word part contains a vowel, and after the deletion:
-
- if the word ends at, bl or iz add e (so luxuriat → luxuriate), or
-
- if the word ends with a double
- remove the last letter (so hopp → hop), or
-
- if the word is short, add e (so hop → hope)
-
-
-
-Step 1c: *
--
- replace suffix y or Y by i if preceded by a non-vowel which is not the
- first letter of the word (so cry → cri, by → by, say → say)
-
-
-Step 2:
--
-
- Search for the longest among the following suffixes, and, if
- found and in R1, perform the action indicated.
-
-
- - tional: replace by tion
-
- enci: replace by ence
-
- anci: replace by ance
-
- abli: replace by able
-
- entli: replace by ent
-
- izer ization: replace by ize
-
- ational ation ator: replace by ate
-
- alism aliti alli: replace by al
-
- fulness: replace by ful
-
- ousli ousness: replace by ous
-
- iveness iviti: replace by ive
-
- biliti bli+: replace by ble
-
- ogi+: replace by og if preceded by l
-
- fulli+: replace by ful
-
- lessli+: replace by less
-
- li+: delete if preceded by a valid li-ending
-
-
-
-Step 3:
--
-
- Search for the longest among the following suffixes, and, if
- found and in R1, perform the action indicated.
-
-
- - tional+: replace by tion
-
- ational+: replace by ate
-
- alize: replace by al
-
- icate iciti ical: replace by ic
-
- ful ness: delete
-
- ative*: delete if in R2
-
-
-
-Step 4:
--
-
- Search for the longest among the following suffixes, and, if
- found and in R2, perform the action indicated.
-
-
- - al ance ence er ic able ible ant ement
- ment ent ism ate iti ous ive ize
-
- delete
-
- ion
-
- delete if preceded by s or t
-
-
-
-Step 5: *
--
-
- Search for the following suffixes, and, if
- found, perform the action indicated.
-
-
- - e
-
- delete if in R2, or in R1 and not preceded by a short
- syllable
-
- l
-
- delete if in R2 and preceded by l
-
-
-
-Finally, turn any remaining Y letters in the word back into lower case.
-
-
-Exceptional forms in general
-
-
-It is quite easy to expand a Snowball script so that certain exceptional
-word forms get special treatment. The standard case is that certain words
-W1, W2 ..., instead of passing through the stemming process, are
-mapped to the forms X1, X2 ... respectively. If the script does
-the stemming by means of the call
-
-[% highlight("
- define stem as C
-") %]
-
-where C is a command, the exceptional cases can be dealt with by extending this to
-
-[% highlight("
- define stem as ( exception or C )
-") %]
-
-and putting in a routine exception:
-
-
- define exception as (
- [substring] atlimit among(
- 'W1' ( <- 'X1' )
- 'W2' ( <- 'X2' )
- ...
- )
- )
-
-
-atlimit causes the whole string to be tested for equality with one of
-the Wi, and if a match is found, the string is replaced with
-Xi.
-
-
-
-More precisely we might have a group of words W11, W12 ...
-that need to be mapped to X1, another group W21, W22
-... that need to be mapped to X2, and so on, and a list of words
-V1, V2 ... Vk that are to remain invariant. The
-exception routine may then be written as follows:
-
-
- among( 'W11' 'W12' ... (<- 'X1')
- 'W21' 'W22' ... (<- 'X2')
- ...
- 'Wn1' 'Wn2' ... (<- 'Xn')
- 'V1' 'V2' ... 'Vk'
- )
-
-
-And indeed the exception1 routine for the English stemmer has just that
-shape:
-
-[% highlight("
- define exception1 as (
-
- [substring] atlimit among(
-
- /* special changes: */
-
- 'skis' (<-'ski')
- 'skies' (<-'sky')
- 'dying' (<-'die')
- 'lying' (<-'lie')
- 'tying' (<-'tie')
-
- /* special -LY cases */
-
- 'idly' (<-'idl')
- 'gently' (<-'gentl')
- 'ugly' (<-'ugli')
- 'early' (<-'earli')
- 'only' (<-'onli')
- 'singly' (<-'singl')
-
- // ... extensions possible here ...
-
- /* invariant forms: */
-
- 'sky'
- 'news'
- 'howe'
-
- 'atlas' 'cosmos' 'bias' 'andes' // not plural forms
-
- // ... extensions possible here ...
- )
- )
-") %]
-
-
-(More will be said about the words that appear here shortly.)
-
-
-
-Here we see words being treated exceptionally before stemming is done, but equally we could
-treat stems exceptionally after stemming is done, and so, if we wish, map absorpt to
-absorb, reduct to reduc etc., as in the
-Lovins stemmer.
-But more generally, throughout the algorithm, each significant step may have recognised
-exceptions, and a suitably placed among will take care of them. For example, a point made
-at least twice in the literature is that words beginning gener are overstemmed by the
-Porter stemmer:
-
-
-
- generate
- generates
- generated
- generating
- general
- generally
- generic
- generically
- generous
- generously
-
- → gener (for every word in this list)
-
-
-To fix this over-stemming, we make an exception to the usual setting of p1,
-the left point of R1, and therefore replace
-
-
-[% highlight("
- gopast v gopast non-v setmark p1
-") %]
-
-
-with
-
-
-[% highlight("
- among (
- 'gener'
- // ... and other stems may be included here ...
- ) or (gopast v gopast non-v)
- setmark p1
-") %]
-
-
-after which the words beginning gener stem as follows:
-
-
-
- generate generates generated generating → generat
- general generally → general
- generic generically → generic
- generous generously → generous
-
-
-Another example is given by the exception2 routine, which is similar to exception1,
-but placed after the call of Step_1a, which may have removed terminal s,
-
-
-[% highlight("
- define exception2 as (
-
- [substring] atlimit among(
- 'inning' 'outing' 'canning' 'herring'
- 'proceed' 'exceed' 'succeed'
-
- // ... extensions possible here ...
-
- )
- )
-") %]
-
-
-Snowball makes it easy therefore to add in lists of exceptions. But deciding what the lists of
-exceptions should be is far from easy. Essentially there are two lines of attack, the
-systematic and the piecemeal. One might systematically treat as exceptions the stem changes of
-irregular verbs, for example. The piecemeal approach is to add in exceptions as people notice
-them — like gener above. The problem with the systematic approach is that it should be
-done by investigating the entire language vocabulary, and that is more than most people are
-prepared to do. The problem with the piecemeal approach is that it is arbitrary, and usually
-yields little.
-
-
-
-The exception lists in the English stemmer are meant to be illustrative (‘this is how it is done if you
-want to do it’), and were derived piecemeal.
-
-
-
-a)
-The new stemmer improves on the Porter stemmer in handling short words ending e and
-y. There is however a mishandling of the four forms sky, skies, ski,
-skis, which is easily corrected by treating three of these words as
-special cases.
-
-
-
-b)
-Similarly there is a problem with the ing form of three letter verbs ending ie. There
-are only three such verbs: die, lie and tie, so a special case is made for
-dying, lying and tying.
-
-
-
-c)
-One has to be a little careful of certain ing forms, such as
-inning, outing, canning, which one does not wish
-to be stemmed to
-in, out, can.
-
-
-
-d)
-The removal of suffix ly, which is not in the Porter stemmer, has a number of exceptions.
-Certain short-word exceptions are idly, gently, ugly, early, only, singly.
-Rarer words (bristly, burly, curly, surly ...) are not included.
-
-
-
-e)
-The remaining words were included following complaints from users of the Porter algorithm.
-news is not the plural of new (noticed when IR systems were being set up for
-Reuters). Howe is a surname, and needs to be separated from how (noticed when
-doing a search for ‘Sir Geoffrey Howe’ in a demonstration at the House of Commons).
-succeed etc are not past participles, so the ed should not be removed (pointed out
-to me in an email from India). herring should not stem to her (another email from
-Russia).
-
-
-
-f)
-Finally, a few non-plural words ending s have been added.
-
-
-
-Incidentally, this illustrates how much feedback to expect from the real users of a stemming
-algorithm: seven or eight words in twenty years!
-
-
-
-The definition of the English stemmer above is therefore supplemented by the following:
-
-
-Exceptional forms in the English stemmer
-
--
-
- If the word begins gener, commun or arsen, set R1 to be the remainder of the
- word.
-
-
-
- Stem certain special words as follows,
-
-
-
- skis → ski
- skies → sky
- dying lying tying → die lie tie
- idly gently ugly early only singly → idl gentl ugli earli onli singl
-
-
- If one of the following is found, leave it invariant,
-
-
-
- sky news howe
- atlas cosmos bias andes
-
-
- Following step 1a, leave the following invariant,
-
-
-
- inning outing canning herring earring
- proceed exceed succeed
-
-
-The full algorithm in Snowball
-
-[% highlight_file('english') %]
-
-[% footer %]
diff --git a/algorithms/estonian/stemmer.tt b/algorithms/estonian/stemmer.tt
deleted file mode 100644
index dec4b74..0000000
--- a/algorithms/estonian/stemmer.tt
+++ /dev/null
@@ -1,485 +0,0 @@
-[% header('Estonian stemming algorithm') %]
-
-Links to resources
-
-
-[% algorithm_lis('estonian', 'Estonian') %]
-
-
-
-
-
-Here is a sample of vocabulary, with the stemmed forms that will
-be generated with the algorithm.
-
-
-[% algorithm_vocab([
-'raamat',
-'raamatu',
-'raamatut',
-'raamatule',
-'raamatud',
-'raamatute',
-'raamatuid',
-'raamatutele',
-'raamatutestki',
-'hele',
-'heleda',
-'heledat',
-'heledale',
-'heledad',
-'heledate',
-'heledaid',
-'heledatele',
-'heledam',
-'heledama',
-'heledamat',
-'heledamad',
-'heledamate',
-'heledamaid',
-'heledamatelegi',
-'heledaim',
-'heledaima',
-'heledaimat',
-'heledaimale',
-'heledaimad',
-'heledaimate',
-'heledaimaid',
-'heledaimatelt',
-'hobune',
-'hobuse',
-'hobust',
-'hobusele',
-'hobused',
-'hobuste',
-'hobuseid',
-'hobustele',
-], [
-'hüpata',
-'hüppa',
-'hüppaksin',
-'hüppaksid',
-'hüppaks',
-'hüppaksime',
-'hüppaksite',
-'hüppan',
-'hüppad',
-'hüppab',
-'hüppame',
-'hüppate',
-'hüppavad',
-'hüppasin',
-'hüppasid',
-'hüppas',
-'hüppasime',
-'hüppasite',
-'hüpanuksite',
-'hüpatakse',
-'hüpati',
-'hüpanud',
-'hüpanutest',
-'hüpates',
-'hüppavat',
-'hüppavatele',
-'hüppamata',
-'hüppamast',
-'hüljes',
-'hülge',
-'hüljest',
-'hülgesse',
-'hüljeste',
-'hülgeid',
-'hüljestesse',
-'hülgeisse',
-'ohutule',
-'ohutud',
-'ohutuid',
-'ohututele',
-]) %]
-
-
-
-This algorithm is written in collaboration with Estonian text analytics enterprise Texta.
-
-
-
-Letters in Estonian include the following accented forms,
-
-
--
- ä ö õ ü š ž
-
-
-
-The following letters are vowels (V1):
-
-
--
- a e i o u õ ä ö ü
-
-
-
-RV is defined as one of the following:
-
-
--
- a e i u o
-
-
-
-KI is defined as one of the following (letters possible before -ki emphasis):
-
-
--
- k p t g b d s h f š z ž
-
-
-
-GI is defined as one of the following (letters possible before -gi emphasis):
-
-
--
- c j l m n q r v w x a e i o u õ ä ö ü
-
-
-
-R1 in this algorithm is defined as a region after the first consonant preceded by a vowel (laul[nud], mõt[teid], kar[tuleid], saab[as]). If there’s no such region, then R1 is empty (laul[Ø], saun[Ø]). Limitations in steps (such as "if preceded by RV") are not restricted to the R1 region.
-
-
-
-LONGV is defined as one of the following:
-
-
--
- aa ee ii oo uu ää öö üü õõ
-
-
-
-Do step 0. If nothing was changed in step 0, continue with the steps, otherwise stop. Do step 1 and step 2. If nothing was changed in step 2, do steps 3, 4, 5, 6, 7, 8 and 9. If something was changed in step 2, do step 9.
-
-
-
-Step 0: verb_exceptions
-
-
--
- Search for some frequent irregular short verbs which wouldn’t have been found otherwise and give them a chosen stem.
-
-
- - joon jood joob joote joome joovad
-
- replace by joo
-
- jõin jõid jõi jõime jõite
-
- replace by joo
-
- joomata juuakse joodakse juua jooma
-
- replace by joo
-
- saan saad saab saate saame saavad
-
- replace by saa
-
- saaksin saaksid saaks saaksite saaksime
-
- replace by saa
-
- sain said sai saite saime
-
- replace by saa
-
- saamata saadakse saadi saama saada
-
- replace by saa
-
- viin viid viib viite viime viivad
-
- replace by viima
-
- viiksin viiksid viiks viiksite viiksime
-
- replace by viima
-
- viisin viisite viisime
-
- replace by viima
-
- viimata viiakse viidi viima viia
-
- replace by viima
-
- keen keeb keed kees keeme keete keevad
-
- replace by keesi
-
- keeksin keeks keeksid keeksime keeksite
-
- replace by keesi
-
- keemata keema keeta keedakse
-
- replace by keesi
-
- löön lööd lööb lööme lööte löövad
-
- replace by löö
-
- lööksin lööksid lööks lööksime lööksite
-
- replace by löö
-
- löömata lüüakse löödakse löödi lööma lüüa
-
- replace by löö
-
- lõin lõid lõi lõime lõite
-
- replace by lõi
-
- loon lood loob loome loote loovad
-
- replace by loo
-
- looksin looksid looks looksime looksite
-
- replace by loo
-
- loomata luuakse loodi luua looma
-
- replace by loo
-
- käin käib käid käis käime käite käivad
-
- replace by käisi
-
- käiksin käiks käiksid käiksime käiksite
-
- replace by käisi
-
- käimata käiakse käidi käia käima
-
- replace by käisi
-
- söön sööb sööd sööme sööte söövad
-
- replace by söö
-
- sööksin sööks sööksid sööksime sööksite
-
- replace by söö
-
- sõin sõi sõid sõime sõite
-
- replace by söö
-
- söömata süüakse söödakse söödi sööma süüa
-
- replace by söö
-
- toon tood toob toote toome toovad
-
- replace by too
-
- tooksin tooksid tooks tooksite tooksime
-
- replace by too
-
- tõin tõid tõi tõime tõite
-
- replace by too
-
- toomata tuuakse toodi tooma tuua
-
- replace by too
-
- võin võid võib võime võis võite võivad
-
- replace by võisi
-
- võiksin võiksid võiks võiksime võiksite
-
- replace by võisi
-
- võimata võidakse võidi võida võima
-
- replace by võisi
-
- jään jääd jääb jääme jääte jäävad
-
- replace by jääma
-
- jääksin jääksid jääks jääksime jääksite
-
- replace by jääma
-
- jäime jäite jäin jäid jäi
-
- replace by jääma
-
- jäämata jäädakse jääda jääma jäädi
-
- replace by jääma
-
- müün müüd müüb müüs müüme müüte müüvad
-
- replace by müüsi
-
- müüksin müüksid müüks müüksime müüksite
-
- replace by müüsi
-
- müümata müüakse müüdi müüa müüma
-
- replace by müüsi
-
- loeb loen loed loeme loete loevad
-
- replace by luge
-
- loeks loeksin loeksid loeksime loeksite
-
- replace by luge
-
- põen põeb põed põeme põete põevad
-
- replace by põde
-
- põeksin põeks põeksid põeksime põeksite
-
- replace by põde
-
- laon laob laod laome laote laovad
-
- replace by ladu
-
- laoksin laoks laoksid laoksime laoksite
-
- replace by ladu
-
- teeksin teeks teeksid teeksime teeksite
-
- replace by tegi
-
- teen teeb teed teeme teete teevad
-
- replace by tegi
-
- tegemata tehakse tehti tegema teha
-
- replace by tegi
-
- näen näeb näed näeme näete näevad
-
- replace by nägi
-
- näeksin näeks näeksid näeksime näeksite
-
- replace by nägi
-
- nägemata nähakse nähti näha nägema
-
- replace by nägi
-
-
-
-
-
-Step 1: emphasis
-
-
--
- Search for the longest among the following suffixes in R1, and perform the action indicated
-
-
- - Test if there are at least 4 characters before the R1 region. If so, continue this step
-
- gi
-
- if preceded by a character from GI which is not the second character of a long vowel as defined by LONGV, delete
-
- ki
-
- if preceded by KI, delete
-
-
-
-
-Step 2: verb
-
--
- Search for the longest among the following suffixes in R1, and perform the
- action indicated
-
-
-
- - nuksin nuksime nuksid nuksite
-
- delete
-
- ksin ksid ksime ksite
-
- delete
-
- mata
-
- delete
-
- takse dakse
-
- delete
-
- taks daks
-
- delete
-
- akse
-
- replace with a
-
- sime
-
- delete
-
- site
-
- delete
-
- sin
-
- delete
-
- me
-
- if preceded by V1, delete
-
- da
-
- if preceded by V1, delete
-
- n
-
- if preceded by V1, delete
-
- b
-
- if preceded by V1, delete
-
-
-
-
-Step 3: special_noun_endings
-
-
--
- Search for the longest among the following suffixes in R1, and perform the
- action indicated
-
-
- - lasse
-
- replace by lase
-
- last
-
- replace by lase
-
- lane
-
- replace by lase
-
- lasi
-
- replace by lase
-
- misse
-
- replace by mise
-
- mist
-
- replace by mise
-
- mine
-
- replace by mise
-
- misi
-
- replace by mise
-
- lisse
-
- replace by lise
-
- list
-
- replace by lise
-
- line
-
- replace by lise
-
- lisi
-
- replace by lise
-
-
-
-
-Step 4: case_ending
-
-
--
- Search for the longest among the following suffixes in R1, and perform the
- action indicated
-
-
- - sse if preceded by RV or LONGV
-
- st if preceded by RV or LONGV
-
- le if preceded by RV or LONGV
-
- lt if preceded by RV or LONGV
-
- ga if preceded by RV or LONGV
-
- ks if preceded by RV or LONGV
-
- ta if preceded by RV or LONGV
-
- t if preceded by at least 4 characters
-
- s if preceded by RV or LONGV
-
- l if preceded by RV or LONGV
-
- delete
-
-
-
-
-Step 5: plural_three_first_cases
-
-
--
- Search for the longest among the following suffixes in R1, and perform the
- action indicated
-
-
- - ikkude
-
- replace by iku
-
- ikke
-
- replace by iku
-
- ike
-
- replace by iku
-
- sid
-
- if it is not preceded by LONGV, delete
-
- te
-
- if it doesn't have at least 4 characters before it, replace by t.
-
- Otherwise:
-
- a) if it is preceded by mis, replace with e,
-
- b) if it is preceded by las, replace with e,
-
- c) if it is preceded by lis, replace with e,
-
- if it wasn't replaced with e in steps a)-c) and it isn't preceded by t, delete
-
- de if preceded by RV or LONGV
-
- delete
-
- d if preceded by RV or LONGV
-
- delete
-
-
-
-
-Step 6: degrees
-
-
--
- Search for the longest among the following suffixes in R1, and perform the
- action indicated
-
-
- - mai if preceded by RV
-
- ma
-
- m if preceded by RV
-
- delete
-
-
-
-
-Step 7: i_plural
-
-
--
- Search for the longest among the following suffixes in R1, and perform the
- action indicated
-
-
- - i if preceded by RV
-
- delete
-
-
-
-
-Step 8: nu
-
-
--
- Search for the longest among the following suffixes in R1, and perform the
- action indicated
-
-
- - nu
-
- tu
-
- du
-
- va
-
- delete
-
-
-
-
-Step 9: undouble_kpt
-
-
--
- Undouble consonant if word ending is kk+V1, tt+V1, pp+V1,
- provided the vowel is in R1.
-
-
-The full algorithm in Snowball
-
-[% highlight_file('estonian') %]
-
-[% footer %]
diff --git a/algorithms/finnish/stemmer.tt b/algorithms/finnish/stemmer.tt
deleted file mode 100644
index dd2c0bf..0000000
--- a/algorithms/finnish/stemmer.tt
+++ /dev/null
@@ -1,212 +0,0 @@
-[% header('Finnish stemming algorithm') %]
-
-Links to resources
-
-
-[% algorithm_lis('finnish', 'Finnish') %]
-
-
-[% algorithm_vocab([40, 'edeltäjien', 'innostu']) %]
-
-
-Finnish is not an Indo-European language, but belongs to the Finno-Ugric
-group, which again belongs to the Uralic group (*). Distinctions between
-a-, i- and d-suffixes can be made in Finnish, but they are much
-less sharply separated than in an Indo-European language. The system of
-endings is extremely elaborate, but strictly defined, and applies equally to
-all nominals, that is, to nouns, adjectives and pronouns. Verb endings have a
-close similarity to nominal endings, which again makes Finnish very different
-from any Indo-European language.
-
-
-
-More problematical than the endings themselves is the change that can be
-effected in a stem as a result of taking a particular ending. A stem typically
-has two forms, strong and weak, where one class of ending follows the
-strong form and the complementary class the weak. Normalising strong and weak
-forms after ending removal is not generally possible, although the common case
-where strong and weak forms only differ in the single or double form of a
-final consonant can be dealt with.
-
-
-
-Finnish includes the following accented forms,
-
-
--
- ä ö
-
-
-
-The following letters are vowels:
-
-
--
- a e i o u y ä ö
-
-
-
-R1 and
-R2 are then defined in the usual way
-(see the note on R1 and R2).
-
-
-
-Do each of steps 1, 2, 3, 4, 5 and 6.
-
-
-
-Step 1: particles etc
-
-
--
- Search for the longest among the following suffixes in R1, and perform the
- action indicated
-
-
- - (a) kin kaan kään ko kö han hän pa pä
-
- delete if preceded by n, t or a vowel
-
- (b) sti
-
- delete if in R2
-
-
-
-
-(Of course, the n, t or vowel of 1(a) need not be in R1: only
-the suffix removed must be in R1. And similarly below.)
-
-
-
-Step 2: possessives
-
-
--
- Search for the longest among the following suffixes in R1, and perform the
- action indicated
-
-
- - si
-
- delete if not preceded by k
-
- ni
-
- delete
-
- if preceded by kse, replace with ksi
-
- nsa nsä mme nne
-
- delete
-
- an
-
- delete if preceded by one of ta ssa sta lla lta na
-
- än
-
- delete if preceded by one of tä ssä stä llä ltä nä
-
- en
-
- delete if preceded by one of lle ine
-
-
-
-
-The remaining steps require a few definitions.
-
-
-
-Define a v (vowel) as one of a e i o u y ä ö.
-
-Define a V (restricted vowel) as one of a e i o u ä ö.
-
-So Vi means a V followed by letter i.
-
-Define LV (long vowel) as one of aa ee ii oo uu ää öö.
-
-Define a c (consonant) as a character from ASCII a-z which isn't in
-v (originally this was "a character other than a v", but since
-2018-04-11 we've changed this definition to prevent the stemmer from altering
-sequences of digits).
-
-So cv means a c followed by a v.
-
-
-
-Step 3: cases
-
--
- Search for the longest among the following suffixes in R1, and perform the
- action indicated
-
-
-
- - hXn preceded by X, where X is a V other than u (a/han, e/hen etc)
-
- siin den tten preceded by Vi
-
- seen preceded by LV
-
- a ä preceded by cv
-
- tta ttä preceded by e
-
- ta tä ssa ssä sta stä lla llä lta ltä lle na nä ksi ine
-
- delete
-
- n
-
- delete, and if preceded by LV or ie, delete the last vowel
-
-
-
-
-So aarteisiin → aartei, the longest matching suffix being siin,
-preceded as it is by Vi. But adressiin → adressi. The longest
-matching suffix is not siin, because there is no preceding Vi, but n,
-and then the last vowel of the preceding LV is removed.
-
-
-
-Step 4: other endings
-
-
--
- Search for the longest among the following suffixes in R2, and perform the
- action indicated
-
-
- - mpi mpa mpä mmi mma mmä
-
- delete if not preceded by po
-
- impi impa impä immi imma immä eja ejä
-
- delete
-
-
-
-
-Step 5: plurals
-
-
--
-If an ending was removed in step 3, delete a final i or j if in R1;
-otherwise, if an ending was not removed in step 3, delete a final t in
-R1 if it follows a vowel, and, if a t is removed, delete a final mma or
-imma in R2, unless the mma is preceded by po.
-
-
-
-Step 6: tidying up
-
-
--
-Do in turn steps (a), (b), (c), (d), restricting all tests to the region
-R1.
-
-
-
-a) If R1 ends LV delete the last letter
-b) If R1 ends cX, c a consonant and X one of a ä e i, delete the last
-letter
-c) If R1 ends oj or uj delete the last letter
-d) If R1 ends jo delete the last letter
-
-
-
-Do step (e), which is not restricted to R1.
-
-
-
-e) If the word ends with a double consonant followed by zero or more vowels,
-remove the last consonant (so eläkk → eläk, aatonaatto →
-aatonaato)
-
-
-
-The full algorithm in Snowball
-
-[% highlight_file('finnish') %]
-
-[% footer %]
diff --git a/algorithms/french/stemmer.tt b/algorithms/french/stemmer.tt
deleted file mode 100644
index 4b23e44..0000000
--- a/algorithms/french/stemmer.tt
+++ /dev/null
@@ -1,301 +0,0 @@
-[% header('French stemming algorithm') %]
-
-Links to resources
-
-
-
-[% algorithm_vocab([40, 'continu', 'main']) %]
-
-The stemming algorithm
-
-
-Letters in French include the following accented forms,
-
-
--
- â à ç ë é ê è ï î ô û ù
-
-The following letters are vowels:
--
- a e i o u y â à ë é ê è ï î ô û ù
-
-Assume the word is in lower case. Then, taking the letters in turn from the
-beginning to end of the word, put u or i into upper
-case when it is both preceded and followed by a vowel; put y into
-upper case when it is either preceded or followed by a vowel; and put u into upper case when it follows q. For example,
-
- jouer → joUer
- ennuie → ennuIe
- yeux → Yeux
- quand → qUand
- croyiez → croYiez
-
-
-In the last example, y becomes Y because it is
-between two vowels, but i does not become I because
-it is between Y and e, and Y is not
-defined as a vowel above.
-
-
-
-(The upper case forms are not then classed as vowels — see note on vowel
-marking.)
-
-
-
-Replace ë and ï with He and Hi. The H
-marks the vowel as having originally had a diaeresis, while the vowel itself, lacking an accent, is able to
-match suffixes beginning in e or i.
-
-
-
-If the word begins with two vowels, RV is the region after the third
-letter, otherwise the region after the first vowel not at the beginning of
-the word, or the end of the word if these positions cannot be found. (Exceptionally,
-par, col or tap, at the beginning of a word is also taken to define
-RV as the region to their right.)
-
-
-
-For example,
-
-
-
- a i m e r a d o r e r v o l e r t a p i s
- |...| |.....| |.....| |...|
-
-
-
-R1 is the region after the first non-vowel following a vowel, or the end of
-the word if there is no such non-vowel.
-
-
-
-R2 is the region after the first non-vowel following a vowel in R1, or the
-end of the word if there is no such non-vowel.
-(See note on R1 and R2.)
-
-
-
-For example:
-
-
-
- f a m e u s e m e n t
- |......R1.......|
- |...R2....|
-
-
-
-Note that R1 can contain RV (adorer), and RV can contain R1 (voler).
-
-
-
-Below, ‘delete if in R2’ means that a found suffix should be removed if it
-lies entirely in R2, but not if it overlaps R2 and the rest of the word.
-‘delete if in R1 and preceded by X’ means that X itself does not have to
-come in R1, while ‘delete if preceded by X in R1’ means that X, like the
-suffix, must be entirely in R1.
-
-
-
-Start with step 1
-
-
-
-Step 1: Standard suffix removal
-
-
- Search for the longest among the following suffixes, and perform the
- action indicated.
-
- - ance iqUe isme able iste eux ances iqUes ismes ables istes
-
- delete if in R2
-
- atrice ateur ation atrices ateurs ations
-
- delete if in R2
-
- if preceded by ic, delete if in R2, else replace by iqU
-
- logie logies
-
- replace with log if in R2
-
- usion ution usions utions
-
- replace with u if in R2
-
- ence ences
-
- replace with ent if in R2
-
- ement ements
-
- delete if in RV
-
- if preceded by iv, delete if in R2 (and if further preceded by at,
- delete if in R2), otherwise,
-
- if preceded by eus, delete if in R2, else replace by eux
- if in R1, otherwise,
-
- if preceded by abl or iqU, delete if in R2, otherwise,
-
- if preceded by ièr or Ièr, replace by i if in RV
-
- ité ités
-
- delete if in R2
-
- if preceded by abil, delete if in R2, else replace by abl,
- otherwise,
-
- if preceded by ic, delete if in R2, else replace by iqU, otherwise,
-
- if preceded by iv, delete if in R2
-
- if ive ifs ives
-
- delete if in R2
-
- if preceded by at, delete if in R2 (and if further preceded by ic,
- delete if in R2, else replace by iqU)
-
- eaux
-
- replace with eau
-
- aux
-
- replace with al if in R1
-
- euse euses
-
- delete if in R2, else replace by eux if in R1
-
- issement issements
-
- delete if in R1 and preceded by a non-vowel
-
- amment
-
- replace with ant if in RV
-
- emment
-
- replace with ent if in RV
-
- ment ments
-
- delete if preceded by a vowel in RV
-
-
-
-
-In steps 2a and 2b all tests are confined to the RV region.
-
-
-
-Do step 2a if either no ending was removed by step 1, or if one of endings
-amment, emment, ment, ments was found.
-
-
-
-Step 2a: Verb suffixes beginning i
-
-
--
- Search for the longest among the following suffixes and if found,
- delete if the preceding character is neither a vowel nor H.
-
-
- îmes ît îtes i ie ies ir ira irai iraIent irais irait iras
- irent irez iriez irions irons iront is issaIent issais issait
- issant issante issantes issants isse issent isses issez issiez
- issions issons it
-
-
- (Note that the preceding character itself must also be in RV.)
-
-
-
-Do step 2b if step 2a was done, but failed to remove a suffix.
-
-
-
-Step 2b: Other verb suffixes
-
-
--
- Search for the longest among the following suffixes, and perform the
- action indicated.
-
- - ions
-
- delete if in R2
-
- é ée ées és èrent er era erai eraIent erais erait eras erez
- eriez erions erons eront ez iez
-
- delete
-
- âmes ât âtes a ai aIent ais ait ant ante antes ants as asse
- assent asses assiez assions
-
- delete
-
- if preceded by e, delete
-
-
- (Note that the e that may be deleted in this last step must also be in
- RV.)
-
-
-
-If the last step to be obeyed — either step 1, 2a or 2b — altered the word,
-do step 3
-
-
-
-Step 3
-
--
- Replace final Y with i or final ç with c
-
-
-Alternatively, if the last step to be obeyed did not alter the word, do
-step 4
-
-
-
-Step 4: Residual suffix
-
-
--
-
- If the word ends s, not preceded by a, i (unless itself preceded by H), o, u, è or s, delete it.
-
-
-
- In the rest of step 4, all tests are confined to the RV region.
-
-
-
- Search for the longest among the following suffixes, and perform the
- action indicated.
-
-
-
- - ion
-
- delete if in R2 and preceded by s or t
-
- ier ière Ier Ière
-
- replace with i
-
- e
-
- delete
-
-
- (So note that ion is removed only when it is in R2 — as well as being
- in RV — and preceded by s or t which must be in RV.)
-
-
-
-Always do steps 5 and 6.
-
-
-
-Step 5: Undouble
-
-
--
- If the word ends enn, onn, ett, ell or eill, delete the last letter
-
-
-
-Step 6: Un-accent
-
-
--
- If the word ends é or è followed by at least one non-vowel, remove
- the accent from the e.
-
-
-
-And finally:
-
-
--
-
- Turn any remaining I, U and Y letters in the word back into lower case.
-
-
-
- Turn He and Hi back into ë and ï, and remove any
- remaining H.
-
-
-
-The same algorithm in Snowball
-
-[% highlight_file('french') %]
-
-[% footer %]
diff --git a/algorithms/german/stemmer.tt b/algorithms/german/stemmer.tt
deleted file mode 100644
index 7220e24..0000000
--- a/algorithms/german/stemmer.tt
+++ /dev/null
@@ -1,146 +0,0 @@
-[% header('German stemming algorithm') %]
-
-Links to resources
-
-
-
-[% algorithm_vocab([40, 'aufeinander', 'kategorie']) %]
-
-The stemming algorithm
-
-
-German includes the following accented forms,
-
-
--
- ä ö ü
-
-
-
-and a special letter, ß, equivalent to double s.
-
-
-
-The following letters are vowels:
-
-
--
- a e i o u y ä ö ü
-
-
-
-First put u and y between vowels into
-upper case, and then do the following mappings,
-
- -
- (a) replace ß with ss,
- (b) replace ae with ä,
- (c) replace oe with ö,
- (d) replace ue with ü unless preceded by q.
-
-
-
-(The rules here for ae, oe and ue were
-added in Snowball 2.3.0, but were previously present as a variant of the
-algorithm termed "german2". The condition
-on the replacement of ue prevents the unwanted changing of
-quelle. Also note that feuer is not modified because the first
-part of the rule changes it to feUer, so ue is not
-found.)
-
-
-
-R1 and R2 are first set up in the standard way
-(see the note on R1 and R2),
-but then R1 is adjusted so that the region before it contains at least 3 letters.
-
-
-
-Define a valid s-ending as one of b, d, f, g, h, k, l, m, n, r or t.
-
-
-
-Define a valid st-ending as the same list, excluding letter r.
-
-
-
-Do each of steps 1, 2 and 3.
-
-
-
-Step 1:
-
-
- Search for the longest among the following suffixes,
-
-
- (a) em ern er
- (b) e en es
- (c) s (preceded by a valid s-ending)
-
-
- and delete if in R1. (Of course the letter of the valid s-ending is
- not necessarily in R1.) If an ending of group (b) is deleted, and the ending
- is preceded by niss, delete the final s.
-
-
-
- (For example, äckern → äck, ackers → acker,
- armes → arm, bedürfnissen → bedürfnis)
-
-
-Step 2:
--
-
- Search for the longest among the following suffixes,
-
- -
- (a) en er est
- (b) st (preceded by a valid st-ending, itself preceded by at least 3
- letters)
-
-
- and delete if in R1.
-
-
-
- (For example, derbsten → derbst by step 1, and derbst → derb by step
- 2, since b is a valid st-ending, and is preceded by just 3 letters)
-
-
-Step 3: d-suffixes (*)
--
-
- Search for the longest among the following suffixes, and perform the
- action indicated.
-
-
- - end ung
-
- delete if in R2
-
- if preceded by ig, delete if in R2 and not preceded by e
-
- ig ik isch
-
- delete if in R2 and not preceded by e
-
- lich heit
-
- delete if in R2
-
- if preceded by er or en, delete if in R1
-
- keit
-
- delete if in R2
-
- if preceded by lich or ig, delete if in R2
-
-
-
-
-Finally,
-
-
--
- turn U and Y back into lower case, and remove the umlaut accent from a,
- o and u.
-
-
-The same algorithm in Snowball
-
-[% highlight_file('german') %]
-
-[% footer %]
diff --git a/algorithms/german2/stemmer.tt b/algorithms/german2/stemmer.tt
deleted file mode 100644
index 6751bd6..0000000
--- a/algorithms/german2/stemmer.tt
+++ /dev/null
@@ -1,22 +0,0 @@
-[% header('German stemming algorithm variant') %]
-
-Links to resources
-
-
-
-
-We used to present a variant of the main German stemmer, termed "german2" which
-was the same as the German stemmer but adjusted the first step to improve
-handling of input text where the German letters ä,
-ö and ü, were written as ae,
-oe and ue respectively.
-
-
-
-Snowball 2.3.0 added these adjustments to the main German stemmer, so there
-is no longer a "german2" variant - just use the "german" stemmer.
-
-
-[% footer %]
diff --git a/algorithms/germanic.tt b/algorithms/germanic.tt
deleted file mode 100644
index eb52a74..0000000
--- a/algorithms/germanic.tt
+++ /dev/null
@@ -1,113 +0,0 @@
-[% header('Germanic language stemmers') %]
-
-Links to resources
-
-
-
-
-Despite its inflexional complexities, German has quite a simple suffix
-structure, so that, if one ignores the almost intractable problems of
-compound words, separable verb prefixes, and prefixed and infixed ge, an
-algorithmic stemmer can be made quite short. (Infixed zu can be removed
-algorithmically, but this minor feature is not shown here.) The umlaut in
-German is a regular feature of plural formation, so its removal is a
-natural feature of stemming, but this leads to certain false conflations
-(for example, schön, beautiful; schon, already).
-
-
-
-By contrast, Dutch is inflexionally simple, but even so, this does not make
-for any great difference between the stemmers. A feature of Dutch that
-makes it markedly different from German is that the grammar of the written
-language has changed, and continues to change, relatively rapidly, and that
-it has assimilated a large and mixed foreign vocabulary with some of the
-accompanying foreign suffixes. Foreign words may, or may not, be
-transliterated into a Dutch style. Naturally these create problems in
-stemming. The stemmer here is intended for native words of contemporary
-Dutch.
-
-
-
-In a Dutch noun, a vowel may double in the singular form (manen = moons, maan
-= moon). We attempt to solve this by undoubling the double vowel (Kraaij
-Pohlmann by contrast attempt to double the single vowel). The endings je,
-tje, pje etc., although extremely common, are not stemmed. They are
-diminutives and can significantly alter word meaning.
-
-
-A note on compound words
-
-
-Famously, German allows for the formation of long compound words, written
-without spaces. For retrieval purposes, it is useful to be able to search
-on the parts of such words, as well as on the complete words
-themselves. This is not just peculiar to German: Dutch, Danish, Norwegian,
-Swedish, Icelandic and Finnish have the same property. To split up
-compound words cannot be done without a dictionary, and the purely
-algorithmic stemmers presented here do not attempt it.
-
-
-
-We would suggest, however, that the need for compound word splitting in
-these languages has been somewhat overstated. In the case of German:
-
-
-
-1) There are many English compounds one would see no advantage in
-splitting,
-
-
-
- blackberry blackboard rainbow coastguard ....
-
-
-Many German compounds are like this,
-
-
-
- Bleistift (pencil) = Blei (lead) + Stift (stick)
- Eisenbahn (railway) = Eisen (iron) + Bahn (road)
- Unterseeboot (submarine) = under + sea + boat
-
-
-2) Other compounds correspond to what in English one would want to do by
-phrase searching, so they are ready made for that purpose,
-
-
-
- Gesundheitspflege = ‘health care’
- Fachhochschule = ‘technical college’
- Kunstmuseum = ‘museum of fine art’
-
-
-3) In any case, longer compounds, especially involving personal names, are
-frequently hyphenated,
-
-
-- Heinrich-Heine-Universität
-
-
-
-4) It is possible to construct participial adjectives of almost any
-length, but they are little used in contemporary German, and regarded now
-as poor style. As in English, very long words are not always to be taken
-too seriously. On the author's last visit to Germany, the longest word he
-had to struggle with was
-
-
-- Nasenspitzenwurzelentzündung
-
-
-
-It means ‘inflammation of the root of the tip of the nose’, and comes from
-a cautionary tale for children.
-
-
-[% footer %]
diff --git a/algorithms/greek/stemmer.tt b/algorithms/greek/stemmer.tt
deleted file mode 100644
index 2327149..0000000
--- a/algorithms/greek/stemmer.tt
+++ /dev/null
@@ -1,27 +0,0 @@
-[% header('Greek stemming algorithm') %]
-
-Links to resources
-
-
-
-This is an implementation of the stemmer described in:
-
-
- Ntais, Georgios. Development of a Stemmer for the Greek Language. Diss. Royal Institute of Technology, 2006.
-
-
-with additional improvements from:
-
-
- Saroukos, Spyridon. Enhancing a Greek language stemmer. University of Tampere, 2008.
-
-
-The full algorithm in Snowball
-
-[% highlight_file('greek') %]
-
-[% footer %]
diff --git a/algorithms/hindi/stemmer.tt b/algorithms/hindi/stemmer.tt
deleted file mode 100644
index 8300a82..0000000
--- a/algorithms/hindi/stemmer.tt
+++ /dev/null
@@ -1,83 +0,0 @@
-[% header('Hindi stemming algorithm') %]
-
-Links to resources
-
-
-
-
-This is an implementation of the "Lightweight Stemmer for Hindi" described in:
-
-
-
- A. Ramanathan and D. Rao (2003) A Lightweight Stemmer for Hindi
-
-
-
-The major difference in our implementation is that rather than transliterating
-to the Latin alphabet we instead work in the original Devanagari script. We
-have modified the suffixes in the list by converting them back to Devanagari
-like so:
-
-
-
-- within the suffixes, "a" after a consonant is dropped since
-consonants have an implicit "a".
-
- within the suffixes, a vowel other than "a" after a consonant
-is a dependent vowel (vowel sign); a vowel (including "a") after a
-non-consonant is an independent vowel.
-
- to allow for the vowel at the start of each suffix being dependent or
-independent, we include each suffix twice. For the dependent version, a
-leading "a" is dropped and we check that the suffix is preceded by a
-consonant (which will have an implicit "a").
-
-
-
-The transliterations of our stems would end with "a" when our
-stems end in a consonant, so we also include the character virama in the
-list of suffixes to remove (this affects 222 words from our sample vocabulary).
-
-
-
-Aside from this, our implementation attempts to be faithful to the algorithm
-described in the paper, though in a few places we've had to resolve ambiguities
-in the paper:
-
-
-
-
--
-We assume that the whole word doesn't count as a valid suffix to remove, so we
-remove the longest suffix from the list which leaves at least one character.
-The paper doesn't seem to clearly state either way which is intended, but producing
-an empty stem seems unhelpful in general. If we instead allowed an empty stem
-to be produced this would result in a different stem for 47 words out of the
-65,140 in our sample vocabulary from Hindi wikipedia.
-
-
--
-We add a to the list of suffixes to remove in figure 3. This is needed for
-the example given right at the end of section 5 to work (conflating BarawIya
-and BarawIyawA), and §3.1 a.v strongly suggests it should be in the list:
-"Thus, the following suffix deletions (longest possible match) are required
-to reduce inflected forms of masculine nouns to a common stem: a A i [...]"
-Adding a only affects 2 words out of the 65,140 in our sample vocabulary.
-
-
--
-We've also assumed that Mh in the suffix list isn't meant to match
-M followed by h. Only one of the 65,140 words in the
-sample vocabulary stems differently due to this (and that word
-seems to be a typo).
-
-
-
-
-The full algorithm in Snowball
-
-[% highlight_file('hindi') %]
-
-[% footer %]
diff --git a/algorithms/hungarian/stemmer.tt b/algorithms/hungarian/stemmer.tt
deleted file mode 100644
index 18e64e4..0000000
--- a/algorithms/hungarian/stemmer.tt
+++ /dev/null
@@ -1,243 +0,0 @@
-[% header('Hungarian stemming algorithm') %]
-
-Contributed by Anna Tordai, University of Amsterdam
-
-
-Links to resources
-
-
-[% algorithm_lis('hungarian', 'Hungarian') %]
-- The
-isla, Amsterdam page for the Hungarian stemmer
-
-
-[% algorithm_vocab([40, 'babaháznak', 'muattta']) %]
-
-
-This stemming algorithm removes the inflectional suffixes of nouns. Nouns are
-inflected for case, person/possession and number.
-
-
-
-Letters in Hungarian include the following accented forms,
-
-
--
- á é í ó ö ő ú ü ű
-
-
-
-The following letters are vowels:
-
-
--
- a á e é i í o ó ö ő u ú
- ü ű
-
-
-
-The following letters are digraphs:
-
-
--
- cs dz dzs gy ly ny ty zs
-
-
-
-A double consonant is defined as:
-
-
--
- bb cc ccs dd ff gg ggy jj kk ll lly mm
- nn nny pp rr ss ssz tt tty vv zz zzs
-
-
-
-If the word begins with a vowel, R1 is defined as the region after the
-first consonant or digraph in the word. If the word begins with a consonant, it
-is defined as the region after the first vowel in the word. If the word does
-not contain both a vowel and consonant, R1 is the null region at the end of
-the word.
-
-
-
-For example:
-
-
-
- t ó b a n consonant-vowel
- |.....| R1 is 'b a n'
-
- a b l a k a n vowel-consonant
- |.........| R1 is 'l a k a n'
-
- a c s o n y vowel-digraph
- |.....| R1 is 'o n y'
-
- c v s
- --->|<--- null R1 region
-
-
-
-‘Delete if in R1’ means that the suffix should be removed if it is in
-region R1 but not if it is outside.
-
-
-
-Do steps 1 to 9 in turn
-
-
-
-Step 1: Remove instrumental case
-
-
--
- Search for one of the following suffixes and perform the action indicated.
-
- - al el
-
- delete if in R1 and preceded by a double consonant, and
- remove one of the double consonants. (In the case of consonant plus digraph, such as ccs, remove a c).
-
-
-
-
-Step 2: Remove frequent cases
-
-
--
- Search for the longest among the following suffixes and perform the action indicated.
-
- - ban ben ba be ra re nak nek val vel tól
- től ról ről ból ből hoz hez höz
- nál nél ig at et ot öt ért képp
- képpen kor ul ül vá vé onként enként
- anként ként en on an ön n t
-
-
-
- delete if in R1
-
- if the remaining word ends á replace by a
-
- if the remaining word ends é replace by e
-
-
-
-
-Step 3: Remove special cases:
-
-
--
- Search for the longest among the following suffixes and perform the action
- indicated.
-
- - án ánként
-
- replace by a if in R1
-
- én
-
- replace by e if in R1
-
-
-
-
-Step 4: Remove other cases:
-
-
--
- Search for the longest among the following suffixes and perform the action indicated
-
- - astul estül stul stül
-
- delete if in R1
-
- ástul
-
- replace with a if in R1
-
- éstül
-
- replace with e if in R1
-
-
-
-
-Step 5: Remove factive case
-
-
--
- Search for one of the following suffixes and perform the action indicated.
-
- - á é
-
- delete if in R1 and preceded by a double consonant, and
- remove one of the double consonants (as in step 1).
-
-
-
-
-Step 6: Remove owned
-
-
--
- Search for the longest among the following suffixes and perform the action
- indicated.
-
- - oké öké aké eké ké éi é
-
- delete if in R1
-
- áké áéi
-
- replace with a if in R1
-
- éké ééi éé
-
- replace with e if in R1
-
-
-
-
-Step 7: Remove singular owner suffixes
-
-
--
- Search for the longest among the following suffixes and perform the action
- indicated.
-
- - ünk unk nk juk jük uk ük em om am m
- od ed ad öd d ja je a e o
-
- delete if in R1
-
- ánk ájuk ám ád á
-
- replace with a if in R1
-
- énk éjük ém éd é
-
- replace with e if in R1
-
-
-
-
-Step 8: Remove plural owner suffixes
-
-
--
- Search for the longest among the following suffixes and perform the action
- indicated.
-
- - jaim jeim aim eim im jaid jeid aid eid id
- jai jei ai ei i jaink jeink eink aink ink
- jaitok jeitek aitok eitek itek jeik jaik aik eik
- ik
-
-
- delete if in R1
-
- áim áid ái áink áitok áik
-
- replace with a if in R1
-
- éim éid éi éink éitek éik
-
- replace with e if in R1
-
-
-
-
-Step 9: Remove plural suffixes
-
-
--
- Search for the longest among the following suffixes and perform the action
- indicated.
-
- - ák
-
- replace with a if in R1
- ék
- replace with e if in R1
-
- ök ok ek ak k
-
- delete if in R1
-
-
-
-The full algorithm in Snowball
-
-[% highlight_file('hungarian') %]
-
-[% footer %]
diff --git a/algorithms/index.tt b/algorithms/index.tt
deleted file mode 100644
index 58c622e..0000000
--- a/algorithms/index.tt
+++ /dev/null
@@ -1,92 +0,0 @@
-[% header('Stemming algorithms') %]
-
-Stemming for various European languages
-
-
-We present stemming algorithms (with implementations in Snowball) for the
-following languages:
-
-
-
-
-
-There are two English stemmers, the original Porter stemmer,
-and an improved stemmer which has been called Porter2. Read the accounts of them to
-learn a bit more about using Snowball.
-
-
-
-Each formal algorithm should be compared with the corresponding Snowball program.
-
-
-
-Surprisingly, among the Indo-European languages (*), the French stemmer turns out to be the most complicated, whereas
-the Russian stemmer, despite its large number of suffixes, is very simple. In
-fact it is interesting that English, with its minimal use of i-suffixes,
-has such a complex stemmer. This is partly due to the delicate nature of
-i-suffix removal (undoubling the p after removing ing from hopping etc),
-and partly to the wealth of forms of d-suffixes, deriving as they do from
-the mixed Romance and Germanic ancestry of the language.
-
-
-
-Note that by i-suffix we mean inflexional suffix, and by d-suffix,
-derivational suffix (*).
-
-
-Other Stemming Algorithms
-
-
-We also provide Snowball implementations of some algorithms developed by other parties:
-
-
-
-
-[% footer %]
diff --git a/algorithms/indonesian/stemmer.tt b/algorithms/indonesian/stemmer.tt
deleted file mode 100644
index ffffe04..0000000
--- a/algorithms/indonesian/stemmer.tt
+++ /dev/null
@@ -1,124 +0,0 @@
-[% header('Indonesian stemming algorithm') %]
-
-Links to resources
-
-
-
-
-This is an implementation of the "Porter Stemmer for Bahasa Indonesia" described
-in:
-
-
-
- Tala F Z (2003) A Study of Stemming Effects on Information Retrieval in Bahasa Indonesia. M.S. thesis, University of Amsterdam.
-
-
-
-It would be more accurately described as "Porter-style" or "Porter-inspired"
-since Martin Porter wasn't directly involved in its development.
-
-
-
-Our implementation attempts to be faithful to the algorithm described in the
-paper, but we have had to address some places in the paper which are unclear,
-and a case where an example doesn't match the described algorithm.
-
-
-
--
-
-In table 2.7 on page 9, the additional condition on the remaining stem for
-removing the suffix "i" reads "V|K...c1c1, c1
-≠ s, c2 ≠ i and prefix ∉ {ber, ke, peng}".
-
-
-
-The meaning of this is unclear in several ways, and none of the
-examples given of the stemmer's behaviour in the paper help to
-resolve these issues.
-
-
-
-Notice that c2 isn't actually used - the most obvious explanation
-seems to be that "c1c1" should read
-"c1c2", or maybe "c2c1".
-
-
-
-Elsewhere the paper defines V... as meaning "the stem starts with
-a vowel" and K... as meaning "the stem starts with a consonant".
-
-
-
-In other places where it says X|Y... it seems the | binds more
-tightly, so it's (V|K)...cicj not
-V|(K...cicj). That seems a bit
-odd as the first letter must be either a vowel or a consonant, so
-that really just means "ends cicj". However, the paper nowhere
-uses or defines a notation such as ...X, which may explain the
-seemingly redundant way this is specified.
-
-
-
-The conditions elsewhere on prefix removal (e.g. V...) are clearly
-on the stem left after the prefix is removed. None of the other
-rules for suffix removal have conditions on the stem, but for
-consistency with the prefix rules we might expect that the
-cicj test is on what's left after removing the
-"i" suffix.
-
-
-
-However, studying Indonesian wordlists and discussion with a native
-speaker leads us to conclude that the purpose of this check is to
-protect words of foreign origin (e.g. "televisi", "organisasi",
-"komunikasi") from stemming, and the common feature of these is
-that the word ends "-si", so we conclude that the condition here
-should be read as "word does not end -si", and this is what we
-have implemented.
-
-
-
--
-
-On page 29, the example "kompas Q.31" says "Both Nazief and Porter stemmer
-converted the word peledakan (blast, explotion) to ledak (to
-blast, to explode)". However, the algorithm as described doesn't behave in
-this way - grammatically the prefix pe- occurs as a variation of both the
-first-order derivational prefix peng- and the second-order derivational prefix
-per-, but table 2.5 doesn't include "pe", only table 2.6 does, so "peledakan"
-is handled (incorrectly) as having prefix "per" not "peng", and so we remove
-derivational suffix "kan" rather than "an" to give stem leda.
-(Porter-style stemmers remove the longest suffix they can amongst those
-available, which this paper notes in the last paragraph on page 15).
-
-
-
-We resolve this by amending the condition on suffix "kan" to "prefix ∉
-{ke, peng, per}", which seems to make the stemmer's behaviour match all the
-examples in the paper except for one: "perbaikan" is shown in table 3.4
-as stemming to "bai", but with this change it now stems to "baik". The
-table notes that "baik" is the actual root so this deviation is an
-improvement. In a sample vocabulary derived from the most common words in
-id.wikipedia.org, this change only affects 0.12% of words (76 out of 64,587,
-including "peledakan" and "perbaikan").
-
-
-
--
-The paper has the condition on removal of prefix "bel" and "pel" as
-just "ajar" not "ajar..." but it seems that the latter must be what
-is intended so that e.g. "pelajaran" stems to "ajar" not "lajar".
-This change only affects a very small number of words (11 out of
-64,587), and only for the better.
-
-
-
-The full algorithm in Snowball
-
-[% highlight_file('indonesian') %]
-
-[% footer %]
diff --git a/algorithms/irish/stemmer.tt b/algorithms/irish/stemmer.tt
deleted file mode 100644
index 1293bc2..0000000
--- a/algorithms/irish/stemmer.tt
+++ /dev/null
@@ -1,52 +0,0 @@
-[% header('Irish Gaelic stemming algorithm') %]
-
-Links to resources
-
-
-
-[% algorithm_vocab([30, 'ábharú', 'pábháil']) %]
-
-
-This basic stemmer for Irish was developed and contributed by Jim
-O’Regan.
-
-
-
-One thing that should be taken into account with Irish is the initial
-mutation (n-eclipsis and h-prothesis) which causes problems if words
-are simply folded to lowercase before stemming in the way that is
-usually assumed by Snowball stemmers. A Snowball version of an algorithm to
-fold to lowercase while taking this into account would look something like:
-
-
-[% highlight_file('tolower_irish') %]
-
-
-The following characters are vowels for the purposes of this algorithm:
-
-
-
- a e i o u á é í ó ú
-
-
-
-The algorithm first addresses the initial mutation, then regions are determined
-based on the word after this first step:
-
-
-
-- RV is the region after the first vowel, or the end of the word
-if it contains no vowels.
-- R1 is the region after the first non-vowel following a vowel, or the
-end of the word if there is no such non-vowel.
-- R2 is the region after the first non-vowel following a vowel in
-R1, or the end of the word if there is no such non-vowel.
-
-
-The full algorithm in Snowball
-
-[% highlight_file('irish') %]
-
-[% footer %]
diff --git a/algorithms/italian/stemmer.tt b/algorithms/italian/stemmer.tt
deleted file mode 100644
index de88f5c..0000000
--- a/algorithms/italian/stemmer.tt
+++ /dev/null
@@ -1,185 +0,0 @@
-[% header('Italian stemming algorithm') %]
-
-Links to resources
-
-
-
-[% algorithm_vocab([40, 'abbandonata', 'pronto']) %]
-
-The stemming algorithm
-
-
-Italian can include the following accented forms:
-
-
--
- á é í ó ú à è ì ò ù
-
-
-
-First, replace all acute accents by grave accents. And, as in French, put u after
-q, and u, i between vowels into upper case.
-(See note on vowel marking.)
-
-
-
-The vowels are then
-
-
--
- a e i o u à è ì ò ù
-
-
-
-R2
-(see the note on R1 and R2)
-and RV have the same definition as in the
- Spanish stemmer.
-
-
-
-First exceptional cases are checked for. These need to match the whole word, and currently are:
-
-
-
-- divano: replace with divan (to avoid conflating with diva) [Added 2022-11-16]
-
-
-
-If found then handle as described and that's it.
-
-
-
-Otherwise always do steps 0 and 1.
-
-
-
-Step 0: Attached pronoun
-
-
--
- Search for the longest among the following suffixes
-
-
- ci gli la le li lo mi ne si ti vi
- sene gliela gliele glieli glielo gliene
- mela mele meli melo mene
- tela tele teli telo tene
- cela cele celi celo cene
- vela vele veli velo vene
-
-
- following one of
-
- -
- (a) ando endo
- (b) ar er ir
-
-
- in RV. In case of (a) the suffix is deleted, in case (b) it is replaced
- by e (guardandogli → guardando, accomodarci → accomodare)
-
-
-
-
-Step 1: Standard suffix removal
-
-
--
- Search for the longest among the following suffixes, and perform the
- action indicated.
-
- - anza anze ico ici ica ice iche ichi ismo ismi abile abili ibile ibili
- ista iste isti istà istè istì oso osi osa ose mente
- atrice atrici ante anti
-
- delete if in R2
-
- azione azioni atore atori
- delete if in R2
-
- if preceded by ic, delete if in R2
-
- logia logie
-
- replace with log if in R2
-
- uzione uzioni usione usioni
-
- replace with u if in R2
-
- enza enze
-
- replace with ente if in R2
-
- amento amenti imento imenti
-
- delete if in RV
-
- amente
-
- delete if in R1
-
- if preceded by iv, delete if in R2 (and if further preceded by at,
- delete if in R2), otherwise,
-
- if preceded by os, ic or abil, delete if in R2
-
- ità
-
- delete if in R2
-
- if preceded by abil, ic or iv, delete if in R2
-
- ivo ivi iva ive
-
- delete if in R2
-
- if preceded by at, delete if in R2 (and if further preceded by ic,
- delete if in R2)
-
-
-
-
-Do step 2 if no ending was removed by step 1.
-
-
-
-Step 2: Verb suffixes
-
-
--
- Search for the longest among the following suffixes in RV, and if found,
- delete.
-
-
- ammo ando ano are arono
- asse assero assi assimo ata ate
- ati ato ava avamo avano avate avi avo emmo
- enda ende endi endo erà erai eranno ere
- erebbe erebbero erei eremmo eremo ereste
- eresti erete erò erono essero ete eva evamo
- evano evate evi evo Yamo iamo immo irà
- irai iranno ire irebbe irebbero irei iremmo
- iremo ireste iresti irete irò irono isca
- iscano isce isci isco iscono issero ita ite
- iti ito iva ivamo ivano ivate ivi ivo
- ono uta ute uti uto ar ir
-
-
-
-Always do steps 3a and 3b.
-
-
-
-
-
-Step 3a
-
-
--
- Delete a final a, e, i, o, à, è, ì or ò if it is in RV, and a
- preceding i if it is in RV (crocchi → crocch, crocchio → crocch)
-
-
-
-Step 3b
-
-
--
- Replace final ch (or gh) with c (or g) if in RV (crocch → crocc)
-
-
-
-Finally,
-
-
--
- turn I and U back into lower case
-
-
-The same algorithm in Snowball
-
-[% highlight_file('italian') %]
-
-[% footer %]
diff --git a/algorithms/kraaij_pohlmann/stemmer.tt b/algorithms/kraaij_pohlmann/stemmer.tt
deleted file mode 100644
index 4fad378..0000000
--- a/algorithms/kraaij_pohlmann/stemmer.tt
+++ /dev/null
@@ -1,69 +0,0 @@
-[% header('The Kraaij-Pohlmann stemming algorithm') %]
-
-Links to resources
-
-
-
-
-The Kraaij-Pohlmann stemming algorithm is an ANSI C program for stemming in Dutch. Although
-advertised as an algorithm, it is in fact a program without an accompanying
-algorithmic description. It is possible to produce a fairly clean Snowball
-version, but only by sacrificing exact functional equivalence. But that does not
-matter too much, since in the demonstration vocabulary only 32 words out of over
-45,000 stem differently. Here they are:
-
-
-
-
-source | | ANSI C stemmer | | Snowball stemmer
- |
airways | | airways | | airway
- |
algerije | | algerije | | alrije
- |
assays | | assays | | assay
- |
bruys | | bruys | | bruy
- |
cleanaways | | cleanaways | | cleanaway
- |
creys | | creys | | crey
- |
croyden | | croyd | | croy
- |
edele | | edel | | edeel
- |
essays | | essays | | essay
- |
gedijen | | gedij | | dij
- |
geoff | | of | | off
- |
gevrey | | gevrey | | vrey
- |
geysels | | ysel | | gey
- |
grootmeesteres | | grootmee | | grootmeest
- |
gròotmeesteres | | gròotmee | | gròotmeest
- |
hectares | | hectaar | | hect
- |
huys | | huys | | huy
- |
kayen | | kayen | | kaay
- |
lagerwey | | lagerwey | | larwey
- |
mayen | | mayen | | maay
- |
meesteres | | meester | | meest
- |
oppasseres | | oppasser | | oppas
- |
pays | | pays | | pay
- |
royale | | royale | | royaal
- |
schilderes | | schilder | | schild
- |
summerhayes | | summerhayes | | summerhaye
- |
tyumen | | tyuum | | tyum
- |
verheyen | | verheyen | | verheey
- |
verleideres | | verleider | | verleid
- |
ytsen | | yts | | ytsen
- |
yves | | yve | | yves
- |
zangeres | | zanger | | zang
- |
-
-
-The Kraaij-Pohlmann stemmer can make fairly drastic reductions to a word. For
-example, infixed ge is removed, so geluidgevoelige stems to
-luidvoel. Often, therefore, the original word cannot be easily guessed from
-the stemmed form.
-
-
-
-Here then is the Snowball equivalent of the Kraaij-Pohlmann algorithm.
-
-
-[% highlight_file('kraaij_pohlmann') %]
-
-[% footer %]
diff --git a/algorithms/lithuanian/stemmer.tt b/algorithms/lithuanian/stemmer.tt
deleted file mode 100644
index 8db874a..0000000
--- a/algorithms/lithuanian/stemmer.tt
+++ /dev/null
@@ -1,22 +0,0 @@
-[% header('Lithuanian stemming algorithm') %]
-
-Links to resources
-
-
-[% algorithm_lis('lithuanian', 'Lithuanian') %]
-
-
-
-This algorithm was contributed by Dainius Jocas.
-
-
-
-Its intended domain of use is information retrieval, and so handling of nouns
-is considered more important than that of verbs, adjectives, etc.
-
-
-The full algorithm in Snowball
-
-[% highlight_file('lithuanian') %]
-
-[% footer %]
diff --git a/algorithms/lovins/festschrift.tt b/algorithms/lovins/festschrift.tt
deleted file mode 100644
index b461436..0000000
--- a/algorithms/lovins/festschrift.tt
+++ /dev/null
@@ -1,1740 +0,0 @@
-[% header('Lovins revisited') %]
-
-
-This is a revised version of Martin Porter’s paper which was published as part
-of the Karen Sparck Jones Festschrift of 2005.
-
-
-
-Charting a New Course: Progress in Natural Language Processing and
-Information Retrieval: A Festschrift for Professor Karen Sparck Jones, edited
-by John Tait, Amsterdam: Kluwer, 2005.
-
-
-Lovins Revisited
-
-
-Martin Porter, December 2001 (revised November 2008).
-
-
-Abstract
--
- The Lovins stemming algorithm for English is analysed, and compared
- with the Porter stemming algorithm, using Snowball, a language designed
- specifically for the development of stemming algorithms. It is shown
- how the algorithms manage to function in a similar way, while appearing
- to be quite different. The Porter algorithm is recoded in the style of
- the Lovins algorithm, which leads to the discovery of a few possible
- improvements.
-
-
-Preamble
-
-
-This is a festschrift paper, so I am allowed to begin on a personal note.
-In 1979 I was working with Keith van Rijsbergen and Stephen Robertson on a
-British Library funded IR project to investigate the selection of good
-index terms, and one of the things we found ourselves having to do was to
-establish a document test collection from some raw data that had been sent
-to us on a magnetic tape by Peter Vaswani of the National Physical
-Laboratory. I was the tame programmer in the project, so it was my job to
-set up the test collection.
-
-
-
-On the whole it did not prove too difficult. The data we received was a
-collection of about 11,000 documents (titles and short abstracts), 93
-queries — in a free text form, and relevance judgements. All the text was
-in upper case without punctuation, and there were one or two marker
-characters to act as field terminators. By modern standards the data was
-really very small indeed, but at the time it was considerably larger than
-any of the other test collections we had. What you had to do was to cast it
-into a standard form
-for experimental work. You represented terms and documents by numbers, and
-created flat files in text form corresponding to the queries, relevance
-assessments, and term to document index. One process however was less
-straightforward. On their way to becoming numeric terms, the words of the
-source text were put through a process of linguistic normalisation called
-suffix stripping, in which certain derivational and inflectional suffixes
-attached to the words were removed. There was a standard piece of software
-used in Cambridge at that time to do this, written in 1971 by Keith
-Andrews (Andrews, 1971) as part of a Diploma Project.
-One of the courses in
-Cambridge is the one year post-graduate Diploma in Computer Science. Each
-student on the course is required to do a special project, which includes
-writing a significant piece of software — significant in the sense of being
-both useful and substantial.
-Keith's piece of software was more useful than most, and it continued to be
-used as a suffix stripping program, or stemmer, for many years after it was
-written.
-
-
-
-Now by an odd chance I was privy to much of Keith Andrews’ original
-thinking at the time that he was doing the work. The reason for this was
-that in 1971 I was looking for a house in Cambridge, and the base I was
-operating from was a sleeping bag on the living room floor of an old friend
-called John Dawson, who was Keith’s diploma supervisor. Keith used to come round
-and discuss stemming algorithms with him, while I formed a mute audience. I
-learnt about the Lovins stemming algorithm of 1968 (Lovins, 1968),
-and must I think have
-at least looked at her paper then, since I know it was not new to me when I
-saw it again in 1979. Their view of Lovins’ work was that it did not go far
-enough. There needed to be many more suffixes, and more complex rules to
-determine the criteria for their removal. Much of their discussion was
-about new suffixes to add to the list, and removal rules. It was interesting
-therefore to find myself needing to use Andrews’ work eight years later,
-and questioning some of its assumptions. Did you need that many suffixes?
-Did the rules need to be so complicated? Perhaps one would do better to
-break composite suffixes into smaller units and remove them piecemeal.
-And perhaps syllables would be a better count of stem length than letters.
-So I wrote my own stemmer, which became known as the Porter stemmer, and
-which was published in 1980 (Porter, 1980).
-
-
-
-I must explain where Karen Sparck Jones fits into all of this. Keith
-Andrews’ piece of work was originally suggested by Karen as a Diploma
-student project, and she was able to use the Andrews stemmer in her IR
-experiments throughout the seventies. In 1979 however Karen had moved much
-more into the field of Natural Language Processing and Artificial
-Intelligence, and by then had two or three research students in that field
-just writing up their PhDs (only one of whom I really got to know — John
-Tait, the editor of this volume). So we were in contact, but not working
-together. That again was an odd chance: that Karen had been my research
-supervisor in a topic other than IR, and that when later I was doing IR
-research at Cambridge I was not working with Karen. While I was engaged on
-writing the stemmer, Karen showed some justifiable irritation that I had
-become interested in a topic so very remote from the one for which we had
-received the British Library funding. Nevertheless, she came into my room
-one day, said, ‘Look, if you're getting interested in stemming, you’d
-better read this,’ and handed me the 1968 issue of Mechanical
-Translation that contains the Lovins paper. I still have this issue with
-Karen’s name across the top. (And I hope she didn't expect it back!)
-
-
-
-Another 20 years have gone by, and I have been studying the Lovins stemmer
-again, really because I was looking for examples to code up in Snowball, a
-small string processing language I devised in the latter half of 2001
-particularly adapted for writing stemming algorithms. Lovins’ stemmer
-strikes me now as a fine piece of work, for which she never quite received
-the credit she deserved. It was the first stemmer for English set out as
-an algorithm that described the stemming process exactly. She explained
-how it was intended to be used to improve IR performance, in just the way
-in which stemmers are used today. It is not seriously short of suffixes:
-the outstanding omissions are the plural forms ements and ents
-corresponding to her ement and ent, and it is easy enough to add
-them into the definition. It performs well in practice. In fact it is
-still in use, and can be downloaded in various languages from the net (1).
-The tendency since 1980 has been to attach the name ‘Porter’ to any
-language stemming process that does not use a dictionary, even when it is
-quite dissimilar to the original Porter stemmer (witness the Dutch Porter
-stemmer of Kraaij and Pohlmann (2) (Kraaij, 1994 and Kraaij, 1995)), but
-the priority really belongs to Lovins. It also has one clear advantage
-over the Porter algorithm, in that it involves fewer steps. Coded up well,
-it should run a lot faster.
-
-
-
-A number of things intrigued me. Why are the Lovins and Porter stemmers so
-different, when what they do looks so similar? Could the stemmer, in some
-sense, be brought up-to-date? Could the Porter stemmer be cast into the
-Lovins form, and so run faster?
-
-
-
-This paper is about the answers to these questions. In discovering them, I
-have learned a lot more about my own stemmer.
-
-
-Why stem?
-
-
-It may be worth saying a little on what stemming is all about. We can imagine
-a document with the title,
-
-
--
- Pre-raphaelitism: A Study of Four Critical Approaches
-
-
-
-and a query, containing the words
-
-
--
- PRE-RAPHAELITE CRITICISM
-
-
-
-We want to match query against title so that ‘Pre-raphaelitism’ matches
-‘PRE-RAPHAELITE’ and ‘Critical’ matches ‘CRITICISM’. This leads to the
-idea of removing endings from words as part of the process of extracting index
-terms from documents, a similar process of ending removal being applied to
-queries prior to the match. For example, we would like to remove the endings
-from
-
-
--
-critical
-critically
-criticism
-criticisms
-critics
-
-
-
-so that each word is reduced to ‘critic’. This is the stem, from which the
-other words are formed, so the process as a whole is called stemming. It is
-a feature of English morphology that the part of the word we want to remove is
-at the end — the suffix. But the same is broadly true of French, German and other
-languages of the Indo-European group. It is also true of numerous languages
-outside Indo-European, Finnish for example, although there is a
-boundary beyond which it is not true. So Chinese, where words are simple
-units without affixes, and Arabic, where the stem is modified by
-prefixes and infixes as well as suffixes, lie outside the
-boundary. As an IR technique it therefore has wide applicability. In developing
-stemmers two points were recognised quite early on. One is that the
-morphological regularities that you find in English (or other languages) mean
-that you can attempt to do stemming by a purely algorithmic process. Endings
-al, ally, ism etc. occur throughout English vocabulary, and are
-easy to detect and remove: you don’t need access to an on-line dictionary. The
-other is that the morphological irregularities of English set a limit to the
-success of an algorithmic approach. Syntactically, what look like endings may
-not be endings (offspring is not offspr + ing), and the list of
-endings seems to extend indefinitely (trapez-oid, likeli-hood,
-guardian-ship, Tibet-an, juven-ilia, Roman-esque, ox-en
-...) It is difficult to gauge where to set the cut-off for these rarer forms.
-Semantically, the addition of a suffix may alter the meaning of a word a
-little, a lot, or completely, and morphology alone cannot measure the degree of
-change (prove and provable have closely related meanings; probe and
-probable do not.) This meant that stemming, if employed at all, became the
-most challenging, and the most difficult part of the indexing process.
-
-
-
-In the seventies, stemming might be applied as part of the process of
-establishing a test collection, and when it was, there would not usually be any
-attempt to make the stemming process well-defined, or easily repeatable by
-another researcher. This was really because the basis for experiment replication
-was the normalised data that came out of the stemming process, rather than the
-source data plus a description of stemming procedures. Stemming tended to be
-applied, and then forgotten about. But by the 1980s, stemming itself was being
-investigated. Lennon and others (Lennon, 1981) found no substantial differences
-between the use of different stemmers for English. Harman (Harman, 1991)
-challenged the effectiveness of stemming altogether, when she reported no
-substantial differences between using and not using stemming in a series of
-experiments. But later work has been more positive. Krovetz (Krovetz, 1995), for example,
-reported small but significant improvements with stemming over a range of test
-collections.
-
-
-
-Of course, all these experiments assume some IR model which will use stemming in
-a particular way, and will measure just those features that test collections
-are, notoriously, able to measure. We might imagine an IR system where the users
-have been educated in the advantages and disadvantages to be expected from
-stemming, and are able to flag individual search terms to say whether or not
-they are to be used stemmed or unstemmed. Stemming sometimes improves,
-occasionally degrades, search performance, and this would be the best way of
-using it as an IR facility. Again stemming helps regularise the IR vocabulary,
-which is very useful when preparing a list of terms to present to a user as
-candidates for query expansion. But this advantage too is difficult to quantify.
-
-
-
-An evaluative comparison between the Lovins and later stemmers lies in any case
-outside the scope of this paper, but it is important to
-bear in mind that it is not a straightforward undertaking.
-
-
-The Lovins Stemmer
-
-
-Structurally, the Lovins stemmer is in four parts, collected together in
-four Appendices A, B, C and D in her paper. Part A is a list of 294
-endings, each with a letter which identifies a condition for whether or
-not the ending should be removed. (I will follow Lovins in using ‘ending’
-rather than ‘suffix’ as a name for the items on the list.)
-Part A therefore looks like this:
-
-
--
- .11.
- alistically B
- arizability A
- izationally B
- .10.
- antialness A
- arisations A
- arizations A
- entialness A
- .09.
- allically C
- antaneous A
- antiality A
- . . .
-
- .01.
- a A
- e A
- i A
- o A
- s W
- y B
-
-
-
-Endings are banked by length, from 11 letters down to 1. Each bank is tried
-in turn until an ending is found which matches the end of the word to be
-stemmed and leaves a stem which satisfies the given condition, when the
-ending is removed. For example condition C says that the stem must have at
-least 4 letters, so bimetallically would lose allically leaving a
-stem bimet of length 5, but metallically would not reduce to
-met, since its length is only 3.
-
-
-
-There are 29 such conditions, called A to Z, AA, BB and CC, and they
-constitute part B of the stemmer. Here they are (* stands for any letter):
-
-
-
-
-A | | No restrictions on stem
- |
B | | Minimum stem length = 3
- |
C | | Minimum stem length = 4
- |
D | | Minimum stem length = 5
- |
E | | Do not remove ending after e
- |
F | | Minimum stem length = 3 and do not remove ending after e
- |
G | | Minimum stem length = 3 and remove ending only after f
- |
H | | Remove ending only after t or ll
- |
I | | Do not remove ending after o or e
- |
J | | Do not remove ending after a or e
- |
K | | Minimum stem length = 3 and remove ending only after l, i or
-u*e
- |
L | | Do not remove ending after u, x or s, unless s follows
-o
- |
M | | Do not remove ending after a, c, e or m
- |
N | | Minimum stem length = 4 after s**, elsewhere = 3
- |
O | | Remove ending only after l or i
- |
P | | Do not remove ending after c
- |
Q | | Minimum stem length = 3 and do not remove ending after l or
-n
- |
R | | Remove ending only after n or r
- |
S | | Remove ending only after dr or t, unless t follows t
- |
T | | Remove ending only after s or t, unless t follows o
- |
U | | Remove ending only after l, m, n or r
- |
V | | Remove ending only after c
- |
W | | Do not remove ending after s or u
- |
X | | Remove ending only after l, i or u*e
- |
Y | | Remove ending only after in
- |
Z | | Do not remove ending after f
- |
AA | | Remove ending only after d, f, ph, th, l, er, or, es or t
- |
BB | | Minimum stem length = 3 and do not remove ending after met or
-ryst
- |
CC | | Remove ending only after l
- |
-
-
-
-There is an implicit assumption in each condition, A included, that the minimum
-stem length is 2.
-
-
-
-This is much less complicated than it seems at first. Conditions A to D
-depend on a simple measure of minimum stem length, and E and F are slight
-variants of A and B. Out of the 294 endings, 259 use one of these
-6 conditions. The remaining 35 endings use the other 23 conditions, so
-conditions G, H ... CC have less than 2 suffixes each, on average. What is
-happening here is that Lovins is trying to capture a rule which gives a
-good removal criterion for one ending, or a small number of similar
-endings. She does not explain the thinking behind the conditions, but it is
-often not too difficult to reconstruct. Here for example are the last few
-conditions with their endings,
-
-
--
-
-Y (early, ealy, eal, ear). collinearly, multilinear are
-stemmed.
-
-Z (eature). misfeature does not lose eature.
-
-AA (ite). acolouthite, hemimorphite lose ite, ignite and
-requite retain it.
-
-BB (allic, als, al). Words ending metal, crystal retain
-al.
-
-CC (inity). crystallinity → crystall, but affinity,
-infinity are unaltered.
-
-
-
-
-Part C of the Lovins stemmer is a set of 35 transformation rules used to
-adjust the letters at the end of the stem. These rules are invoked after the
-stemming step proper, irrespective of whether an ending was actually
-removed. Here are about half of them, with examples to show the type of
-transformation intended (letters in square brackets indicate the full form
-of the words),
-
-
-
-
- 1) | | bb | | → | | b | | rubb[ing] → rub
- |
| | ll | | → | | l | | controll[ed] → control
- |
| | mm | | → | | m | | trimm[ed] → trim
- |
| | rr | | → | | r | | abhorr[ing] → abhor
- |
2) | | iev | | → | | ief | | believ[e] → belief
- |
3) | | uct | | → | | uc | | induct[ion] → induc[e]
- |
4) | | umpt | | → | | um | | consumpt[ion] → consum[e]
- |
5) | | rpt | | → | | rb | | absorpt[ion] → absorb
- |
6) | | urs | | → | | ur | | recurs[ive] → recur
- |
7a) | | metr | | → | | meter | | parametr[ic] → paramet[er]
- |
8) | | olv | | → | | olut | | dissolv[ed] → dissolut[ion]
- |
11) | | dex | | → | | dic | | index → indic[es]
- |
16) | | ix | | → | | ic | | matrix → matric[es]
- |
18) | | uad | | → | | uas | | persuad[e] → persuas[ion]
- |
19) | | vad | | → | | vas | | evad[e] → evas[ion]
- |
20) | | cid | | → | | cis | | decid[e] → decis[ion]
- |
21) | | lid | | → | | lis | | elid[e] → elis[ion]
- |
31) | | ert | | → | | ers | | convert[ed] → convers[ion]
- |
33) | | yt | | → | | ys | | analytic → analysis
- |
34) | | yz | | → | | ys | | analyzed → analysed
- |
-
-
-
-Finally, part D suggests certain relaxed matching rules between query terms
-and index terms when the stemmer has been used to set up an IR system, but
-we can regard that as not being part of the stemmer proper.
-
-
-The Lovins stemmer in Snowball
-
-
-Snowball is a string processing language designed with the idea of making
-the definition of stemming algorithms much more rigorous. The Snowball
-compiler translates a Snowball script into a thread-safe ANSI C module,
-where speed of execution is a major design consideration. The resulting
-stemmers are pleasantly fast, and will process one million or so words a
-second on a high-performance modern PC. The Snowball website (3) gives a
-full description of the language, and also presents stemmers for a range of
-natural languages. Each stemmer is written out as a formal algorithm, with
-the corresponding Snowball script following. The algorithm definition acts
-as program comment for the Snowball script, and the Snowball script gives a
-precise definition to the algorithm. The ANSI C code with the
-same functionality can also be inspected, and sample vocabularies in source
-and stemmed form can be used for test purposes.
-An essential function of
-the Snowball script is therefore comprehensibility — it should be fully understood
-by the reader of the script, and Snowball has been designed with this in mind.
-It contrasts interestingly in this respect with a system like Perl.
-Perl has a very big definition. Writing your own scripts in Perl is easy,
-after the initial learning hurdle, but understanding other scripts can be
-quite hard. The size of the language means that there are many different
-ways of doing the same thing, which gives programmers the opportunity of
-developing highly idiosyncratic styles. Snowball has a small, tight
-definition. Writing Snowball is much less easy than writing Perl, but on
-the other hand once it is written it is fairly easy to understand
-(or at least one hopes that it is). This is
-illustrated by the Lovins stemmer in Snowball, which is given in Appendix
-1. There is a very easy and natural correspondence
-between the different parts of the stemmer definition in Lovins' original
-paper and their Snowball equivalents.
-For example, the Lovins conditions A, B ... CC code up very neatly
-into routines with the same name. Taking condition L,
-
-
--
- L Do not remove ending after u, x or s, unless s follows
- o
-
-
-
-corresponds to
-
-
-[% highlight("
- define L as ( test hop 2 not 'u' not 'x' not ('s' not 'o') )
-") %]
-
-
-When L
is called, we are at the right end of the stem, moving left towards the
-front of the word. Each Lovins condition has an implicit test for a stem of
-length 2, and this is done by [% highlight_inline('test hop 2') %], which sees if it is possible to
-hop two places left. If it is not, the routine immediately returns with a
-false signal, otherwise it carries on. It tests that the character at the
-right hand end is not u, and also not x, and also not s following a letter
-which is not o. This is equivalent to the Lovins condition. Here is not of
-course the place to give the exact semantics, but you can quickly get
-the feel of the language by comparing the 29 Lovins conditions with their
-Snowball definitions.
-
-
-
-Something must be said about the [% highlight_inline('among') %] feature of Snowball however,
-since this is central to the efficient implementation of stemmers. It is
-also the one part of Snowball that requires just a little effort to
-understand.
-
-
-
-At its simplest, [% highlight_inline('among') %] can be used to test for alternative strings. The
-[% highlight_inline('among') %]s used in the definition of condition AA and the undouble
-routine have this form. In Snowball you can write
-
-
-[% highlight("
- 'sh' or 's' or 't' 'o' or 'i' 'p'
-") %]
-
-
-which will match the various forms shop, ship, sop, sip, top, tip. The
-order is important, because if [% highlight_inline("'sh'") %] and [% highlight_inline("'s'") %] are swapped over, the
-[% highlight_inline("'s'") %] would match the first letter of ship, while [% highlight_inline("'o'") %] or [% highlight_inline("'i'") %]
-would fail to match with the following [% highlight_inline("'h'") %] — in other words the pattern
-matching has no backtracking. But it can also be written as
-
-
-[% highlight("
- among('sh' 's' 't') among('i' 'o') 'p'
-") %]
-
-
-The order of the strings in each [% highlight_inline('among') %] is not important, because the
-match will be with the longest of all the strings that can match. In
-Snowball the implementation of [% highlight_inline('among') %] is based on the binary-chop idea,
-but has been carefully optimised. For example, in the Lovins stemmer, the
-main [% highlight_inline('among') %] in the endings
routine has 294 different strings of average
-length 5.2 characters. A search for an ending involves accessing a number
-of characters within these 294 strings. The order is going to be
-$K \log_2 294$, or $8.2K$, where K is a number that one hopes will
-be small, although one must certainly expect it to be greater than 1. It
-turns out that, for the successive words of a standard test vocabulary,
-K averages to 1.6, so for each word there are about 13 character
-comparisons needed to determine whether it has one of the Lovins endings.
-
-
-
-Each string in an [% highlight_inline('among') %] construction can be followed by a routine name. The
-routine returns a true/false signal, and then the [% highlight_inline('among') %] searches for the
-longest substring whose associated routine gives a true signal. A string not
-followed by a routine name can be thought of as a string which is associated
-with a routine that does nothing except give a true signal. This is the way
-that the [% highlight_inline('among') %] in the endings
routine works, where indeed every string is
-followed by a routine name.
-
-
-
-More generally, lists of strings in the [% highlight_inline('among') %] construction can be followed
-by bracketed commands, which are obeyed if one of the strings in the list is
-picked out for the longest match. The syntax is then
-
-
-
- among( S11 S12 ... (C1)
- S21 S22 ... (C2)
- ...
-
- Sn1 Sn2 ... (Cn)
- )
-
-
-
-where the Sij
are strings, optionally followed by their routine names,
-and the Ci
are Snowball command sequences. The semantics is a bit
-like a switch in C, where the switch is on a string rather than a numerical
-value:
-
-
-
- switch(...) {
- case S11: case S12: ... C1; break;
- case S21: case S22: ... C2; break;
- ...
-
- case Sn1: case Sn2: ... Cn; break;
- }
-
-
-
-The [% highlight_inline('among') %] in the respell
routine has this form.
-
-
-
-The full form however is to use [% highlight_inline('among') %] with a preceding [% highlight_inline('substring') %], with
-[% highlight_inline('substring') %] and [% highlight_inline('among') %] possibly separated by further commands.
-[% highlight_inline('substring') %]
-triggers the test for the longest matching substring, and the [% highlight_inline('among') %] then
-causes the corresponding bracketed command to be obeyed. At a simple
-level this can be used to cut down the size of the code, in that
-
-
-
- substring C among( S11 S12 ... (C1)
- S21 S22 ... (C2)
- ...
-
- Sn1 Sn2 ... (Cn)
- )
-
-
-
-is a shorter form of
-
-
-
- among( S11 S12 ... (C C1)
- S21 S22 ... (C C2)
- ...
-
- Sn1 Sn2 ... (C Cn)
- )
-
-
-
-More importantly, [% highlight_inline('substring') %] and [% highlight_inline('among') %] can work in different contexts. For
-example, [% highlight_inline('substring') %] could be used to test for the longest string, matching from
-right to left, while the commands in the [% highlight_inline('among') %] could operate in a left to
-right direction. In the Lovins stemmer, [% highlight_inline('substring') %] is used in this style:
-
-
-[% highlight("
- [substring] among ( ... )
-") %]
-
-
-The two square brackets are in fact individual commands, so before the [% highlight_inline('among') %]
-come three commands. [% highlight_inline('[') %] sets a lower marker, [% highlight_inline('substring') %] is obeyed, searching
-for the strings in the following among, and then [% highlight_inline(']') %] sets an upper marker.
-The region between the lower and upper markers is called the slice, and this
-may subsequently be copied, replaced or deleted.
-
-
-
-It was possible to get the Lovins stemmer working in Snowball very quickly.
-The Sourceforge versions (1) could be used to get the long list of endings and
-to help with the debugging. There was however one problem, that rules 24 and
-30 of part C conflicted. They are given as
-
-
--
- 24) end → ens except following s
- ...
- 30) end → ens except following m
-
-
-
-This had not been noticed in the Sourceforge implementations, but
-immediately gave rise to a compilation error in Snowball. Experience
-suggested that I was very unlikely to get this problem resolved. Only a few
-months before, I had hit a point in a stemming algorithm where
-something did not quite make sense. The algorithm had been published just a
-few years ago, and contacting one at least of the authors was quite easy.
-But I never sorted it out. The author I traced was not au fait
-with the linguistic background, and the language expert had been swallowed
-up in the wilds of America. So what chance would I have here? Even if I was
-able to contact Lovins, it seemed to me inconceivable that she would have
-any memory of, or even interest in, a tiny problem in a paper which she
-published 33 years ago. But the spirit of academic enquiry forced me to
-venture the attempt. After pursuing a number of red-herrings, email contact
-was finally made.
-
-
-
-Her reply was a most pleasant surprise.
-
-
-
- ... The explanation is both mundane and exciting. You have just found
- a typo in the MT article, which I was unaware of all these years, and I
- suspect has puzzled a lot of other people too. The original paper, an
- MIT-published memorandum from June 1968, has rule 30 as
-
-
-
- ent → ens except following m
-
-
-
- and that is undoubtedly what it should be ...
-
-
-
-An analysis of the Lovins stemmer
-
-
-It is very important in understanding the Lovins stemmer to know something
-of the IR background of the late sixties. In the first place there was an
-assumption that IR was all, or mainly, about the retrieval of
-technical scientific papers, and research projects were set up accordingly.
-I remember being shown, in about 1968, a graph illustrating the
-‘information explosion’, as it was understood at the time, which showed
-just the rate of growth of publications of scientific papers in various
-different domains over the previous 10 or 20 years. Computing resources
-were very precious, and they could not be wasted by setting up IR systems
-for information that was, by comparison, merely frivolous (articles in
-popular magazines, say). And even in 1980, when I was working in IR, the
-data I was using came from the familiar, and narrow, scientific domain.
-Lovins was working with Project Intrex (Overhage, 1966), where the data came from
-papers in materials science and engineering.
-
-
-
-Secondly, the idea of indexing on every word in a document, or even looking
-at every word before deciding whether or not to put it into an index, would
-have seemed quite impractical, even though it might have been recognised as
-theoretically best. In the first place, the computing resources necessary to
-store and analyse complete documents in machine readable form were absent, and in the
-second, the rigidities of the printing industry almost guaranteed that one
-would never get access to them.
-A stemmer, therefore, would be seen as something not
-applied to general text but to certain special words, and in the case of the
-Lovins stemmer, the plan was to apply it to the subject terms that were used
-to categorize each document. Subsequently it would be used with each word
-in a query, where it
-was hoped that the vocabulary of the queries would match the vocabulary of
-the catalogue of subject terms.
-
-
-
-This accounts for: —
-
-
-
-- The emphasis on the scientific vocabulary. This can be seen in the
-endings, which include oidal, on, oid, ide, for words like colloidal,
-proton, spheroid, nucleotide. It can be seen in the transformation rules,
-with their concern for Greek sis and Latin ix suffixes. And also it can be
-seen in the word samples of the paper (magnesia, magnesite, magnesian,
-magnesium, magnet, magnetic, magneto etc. of Fig. 2).
-
-
-- The slight shortage of plural forms. The subject terms would naturally
-have been mainly in the singular, and one might also expect the same of
-query terms.
-
-
-- The surprising shortness of the allowed minimum stems — usually 2
-letters. A controlled technical vocabulary will contain longish words, and
-the problem of minimum stem lengths only shows up with shorter words.
-
-
-
-
-If we take a fairly ordinary vocabulary of modern English, derived from
-non-scientific writing, it is interesting to see how much of the Lovins
-stemmer does not actually get used. We use vocabulary V, derived from a
-sample of modern texts from Project Gutenberg (4). V can be inspected
-at (5). It contains 29,401 words, and begins
-
-
- a aback abandon abandoned abandoning abandonment
- abandons abasement abashed abate abated ...
-
-We find that 22,311, or about 76%, of the words in V have one of the
-294 endings removed if passed through the Lovins stemmer. Of this 76%, over a
-half (55%) of the removals are done by just six of the endings, the breakdown
-being,
--
- s (13%) ed (12%) e (10%) ing (10%) es (6%) y (4%)
-
-If, on the other hand, you look at the least frequent endings, 51% of them
-do only 1.4% of the removals. So of the ones removed, half the endings in
-V
-correspond to 2% of the endings in the stemmer, and 1.4% of the endings in
-V
-correspond to half the endings in the stemmer. In fact 62 of the endings
-(about a fifth) do not lead to any ending removals in V at all. These are
-made up of the rarer ‘scientific’ endings, such as aroid and oidal, and
-long endings, such as alistically and entiality.
-
-
-
-This helps explain why the Porter and Lovins stemmers behave in a fairly
-similar way despite the fact that they look completely different — it is
-because most of the work is being done in just a small part of the stemmer,
-and in that part there is a lot of overlap. Porter and Lovins stem 64% of
-the words in V identically, which is quite high. (By contrast, an
-erroneous but plausibly written Perl script
-advertised on the Web as an implementation of the Porter stemmer
-still proves to stem only 86% of the words in V
-to the same forms that are produced by the Porter stemmer.)
-
-
-
-A feature of the Lovins stemmer that is worth looking at in some detail is
-the transformation rules. People who come to the problem of stemming for
-the first time usually devote a lot of mental energy to the issue of
-morphological irregularity which they are trying to address.
-
-
-
-A good starting point is the verbs of English. Although grammatically
-complex, the morphological forms of the English verb are few, and are
-illustrated by the pattern harm, harms, harming, harmed, where the basic
-verb form adds s, ing and ed to make the other three forms. There are
-certain special rules: to add s to a verb ending ss an e is inserted,
-so pass becomes passes, and adding ed and ing replaces a final e of
-the verb (love to loved), and can cause consonant doubling (hop to
-hopped), but
-apart from this all verbs in the language follow the basic pattern with the
-exception of a finite class of irregular verbs.
-In a regular verb, the addition of ed to the basic verb creates both the
-past form (‘I harmed’) and the p.p. (past participle) form (‘I have
-harmed’). An irregular verb, such as ring, forms its past in some other
-way (‘I rang’), and may have a distinct p.p. (‘I have rung’).
-The irregular verbs have a
-different past form, and sometimes a separate p.p. form.
-It is easy to think up more examples,
-
-
- stem | | past | | p.p.
- |
- |
ring | | rang | | rung
- |
rise | | rose | | risen
- |
sleep | | slept | | slept
- |
fight | | fought | | fought
- |
come | | came | | come
- |
go | | went | | gone
- |
hit | | hit | | hit
- |
-
-How many of these verbs are there altogether? On 20 Jan 2000, in order to
-test the hypothesis that the number is consistently over-estimated, I asked
-this question in a carefully worded email to a mixed group of
-about 50
-well-educated
-work colleagues (business rather than academic people). Ten of them replied,
-and here are the
-guesses they made:
-
--
- 20, 25, 25, 50, 180, 200, 426, 25000, 10%, 20%
-
-
-The last two numbers mean 10% and 20% of all English verbs.
-My hypothesis was of course wrong. The truth is that most people have no
-idea at all how many irregular verbs there are in English.
-In
-fact there are around 135 (see section 3.3 of Palmer, 1965).
-If a stemming algorithm handles suffix removal
-of all regular verbs correctly, the question arises as to whether it is
-worth making it do the same for the irregular forms. Conflating fought and
-fight, for example, could be useful in IR queries about boxing. It seems
-easy: you make a list of the irregular verbs and create a mapping of the
-past and p.p. forms to the main form. We can call the process
-English verb respelling. But when you try it, numerous problems arise. Are
-forsake, beseech, cleave really verbs of contemporary English? If so, what
-is the p.p. of cleave?
-Or take the verb stride, which is common enough. What is its p.p.? My
-Concise Oxford English Dictionary says it is stridden (6), but have we ever
-heard this word used? (‘I have stridden across the paving.’)
-
-
-
-To compose a realistic list for English verb respelling we therefore need to
-judge word rarity. But among the commoner verb forms even greater problems
-arise because of their use as homonyms. A rose is a type of flower, so
-is it wise
-to conflate rose and rise? Is it wise to conflate
-saw and see when saw can mean a cutting instrument?
-
-
-
-We suddenly get to
-the edge of what it is useful to include in a stemming algorithm. So long as
-a stemming algorithm is built around general rules, the full impact of the
-stemmer on a vocabulary need not be studied too closely. It is sufficient to
-know that the stemmer, judiciously used, improves retrieval performance. But
-when we look at its effect on individual words these issues can no longer be
-ignored. To build even a short list of words into a stemmer for special
-treatment takes us into the area of the dictionary-based stemmer, and the
-problem of determining, for a pair of related words in the dictionary, a
-measure of semantic similarity which tells us whether or not the words
-should be conflated together.
-
-
-
-About half the transformation rules in the Lovins stemmer deal with a
-problem which is similar to that posed by the irregular verbs of English,
-and which ultimately goes back to the irregular forms of second conjugation
-verbs in Latin. We can call it Latin verb respelling. Verbs like
-induce, consume, commit are perfectly regular in modern English, but
-the adjectival and noun forms induction, consumptive, commission that
-derive from them correspond to p.p. forms in Latin.
-You can see the descendants of these Latin irregularities
-in modern Italian, which has commettere with p.p.
-commesso, like our commit and commission, and scendere with
-p.p. sceso like our ascend and ascension (although scendere
-means ‘to go down’ rather than ‘to go up’).
-
-
-
-Latin verb respelling often seems to be more the territory of a stemmer than
-English verb respelling, presumably because Latin verb irregularities
-correspond to consonantal changes at the end of the stem, where the
-stemmer naturally operates, while English verb irregularities more often
-correspond to vowel changes in the middle. Lovins was no doubt
-particularly interested in Latin verb respelling because so many of the
-words affected have scientific usages.
-
-
-
-We can judge that Latin verb respellings constitute a small set because
-the second conjugation verbs of Latin form a small, fixed set. Again,
-looking at Italian, a modern list of irregular verbs contains 150 basic forms
-(nearly all of them second conjugation), not unlike the number of forms in
-English. Extra verbs are formed with prefixes. Corresponding English words
-that exhibit the Latin verb respelling problem
-will be a subset of this system. In fact we
-can offer a Snowball script that does the Latin verb respelling with more
-care. It should be invoked, in the Porter stemmer, after removal of ive or
-ion endings only,
-
-[% highlight("
-define prefix as (
-
- among (
-
- 'a' 'ab' 'ad' 'al' 'ap' 'col' 'com' 'con' 'cor' 'de'
- 'di' 'dis' 'e' 'ex' 'in' 'inter' 'o' 'ob' 'oc' 'of'
- 'per' 'pre' 'pro' 're' 'se' 'sub' 'suc' 'trans'
- ) atlimit
-)
-
-define second_conjugation_form as (
-
- [substring] prefix among (
-
- 'cept' (<-'ceiv') //-e con de re
- 'cess' (<-'ced') //-e con ex inter pre re se suc
- 'cis' (<-'cid') //-e de (20)
- 'clus' (<-'clud') //-e con ex in oc (26)
- 'curs' (<-'cur') // re (6)
- 'dempt' (<-'deem') // re
- 'duct' (<-'duc') //-e de in re pro (3)
- 'fens' (<-'fend') // de of
- 'hes' (<-'her') //-e ad (28)
- 'lis' (<-'lid') //-e e col (21)
- 'lus' (<-'lud') //-e al de e
- 'miss' (<-'mit') // ad com o per re sub trans (29)
- 'pans' (<-'pand') // ex (23)
- 'plos' (<-'plod') //-e ex
- 'prehens' (<-'prehend') // ap com
- 'ris' (<-'rid') //-e de (22)
- 'ros' (<-'rod') //-e cor e
- 'scens' (<-'scend') // a
- 'script' (<-'scrib') //-e de in pro
- 'solut' (<-'solv') //-e dis re (8)
- 'sorpt' (<-'sorb') // ab (5)
- 'spons' (<-'spond') // re (25)
- 'sumpt' (<-'sum') // con pre re (4)
- 'suas' (<-'suad') //-e dis per (18)
- 'tens' (<-'tend') // ex in pre (24)
- 'trus' (<-'trud') //-e ob (27)
- 'vas' (<-'vad') //-e e (19)
- 'vers' (<-'vert') // con in re (31)
- 'vis' (<-'vid') //-e di pro
- )
-)
-") %]
-
-This means that if suas, for example, is preceded by one of the strings
-in [% highlight_inline('prefix') %], and there is nothing more before the prefix string (which is
-what the
-[% highlight_inline('atlimit') %]
-command tests), it is replaced by suad. So dissuas(ion) goes to
-dissuad(e)
-and persuas(ive) to persuad(e). Of course, asuas(ion), absuas(ion),
-adsuas(ion) and so on would get the same treatment, but not being words of
-English that does not really matter. The corresponding Lovins rules are
-shown in brackets.
-This is not quite the end
-of the story, however, because the Latin forms ex + cedere (‘go
-beyond’), pro + cedere (‘go forth’), and sub + cedere
-(‘go after’) give rise to verbs which,
-by an oddity of English orthography, have an extra letter e: exceed, proceed,
-succeed. They can be sorted out in a final respelling step:
-
-
-[% highlight("
-define final_respell as (
-
- [substring] atlimit among(
-
- 'exced' (<-'exceed')
- 'proced' (<-'proceed')
- 'succed' (<-'succeed')
- /* extra forms here perhaps */
- )
-)
-") %]
-
-
-As you might expect, close inspection of this process creates doubts in
-the same way as for English verb respelling. (Should we really conflate
-commission and commit? etc.)
-
-
-
-The other transformation rules are concerned with unusual plurals, mainly
-of Latin or Greek origin, er and re differences, as in parameter and
-parametric, and the sis/tic connection of certain words of Greek origin:
-analysis/analytic, paralysis/paralytic ... (rule 33), and
-hypothesis/hypothetic, kinesis/kinetic ... (rule 32). Again, these
-irregularities might be tackled by forming explicit word lists. Certainly
-rule 30, given as,
-
-
--
- ent → ens except following m,
-
-
-
-goes somewhat wild when given a general English vocabulary (dent becomes
-dens for example), although it is the only rule that might be said to
-have a damaging effect.
-
-
-A Lovins shape for the Porter stemmer
-
-
-The 1980 paper (Porter, 1980) may be said to define the ‘pure’ Porter stemmer.
-The stemmer distributed at (7) can be called the ‘real’ Porter
-stemmer, and differs from the pure stemmer in three small respects, which
-are carefully explained. This disparity does not require much excuse,
-since the oldest traceable encodings of the stemmer have always contained
-these differences. There is also a revised stemmer for English, called
-‘Porter2’ and still subject to slight changes. Unless otherwise stated,
-it is the real Porter stemmer which is being studied below.
-
-
-
-The Porter stemmer differs from the Lovins stemmer in a number of
-respects. In the first place, it only takes account of fairly common
-features of English. So rare suffixes are not included, and there is no
-equivalent of Lovins’ transformation rules, other than her rule (1), the
-undoubling of terminal double letters. Secondly, it removes suffixes only
-when the residual stem is fairly substantial. Some suffixes are removed
-only when at least one syllable is left, and most are removed only when at least two
-syllables are left. (One might say that this is based on a guess about the
-way in which the meaning of a stem is related to its length in syllables (8).)
-The Porter stemmer is therefore ‘conservative’ in its removal
-of suffixes, or at least that is how it has often been described. Thirdly,
-it removes suffixes in a series of steps, often reducing a compound suffix
-to its first part, so a step might reduce ibility to ible, where
-ibility is thought of as being ible + ity. Although the
-description of the whole stemmer is a bit complicated, the total number of
-suffixes is quite small — about 60.
-
-
-
-The Porter stemmer has five basic steps. Step 1 removes an
-inflectional suffix. There are only three of these: ed and ing, which are
-verbal, and s, which is verbal (he sings), plural (the songs) or possessive
-(the horses’ hooves), although the rule for s removal is the same in all
-three cases. Step 1 may also restore an e (hoping → hope), undouble a
-double letter pair (hopping → hop), or change y to i (poppy →
-poppi, to match with poppies → poppi.) Steps 2 to 4 remove derivational
-suffixes. So
-ibility may reduce to ible in step 2, and ible itself may be removed in step
-4. Step 5 is for removing final e, and undoubling ll.
-
-
-
-A clear advantage of the Lovins stemmer over the Porter stemmer is speed.
-The Porter stemmer has five steps of suffix removal to the Lovins stemmer’s
-one. It is instructive therefore to try and cast the Porter stemmer into
-the shape of the Lovins stemmer, if only for the promise of certain speed
-advantages. As we will see, we learn a few other things from the exercise
-as well.
-
-
-
-First we need a list of endings. The Lovins endings were built up by hand,
-but we can construct a set of endings for the Porter stemmer by writing an
-ending generator that follows the algorithm definition. From an analysis of
-the suffixes in steps 2 to 4 of the Porter stemmer we can construct
-the following diagram:
-
-
-
-
-
-This is not meant to be a linguistic analysis of the suffix structure of
-English, but is merely intended to show how the system of endings works in
-the stemming algorithm. Suffixes combine if their boxes are connected by
-an arrow. So ful combines with ness to make fulness.
-
-
-
- ful + ness → fulness
-
-
-The combination is not always a concatenation of the strings
-however, for we have,
-
--
- able + ity → ability
- able + ly → ably
- ate + ion → ation
- ible + ity → ibility
- ible + ly → ibly
- ize + ate + ion → ization
-
-
-The path from ize to ion goes via ate, so we can form ization, but there is
-no suffix izate. Three of the suffixes, ator, ance and ence, do not connect
-into the rest of the diagram, and ance, ence also appear in the forms
-ancy, ency. The letter to the left of the box is going to be the
-condition for the
-removal of the suffix in the box, so
-
-
- B +-------+ n
- | ism |
- +-------+
-
-
-means that ism will be removed if it follows a stem that satisfies
-condition B. On the right of the box is either n, v or hyphen. n means the
-suffix is of noun type. So if a word ends ism it is a noun. v means verb
-type. hyphen means neither: ly (adverbial) and ful, ous (adjectival) are of
-this type. If a suffix is a noun type it can have a plural form (criticism,
-criticisms), so we have to generate isms as well as ism. Again, the
-combining is not just concatenation,
-
--
- ity + s → ities
- ness + s → nesses
-
-
-If a suffix has v type, it has s, ed and ing forms,
-
--
- ize + s → izes
- ize + ed → ized
- ize + ing → izing
-
-
-Type v therefore includes type n, and we should read this type as ‘verb or
-noun’, rather than just ‘verb’. For example, condition, with suffix ion, is
-both verb (‘They have been conditioned to behave like that’) and noun
-(‘It is subject to certain conditions’).
-
-
-
-The diagram is therefore a scheme for generating combined derivational
-suffixes, each combination possibly terminated with an inflectional suffix.
-A problem is that it contains a loop in
-
-
-
- ize → ate → ion → al → ize → ...
-
-
-suggesting suffixes of the form izationalizational... We break the loop by
-limiting the number of joined derivational suffixes of diagram 1 to four.
-(Behaviour of the Porter stemmer shows that removal of five combined
-derivational suffixes is never desirable, even supposing five ever combine.)
-We can then generate 181 endings, with their removal codes. But 75 of these
-suffixes do not occur as endings in V, and they can be eliminated as rare
-forms, leaving 106. Alphabetically, the endings begin,
-
--
- abilities ability able ables ably al alism
- (alisms) alities ality alization (alizationed)
- (alizationing) (alizations) alize alized (alizer)
- (alizered) (alizering) (alizers) (alizes) (alizing)
- ally alness (alnesses) als ance ances ancies
- ancy ...
-
-
-The eliminated rare forms are shown bracketed.
-
-
-
-The 106 endings are arranged in a file as a list of strings followed by
-condition letter,
-
-
- 'abilities' B
- 'ability' B
- 'able' B
- 'ables' B
- 'ably' B
- 'al' B
- ....
-
-
-This ending list is generated by running the ANSI C program shown in
-Appendix 4, and line-sorting the result into a file,
-and this file is called in by the [% highlight_inline('get') %] directive in the Snowball script of
-Appendix 2, which is the Porter stemming algorithm laid out in the style of
-the Lovins algorithm. In fact, precise equivalence cannot be achieved, but
-in V only 137 words stem differently, which is 0.4% of V. There are 10
-removal conditions, compared with Lovins’ 29, and 11 transformation or
-respelling rules, compared with Lovins’ 35. We can describe the process in
-Lovins style, once we have got over a few preliminaries.
-
-
-
-We have to distinguish y as a vowel from y as a consonant. We treat initial
-y, and y before vowel, as a consonant, and make it upper case. Thereafter
-a, e, i, o, u and y are vowels, and the other lower case letters and Y are
-consonants. If [C] stands for zero or more consonants, C for one or more
-consonants, and V for one or more vowels, then a stem of shape [C]VC has
-length 1s (1 syllable), of shape [C]VCVC length 2s, and so on.
-
-
-
-A stem ends with a short vowel if the ending has the form cvx, where c is a
-consonant, v a vowel, and x a consonant other than w, x or Y.
-(Short vowel endings with ed and ing imply loss of an e from
-the stem, as in removing = remove + ing.)
-
-
-
-Here are the removal conditions,
-
-
-
-
-A | | Minimum stem length = 1s
- |
B | | Minimum stem length = 2s
- |
C | | Minimum stem length = 2s and remove ending only after s or t
- |
D | | Minimum stem length = 2s and do not remove ending after m
- |
E | | Remove ending only after e or ous after minimum stem length 1s
- |
F | | Remove ending only after ss or i
- |
G | | Do not remove ending after s
- |
H | | Remove ending only if stem contains a vowel
- |
I | | Remove ending only if stem contains a vowel and does not end in e
- |
J | | Remove ending only after ee after minimum stem length 1s
- |
-
-
-
-In condition J the stem must end ee, and the part of the stem before the
-ee must have minimum length 1s. Condition E is similar.
-
-
-
-Here are the respelling rules, defined with the help of the removal
-conditions. In each case, the stem being tested does not include the string
-at the end which has been identified for respelling.
-
-
- 1) | | Remove e if B, or if A and the stem does not end with a short vowel
- |
2) | | Remove l if B and the stem ends with l
- |
3) | | enci/ency → enc if A, otherwise → enci
- |
4) | | anci/ancy → anc if A, otherwise → anci
- |
5) | | ally → al if A, otherwise → alli
- |
6) | | ently → ent if A, otherwise → entli
- |
7) | | ator → at if A
- |
8) | | logi/logy → log if A, otherwise → logi
- |
9) | | bli/bly → bl if A, otherwise → bli
- |
10) | | bil → bl if stem ends vowel after A
- |
11) | | y/Y → i if stem contains a vowel
- |
-
-The 106 endings are distributed among conditions A to E as A(5), B(87),
-C(10), D(3) and E(1). F to J deal with the purely inflectional endings: F
-with es, G with s, H with ing and ings, I with ed and J with d.
-There is however one point at which the Lovins structure breaks down, in that
-removal of ed and ing(s) after conditions I and H requires a special
-adjustment that cannot be left to a separate transformation rule. It is to
-undouble the last letter, and to restore a final e if the stem has length 1s
-and ends with a short vowel (so shopping loses a p and becomes shop,
-sloping gains an e and becomes slope.)
-
-
-
-The Porter stemmer cast into this form runs significantly faster than the
-multi-stage stemmer — about twice as fast in tests with Snowball.
-
-
-
-We will call the Porter stemmer P, the Lovins stemmer L, and this Lovins
-version of the Porter stemmer LP. As we have said, P and LP are not identical,
-but stem 137 of the 29,401 words of V differently.
-
-
-
-A major cause of difference is unexpected suffix combinations. These can be
-subdivided into combinations of what seem to be suffixes but are not, and
-rare combinations of valid suffixes.
-
-
-
-The first case is illustrated by the word disenchanted. P stems this to
-disench, first taking off suffix ed, and then removing ant, which is
-a suffix in English, although not a suffix in this word. P also stems
-disenchant to disench, so the two words disenchant and
-disenchanted are conflated by P, even though they make an error in the
-stemming process. But ant is a noun type suffix, and so does not combine
-with ed. anted is therefore omitted from the suffix list of LP, so LP
-stems disenchanted to disenchant, but disenchant to disench.
-
-
-
-This illustrates a frequently encountered problem in stemming. S1
-and S2 are suffixes of a language, but the combination
-S1S2 is
-not. A word has the form xS1, where x is some string, but in
-xS1, S1 is not actually a suffix, but part of the stem.
-S2 is a valid suffix for this word, so xS1S2 is
-another word in the language. An algorithmic stemmer stems xS1 to
-x in error. If presented with xS1S2 it can either
-(a) stem it to xS1, knowing S1 cannot be a suffix in
-this context, or (b) stem it to x, ignoring the knowledge to be
-derived from the presence of S2. (a) gives the correct stemming
-of at least xS1S2, although the stemming of xS1
-will be wrong, while (b) overstems both words, but at least achieves
-their conflation. In other words (a) fails to conflate the two forms, but
-may achieve correct conflations of xS1S2 with similar forms
-xS1S3, xS1S4 etc., while (b) conflates
-the two forms, but at the risk of additional false conflations. Often a study
-of the results of a stemming strategy on a sample vocabulary leads one to
-prefer approach (b) to (a) for certain classes of ending. This is
-true in particular of the inflectional endings of English, which is why the
-removals in step 1 of P are not remembered in some state variable, which
-records whether the ending just removed is verb-type, noun-or-verb-type etc.
-On balance you get better results by throwing that information away, and then
-the many word pairs on the pattern of disenchant / disenchanted will
-conflate together.
-
-
-
-Other examples from V can be given: in misrepresenting, ent is
-not a suffix, and enting not a valid suffix combination; in
-witnessed, ness is not a suffix, and nessed not a valid
-suffix combination.
-
-
-
-This highlights a disadvantage of stemmers that work with a fixed list of
-endings. To get the flexibility of context-free ending removal, we need to
-build in extra endings which are not grammatically correct (like anted =
-ant + ed), and this adds considerably to the burden of constructing
-the list. In fact L does not include anted, but it does include for
-example antic (ant + ic), which may be serving a similar
-purpose.
-
-
-
-For the second case, the rare combinations of valid suffixes, one may instance
-ableness. Here again the multi-step stemmer makes life easier. P removes
-ness in step 3 and able in step 4, but without making any necessary
-connection. L has ableness as an ending, dictionaries contain many
-ableness words, and it is an easy matter to make the connection across from
-able to ness in diagram 1 and generate extra endings. Nevertheless the
-ending is very rare in actual use. For example, Dickens’ Nicholas Nickleby
-contains no examples, Bleak House contains two, in the same sentence:
-
-
-
- I was sure you would feel it yourself and would excuse the
- reasonableness of MY feelings when coupled with the known
- excitableness of my little woman.
-
-
-
-reasonableness is perhaps the commonest word in English of this form, and
-excitableness (instead of excitability) is there for contrast. Thackeray’s
-Vanity Fair, a major source in testing out P and Porter2, contains one
-word of this form, charitableness. One may say of this word that it is
-inevitably rare, because it has no really distinct
-meaning from the simpler charity, but that it has to be formed by adding
-ableness rather than ability, because the repeated ity in charity +
-ability is morphologically unacceptable. Other rare combinations are
-ateness, entness
-and eds (as in intendeds and beloveds).
-fuls is another interesting case. The ful suffix, usually adjectival,
-can sometimes create nouns, giving plurals such as mouthfuls and
-spoonfuls. But in longer words sful is a more ‘elegant’ plural
-(handbagsful, dessertspoonsful).
-
-
-
-These account for most of the differences, but there are a few others.
-
-
-
-One is in forms like bricklayers → bricklai (P), bricklay (LP).
-Terminal y is usefully turned to i to help conflate words where y is changed
-to i and es added to form the plural, but this does not happen when
-y
-follows a vowel. LP improves on P here, but the Porter2 algorithm makes the
-same improvement, so we have nothing to learn.
-There is also a difference in words ending lle or lles,
-quadrille → quadril (P), quadrill (LP). This is because e and
-l
-removal are successive in step 5 of P, and done as alternatives in the
-respelling rules
-of LP. In LP this is not quite correct, since
-Lovins makes it clear that her transformation rules should be
-applied in succession. Even so, LP seems better than P, suggesting
-that step 5b of P (undouble l) should not have been attempted after e removal
-in step 5a. So here is a possible small improvement to Porter2. Another
-small, but quite interesting difference, is the condition attached to the
-ative ending. The ending generator makes B the removal condition by a
-natural process, but in P its removal condition is A. This goes back to step
-3 as originally presented in the paper of 1980:
-
-
-
- (m>0) ICATE → IC
- (m>0) ATIVE →
- (m>0) ALIZE → AL
- (m>0) ICITI → IC
- (m>0) ICAL → IC
- (m>0) FUL →
- (m>0) NESS →
-
-(m>0) corresponds to A. With removal condition B, the second line would be
-
--
- (m>1) ATIVE →
-
-
-which looks slightly incongruous. Nevertheless it is probably correct, because we
-remove a half suffix from icate, alize, icity and ical when the stem
-length is at least 1s, and so we should remove the full ate + ive suffix when the stem
-length is at least 2s. We should not be influenced by ful and ness.
-They are ‘native English’ stems, unlike the other five, which
-have a ‘Romance’ origin, and for these two condition A has been found to
-be more appropriate. In fact putting in this adjustment to Porter2 results in an
-improvement in the small class of words thereby affected.
-
-
-Conclusion
-
-
-You never learn all there is to know about a computer program, unless the
-program is really very simple. So even after 20 years of regular use,
-we can learn something new about P by creating LP and comparing the
-two. And in the process we learn a lot about L, the Lovins stemmer itself.
-
-
-
-The truth is that the main motivation for studying L was to see how well the
-Snowball system could be used for implementing and analyzing Lovins’
-original work, and the interest in what she had actually achieved in 1968
-only came later. I hope that this short account helps clarify her work, and
-place it in the context of the development of stemmers since then.
-
-
-Notes
-
-
-The http addresses below have a ‘last visited’ date of December 2001.
-
-
-
-- The Lovins stemmer is available at
-
-
-
- - http://www.cs.waikato.ac.nz/~eibe/stemmers
- - http://sourceforge.net/projects/stemmers
-
-
-
-- See
http://www-uilots.let.uu.nl/~uplift/
-
-- See
http://snowball.sourceforge.net
-
-- See
http://promo.net/pg/
-
-- See
http://snowball.sourceforge.net/english/voc.txt
-
-- In looking at verbs with the pattern ride, rode, ridden, Palmer,
-1965, notes that ‘we should perhaps add STRIDE, with past tense strode,
-but without a past participle (there is no *stridden).’
-
-- See
https://tartarus.org/~martin/PorterStemmer/
-
-- Lovins (1968), p. 25, mentions that a stemming algorithm developed by
- James L. Dolby in California used a two-syllable minimum stem length as a
- condition for most of the stemming.
-
-
-Bibliography
-
-
-Andrews K (1971) The development of a fast conflation algorithm for English.
-Dissertation for the Diploma in Computer Science, Computer Laboratory,
-University of Cambridge.
-
-
-
-Harman D (1991) How effective is suffixing? Journal of the American
-Society for Information Science, 42: 7-15.
-
-
-
-Kraaij W and Pohlmann R (1994) Porter’s stemming algorithm for Dutch. In
-Noordman LGM and de Vroomen WAM, eds. Informatiewetenschap 1994:
-Wetenschappelijke bijdragen aan de derde STINFON Conferentie, Tilburg,
-1994. pp. 167-180.
-
-
-
-Kraaij W and Pohlmann R (1995) Evaluation of a Dutch stemming algorithm.
-Rowley J, ed. The New Review of Document and Text Management, volume 1,
-Taylor Graham, London, 1995. pp. 25-43.
-
-
-
-Krovetz B (1995) Word sense disambiguation for large text databases. PhD
-Thesis. Department of Computer Science, University of Massachusetts
-Amherst.
-
-
-
-Lennon M, Pierce DS, Tarry BD and Willett P (1981) An evaluation of some
-conflation algorithms for information retrieval. Journal of Information
-Science, 3: 177-183.
-
-
-
-Lovins JB (1968) Development of a stemming algorithm. Mechanical
-Translation and Computational Linguistics, 11: 22-31.
-
-
-
-Overhage CFJ (1966) Plans for project Intrex. Science, 152:
-1032-1037.
-
-
-
-Palmer FR (1965) A linguistic study of the English verb. London,
-Longmans.
-
-
-
-Porter MF (1980) An algorithm for suffix stripping. Program, 14:
-130-137.
-
-
-Appendix 1
-
-
-The Lovins stemmer in Snowball.
-
-
-[% highlight_file('lovins') %]
-
-Appendix 2
-
-
-The Porter stemmer, cast, as far as is possible, into Lovins form.
-
-
-[% highlight_file('porter_recast_as_lovins') %]
-
-Appendix 3
-
-
-The list of 181 endings included by the [% highlight_inline('get') %] directive in the program
-of Appendix 2. The numbers to the right show their frequency of occurrence
-in the sample vocabulary. The 75 rare endings are shown commented out.
-
-
-[% highlight("
- 'abilities' B /* (3) */
- 'ability' B /* (14) */
- 'able' B /* (293) */
- 'ables' B /* (4) */
- 'ably' B /* (68) */
- 'al' B /* (285) */
- 'alism' B /* (5) */
-// 'alisms' B /* (-) */
- 'alities' B /* (7) */
- 'ality' B /* (24) */
- 'alization' B /* (1) */
-// 'alizationed' B /* (-) */
-// 'alizationing' B /* (-) */
-// 'alizations' B /* (-) */
- 'alize' B /* (2) */
- 'alized' B /* (4) */
-// 'alizer' B /* (-) */
-// 'alizered' B /* (-) */
-// 'alizering' B /* (-) */
-// 'alizers' B /* (-) */
-// 'alizes' B /* (-) */
-// 'alizing' B /* (-) */
- 'ally' B /* (78) */
- 'alness' B /* (2) */
-// 'alnesses' B /* (-) */
- 'als' B /* (46) */
- 'ance' B /* (93) */
- 'ances' B /* (30) */
- 'ancies' B /* (2) */
- 'ancy' B /* (18) */
- 'ant' B /* (92) */
- 'ants' B /* (29) */
- 'ate' B /* (261) */
- 'ated' B /* (208) */
- 'ately' B /* (38) */
- 'ates' B /* (73) */
- 'ating' B /* (119) */
- 'ation' B /* (356) */
- 'ational' B /* (4) */
-// 'ationalism' B /* (-) */
-// 'ationalisms' B /* (-) */
-// 'ationalities' B /* (-) */
-// 'ationality' B /* (-) */
-// 'ationalize' B /* (-) */
-// 'ationalized' B /* (-) */
-// 'ationalizes' B /* (-) */
-// 'ationalizing' B /* (-) */
- 'ationally' B /* (2) */
-// 'ationalness' B /* (-) */
-// 'ationalnesses' B /* (-) */
-// 'ationals' B /* (-) */
-// 'ationed' B /* (-) */
-// 'ationing' B /* (-) */
- 'ations' B /* (139) */
- 'ative' B /* (40) */
- 'atively' B /* (4) */
-// 'ativeness' B /* (-) */
-// 'ativenesses' B /* (-) */
- 'atives' B /* (7) */
-// 'ativities' B /* (-) */
-// 'ativity' B /* (-) */
- 'ator' B /* (25) */
- 'ators' B /* (10) */
- 'ement' B /* (70) */
-// 'emently' B /* (-) */
- 'ements' B /* (31) */
- 'ence' B /* (100) */
- 'ences' B /* (25) */
- 'encies' B /* (9) */
- 'ency' B /* (41) */
- 'ent' D /* (154) */
- 'ently' D /* (53) */
- 'ents' D /* (25) */
- 'er' B /* (613) */
- 'ered' B /* (44) */
- 'ering' B /* (31) */
- 'ers' B /* (281) */
- 'ful' A /* (163) */
- 'fulness' A /* (31) */
-// 'fulnesses' A /* (-) */
- 'fuls' A /* (5) */
- 'ibilities' B /* (2) */
- 'ibility' B /* (10) */
- 'ible' B /* (53) */
- 'ibles' B /* (2) */
- 'ibly' B /* (14) */
- 'ic' B /* (142) */
- 'ical' B /* (91) */
-// 'icalism' B /* (-) */
-// 'icalisms' B /* (-) */
-// 'icalities' B /* (-) */
- 'icality' B /* (1) */
-// 'icalize' B /* (-) */
-// 'icalized' B /* (-) */
-// 'icalizer' B /* (-) */
-// 'icalizered' B /* (-) */
-// 'icalizering' B /* (-) */
-// 'icalizers' B /* (-) */
-// 'icalizes' B /* (-) */
-// 'icalizing' B /* (-) */
- 'ically' B /* (59) */
-// 'icalness' B /* (-) */
-// 'icalnesses' B /* (-) */
- 'icals' B /* (2) */
- 'icate' B /* (9) */
- 'icated' B /* (7) */
-// 'icately' B /* (-) */
- 'icates' B /* (4) */
- 'icating' B /* (3) */
- 'ication' B /* (23) */
-// 'icational' B /* (-) */
-// 'icationals' B /* (-) */
-// 'icationed' B /* (-) */
-// 'icationing' B /* (-) */
- 'ications' B /* (8) */
- 'icative' B /* (2) */
-// 'icatively' B /* (-) */
-// 'icativeness' B /* (-) */
-// 'icativenesses' B /* (-) */
-// 'icatives' B /* (-) */
-// 'icativities' B /* (-) */
-// 'icativity' B /* (-) */
- 'icities' B /* (1) */
- 'icity' B /* (5) */
- 'ics' B /* (21) */
- 'ion' C /* (383) */
- 'ional' C /* (18) */
-// 'ionalism' C /* (-) */
-// 'ionalisms' C /* (-) */
- 'ionalities' C /* (1) */
- 'ionality' C /* (1) */
-// 'ionalize' C /* (-) */
-// 'ionalized' C /* (-) */
-// 'ionalizer' C /* (-) */
-// 'ionalizered' C /* (-) */
-// 'ionalizering' C /* (-) */
-// 'ionalizers' C /* (-) */
-// 'ionalizes' C /* (-) */
-// 'ionalizing' C /* (-) */
- 'ionally' C /* (12) */
- 'ionalness' C /* (1) */
-// 'ionalnesses' C /* (-) */
- 'ionals' C /* (1) */
- 'ioned' C /* (13) */
- 'ioning' C /* (3) */
- 'ions' C /* (192) */
- 'ism' B /* (33) */
- 'isms' B /* (5) */
- 'ities' B /* (62) */
- 'ity' B /* (236) */
- 'ive' B /* (132) */
- 'ively' B /* (34) */
- 'iveness' B /* (14) */
-// 'ivenesses' B /* (-) */
- 'ives' B /* (12) */
-// 'ivities' B /* (-) */
- 'ivity' B /* (1) */
- 'ization' B /* (4) */
-// 'izational' B /* (-) */
-// 'izationals' B /* (-) */
-// 'izationed' B /* (-) */
-// 'izationing' B /* (-) */
- 'izations' B /* (1) */
- 'ize' B /* (32) */
- 'ized' B /* (32) */
- 'izer' B /* (3) */
-// 'izered' B /* (-) */
-// 'izering' B /* (-) */
- 'izers' B /* (1) */
- 'izes' B /* (6) */
- 'izing' B /* (30) */
- 'ly' E /* (135) */
- 'ment' B /* (105) */
-// 'mently' B /* (-) */
- 'ments' B /* (50) */
- 'ness' A /* (428) */
- 'nesses' A /* (21) */
- 'ous' B /* (340) */
- 'ously' B /* (130) */
- 'ousness' B /* (22) */
-// 'ousnesses' B /* (-) */
-") %]
-
-Appendix 4
-
-
-An ANSI C program which will generate on stdout
the raw ending list
-(endings with condition letters) from which the list of Appendix 3 is
-constructed.
-
-
-[% highlight_file('porter_recast_as_lovins_generator.c') %]
-
-[% footer %]
diff --git a/algorithms/lovins/stemmer.tt b/algorithms/lovins/stemmer.tt
deleted file mode 100644
index 9de41fb..0000000
--- a/algorithms/lovins/stemmer.tt
+++ /dev/null
@@ -1,332 +0,0 @@
-[% header('The Lovins stemming algorithm') %]
-
-Links to resources
-
-
-
-
-The first ever published stemming algorithm was: Lovins JB (1968) Development of
-a stemming algorithm. Mechanical Translation and Computational Linguistics,
-11: 22-31. Julie Beth Lovins’ paper was remarkable for the early date at which
-it was done, and for its seminal influence on later work in
-this area.
-
-
-
-The design of the algorithm was much influenced by the technical vocabulary
-with which Lovins found herself working (subject term keywords attached to
-documents in the materials science and engineering field). The subject term
-list may also have been slightly limiting in that certain common endings
-are not represented (ements and ents for example, corresponding to
-the singular forms ement and ent), and also in that the algorithm's
-treatment of short words, or words with short stems, can be rather
-destructive.
-
-
-
-The Lovins algorithm is noticeably bigger than the Porter algorithm,
-because of its very extensive endings list. But in one way that is used to
-advantage: it is faster. It has effectively traded space for time, and with
-its large suffix set it needs just two major steps to remove a suffix,
-compared with the eight of the Porter algorithm.
-
-
-
-The Lovins stemmer has 294 endings, 29 conditions and 35
-transformation rules. Each ending is associated with one of the
-conditions. In the first step the longest ending is found which satisfies
-its associated condition, and is removed. In the second step the 35 rules
-are applied to transform the ending. The second step is done whether or not
-an ending is removed in the first step.
-
-
-
-For example, nationally has the ending ationally, with associated
-condition, B, ‘minimum stem length = 3’. Since removing ationally
-would leave a stem of length 1 this is rejected. But it also has ending
-ionally with associated condition A. Condition A is ‘no restriction on
-stem length’, so ionally is removed, leaving nat.
-
-
-
-The transformation rules handle features like letter undoubling (sitting
-→ sitt → sit), irregular plurals (matrix and matrices),
-and English morphological oddities ultimately caused by the behaviour of
-Latin verbs of the second conjugation (assume / assumption,
-commit / commission etc). Although they are described as being
-applied in turn, they can be broken into two stages, rule 1 being done in
-stage 1, and either zero or one of rules 2 to 35 being done in stage 2.
-
-
-
-Here is the list of endings as given in Appendix A of Lovins’ paper. They
-are grouped by length, from 11 characters down to 1. Each ending is
-followed by its condition code.
-
-
-
-
Appendix A. The list of endings
-
-
- |
- | | .11. |
- | | alistically B | | arizability A | | izationally B |
- |
- | | .10. |
- | | antialness A | | arisations A | | arizations A | | entialness A |
- |
- | | .09. |
- | | allically C | | antaneous A | | antiality A | | arisation A |
- | | arization A | | ationally B | | ativeness A | | eableness E |
- | | entations A | | entiality A | | entialize A | | entiation A |
- | | ionalness A | | istically A | | itousness A | | izability A |
- | | izational A |
- |
- | | .08. |
- | | ableness A | | arizable A | | entation A | | entially A |
- | | eousness A | | ibleness A | | icalness A | | ionalism A |
- | | ionality A | | ionalize A | | iousness A | | izations A |
- | | lessness A |
- |
- | | .07. |
- | | ability A | | aically A | | alistic B | | alities A |
- | | ariness E | | aristic A | | arizing A | | ateness A |
- | | atingly A | | ational B | | atively A | | ativism A |
- | | elihood E | | encible A | | entally A | | entials A |
- | | entiate A | | entness A | | fulness A | | ibility A |
- | | icalism A | | icalist A | | icality A | | icalize A |
- | | ication G | | icianry A | | ination A | | ingness A |
- | | ionally A | | isation A | | ishness A | | istical A |
- | | iteness A | | iveness A | | ivistic A | | ivities A |
- | | ization F | | izement A | | oidally A | | ousness A |
- |
- | | .06. |
- | | aceous A | | acious B | | action G | | alness A |
- | | ancial A | | ancies A | | ancing B | | ariser A |
- | | arized A | | arizer A | | atable A | | ations B |
- | | atives A | | eature Z | | efully A | | encies A |
- | | encing A | | ential A | | enting C | | entist A |
- | | eously A | | ialist A | | iality A | | ialize A |
- | | ically A | | icance A | | icians A | | icists A |
- | | ifully A | | ionals A | | ionate D | | ioning A |
- | | ionist A | | iously A | | istics A | | izable E |
- | | lessly A | | nesses A | | oidism A |
- |
- | | .05. |
- | | acies A | | acity A | | aging B | | aical A |
- | | alist A | | alism B | | ality A | | alize A |
- | | allic BB | | anced B | | ances B | | antic C |
- | | arial A | | aries A | | arily A | | arity B |
- | | arize A | | aroid A | | ately A | | ating I |
- | | ation B | | ative A | | ators A | | atory A |
- | | ature E | | early Y | | ehood A | | eless A |
- | | elity A | | ement A | | enced A | | ences A |
- | | eness E | | ening E | | ental A | | ented C |
- | | ently A | | fully A | | ially A | | icant A |
- | | ician A | | icide A | | icism A | | icist A |
- | | icity A | | idine I | | iedly A | | ihood A |
- | | inate A | | iness A | | ingly B | | inism J |
- | | inity CC | | ional A | | ioned A | | ished A |
- | | istic A | | ities A | | itous A | | ively A |
- | | ivity A | | izers F | | izing F | | oidal A |
- | | oides A | | otide A | | ously A |
- |
- | | .04. |
- | | able A | | ably A | | ages B | | ally B |
- | | ance B | | ancy B | | ants B | | aric A |
- | | arly K | | ated I | | ates A | | atic B |
- | | ator A | | ealy Y | | edly E | | eful A |
- | | eity A | | ence A | | ency A | | ened E |
- | | enly E | | eous A | | hood A | | ials A |
- | | ians A | | ible A | | ibly A | | ical A |
- | | ides L | | iers A | | iful A | | ines M |
- | | ings N | | ions B | | ious A | | isms B |
- | | ists A | | itic H | | ized F | | izer F |
- | | less A | | lily A | | ness A | | ogen A |
- | | ward A | | wise A | | ying B | | yish A |
- |
- | | .03. |
- | | acy A | | age B | | aic A | | als BB |
- | | ant B | | ars O | | ary F | | ata A |
- | | ate A | | eal Y | | ear Y | | ely E |
- | | ene E | | ent C | | ery E | | ese A |
- | | ful A | | ial A | | ian A | | ics A |
- | | ide L | | ied A | | ier A | | ies P |
- | | ily A | | ine M | | ing N | | ion Q |
- | | ish C | | ism B | | ist A | | ite AA |
- | | ity A | | ium A | | ive A | | ize F |
- | | oid A | | one R | | ous A |
- |
- | | .02. |
- | | ae A | | al BB | | ar X | | as B |
- | | ed E | | en F | | es E | | ia A |
- | | ic A | | is A | | ly B | | on S |
- | | or T | | um U | | us V | | yl R |
- | | s' A | | 's A |
- |
- | | .01. |
- | | a A | | e A | | i A | | o A |
- | | s W | | y B |
-
-
-
-
-Here are the 29 conditions, called A to Z, AA, BB and CC (* stands for any letter):
-
-
-
-
Appendix B. Codes for context-sensitive rules associated with
-certain endings
-
-
-
-
- | | A | | No restrictions on stem |
- | | B | | Minimum stem length = 3 |
- | | C | | Minimum stem length = 4 |
- | | D | | Minimum stem length = 5 |
- | | E | | Do not remove ending after e |
- | | F | | Minimum stem length = 3 and do not remove ending after e |
- | | G | | Minimum stem length = 3 and remove ending only after f |
- | | H | | Remove ending only after t or ll |
- | | I | | Do not remove ending after o or e |
- | | J | | Do not remove ending after a or e |
- | | K | | Minimum stem length = 3 and remove ending only after l, i or u*e |
- | | L | | Do not remove ending after u, x or s, unless s follows o |
- | | M | | Do not remove ending after a, c, e or m |
- | | N | | Minimum stem length = 4 after s**, elsewhere = 3 |
- | | O | | Remove ending only after l or i |
- | | P | | Do not remove ending after c |
- | | Q | | Minimum stem length = 3 and do not remove ending after l or n |
- | | R | | Remove ending only after n or r |
- | | S | | Remove ending only after dr or t, unless t follows t |
- | | T | | Remove ending only after s or t, unless t follows o |
- | | U | | Remove ending only after l, m, n or r |
- | | V | | Remove ending only after c |
- | | W | | Do not remove ending after s or u |
- | | X | | Remove ending only after l, i or u*e |
- | | Y | | Remove ending only after in |
- | | Z | | Do not remove ending after f |
- | | AA | | Remove ending only after d, f, ph, th, l, er, or, es or t |
- | | BB | | Minimum stem length = 3 and do not remove ending after met or ryst |
- | | CC | | Remove ending only after l |
-
-
-
-
-There is an implicit assumption in each condition, A included, that the minimum
-stem length is 2.
-
-
-
-Finally, here are the 35 transformation rules.
-
-
-
-
Appendix C. Transformation rules used in recoding stem terminations
-
-
-
-
- | | 1 | | remove one of double b, d, g, l, m, n, p, r, s, t |
- | | 2 | | iev → ief |
- | | 3 | | uct → uc |
- | | 4 | | umpt → um |
- | | 5 | | rpt → rb |
- | | 6 | | urs → ur |
- | | 7 | | istr → ister |
- | | 7a | | metr → meter |
- | | 8 | | olv → olut |
- | | 9 | | ul → l except following a, o, i |
- | | 10 | | bex → bic |
- | | 11 | | dex → dic |
- | | 12 | | pex → pic |
- | | 13 | | tex → tic |
- | | 14 | | ax → ac |
- | | 15 | | ex → ec |
- | | 16 | | ix → ic |
- | | 17 | | lux → luc |
- | | 18 | | uad → uas |
- | | 19 | | vad → vas |
- | | 20 | | cid → cis |
- | | 21 | | lid → lis |
- | | 22 | | erid → eris |
- | | 23 | | pand → pans |
- | | 24 | | end → ens except following s |
- | | 25 | | ond → ons |
- | | 26 | | lud → lus |
- | | 27 | | rud → rus |
- | | 28 | | her → hes except following p, t |
- | | 29 | | mit → mis |
- | | 30 | | ent → ens except following m |
- | | 31 | | ert → ers |
- | | 32 | | et → es except following n |
- | | 33 | | yt → ys |
- | | 34 | | yz → ys |
-
-
-
-
-(Rule 30 as given here corrects a typographical error in the published
-paper of 1968.)
-
-
-
-The following examples show the intentions behind these rules.
-
-
--
-
- | | 1 | | rubb[ing] → rub, embedd[ed] → embed etc |
- | | 2 | | believ[e] → belief |
- | | 3 | | induct[ion] → induc[e] |
- | | 4 | | consumpt[ion] → consum[e] |
- | | 5 | | absorpt[ion] → absorb |
- | | 6 | | recurs[ive] → recur |
- | | 7 | | administr[ate] → administ[er] |
- | | 7a | | parametr[ic] → paramet[er] |
- | | 8 | | dissolv[ed] → dissolut[ion] |
- | | 9 | | angul[ar] → angl[e] |
- | | 10 | | vibex → vibic[es] |
- | | 11 | | index → indic[es] |
- | | 12 | | apex → apic[es] |
- | | 13 | | cortex → cortic[al] |
- | | 14 | | anthrax → anthrac[ite] |
- | | 15 | | ? |
- | | 16 | | matrix → matric[es] |
- | | 17 | | ? |
- | | 18 | | persuad[e] → persuas[ion] |
- | | 19 | | evad[e] → evas[ion] |
- | | 20 | | decid[e] → decis[ion] |
- | | 21 | | elid[e] → elis[ion] |
- | | 22 | | derid[e] → deris[ion] |
- | | 23 | | expand → expans[ion] |
- | | 24 | | defend → defens[ive] |
- | | 25 | | respond → respons[ive] |
- | | 26 | | collud[e] → collus[ion] |
- | | 27 | | obtrud[e] → obtrus[ion] |
- | | 28 | | adher[e] → adhes[ion] |
- | | 29 | | remit → remis[s][ion] |
- | | 30 | | extent → extens[ion] |
- | | 31 | | convert[ed] → convers[ion] |
- | | 32 | | parenthet[ic] → parenthes[is] |
- | | 33 | | analyt[ic] → analys[is] |
- | | 34 | | analyz[ed] → analys[ed] |
-
-
-
-The Lovins algorithm in Snowball
-
-
-And here is the Lovins algorithm in Snowball. The natural representation
-of the Lovins endings, conditions and rules in Snowball, is, I believe, a
-vindication of the appropriateness of Snowball for stemming work. Once the
-tables had been established, getting the Snowball version running was the
-work of a few minutes.
-
-
-[% highlight_file('lovins') %]
-
-[% footer %]
diff --git a/algorithms/norwegian/stemmer.tt b/algorithms/norwegian/stemmer.tt
deleted file mode 100644
index 30578d1..0000000
--- a/algorithms/norwegian/stemmer.tt
+++ /dev/null
@@ -1,109 +0,0 @@
-[% header('Norwegian stemming algorithm') %]
-
-Links to resources
-
-
-
-[% algorithm_vocab([40, 'havnedistrikt', 'opning']) %]
-
-The stemming algorithm
-
-
-The Norwegian alphabet includes the following additional letters,
-
-
--
- æ å ø
-
-
-
-The following letters are vowels:
-
-
--
- a e i o u y æ å ø
-
-
-
-R2 is not used: R1 is defined in the same way as in the
-German stemmer.
-(See the note on R1 and R2.)
-
-
-
-Define a valid s-ending as one of
-
-
--
-b c d f g h j
-l m n o p r t v
-y z,
-
or k not preceded by a vowel.
-
-
-
-Do each of steps 1, 2 and 3.
-
-
-
-Step 1:
-
-
--
- Search for the longest among the following suffixes in R1, and
- perform the action indicated.
-
- - (a)
- a e ede ande ende ane ene hetene en
- heten ar er heter as es edes endes
- enes hetenes ens hetens ers ets et het
- ast
-
- delete
-
- (b)
- s
-
- delete if preceded by a valid s-ending
-
- (c)
- erte ert
-
- replace with er
-
-
- (Of course the letter of the valid s-ending is
- not necessarily in R1)
-
-
-
-
-Step 2:
-
-
--
-
- If the word ends dt or vt in R1, delete the t.
-
-
-
- (For example, meldt → meld, operativt → operativ)
-
-
-
-
-Step 3:
-
-
--
- Search for the longest among the following suffixes in R1, and if found,
- delete.
-
-
- leg eleg ig eig lig elig els
- lov elov slov hetslov
-
-
-
-The same algorithm in Snowball
-
-[% highlight_file('norwegian') %]
-
-[% footer %]
diff --git a/algorithms/porter/stemmer.tt b/algorithms/porter/stemmer.tt
deleted file mode 100644
index c345a88..0000000
--- a/algorithms/porter/stemmer.tt
+++ /dev/null
@@ -1,656 +0,0 @@
-[% header('The Porter stemming algorithm') %]
-
-Links to resources
-
-
-
-
-Here is a case study on how to code up a stemming algorithm in Snowball. First,
-the definition of the Porter stemmer, as it appeared in Program, Vol 14 no. 3 pp
-130-137, July 1980.
-
-
-
-
THE ALGORITHM
-
-
-A consonant in a word is a letter other than A, E, I, O or U, and other
-than Y preceded by a consonant. (The fact that the term ‘consonant’ is
-defined to some extent in terms of itself does not make it ambiguous.) So in
-TOY the consonants are T and Y, and in SYZYGY they are S, Z and G. If a
-letter is not a consonant it is a vowel.
-
-
-
-A consonant will be denoted by c, a vowel by v. A list ccc... of length
-greater than 0 will be denoted by C, and a list vvv... of length greater
-than 0 will be denoted by V. Any word, or part of a word, therefore has one
-of the four forms:
-
-
-
- - CVCV ... C
-
- CVCV ... V
-
- VCVC ... C
-
- VCVC ... V
-
-
-
-These may all be represented by the single form
-
-
-
-
- [C]VCVC ... [V]
-
-
-
-where the square brackets denote arbitrary presence of their contents.
-Using (VC)<sup>m</sup> to denote VC repeated m times, this may again be written as
-
-
-
-
- [C](VC)<sup>m</sup>[V].
-
-
-
-m will be called the measure of any word or word part when represented in
-this form. The case m = 0 covers the null word. Here are some examples:
-
-
-
- m=0 | | TR, EE, TREE, Y, BY.
- |
m=1 | | TROUBLE, OATS, TREES, IVY.
- |
m=2 | | TROUBLES, PRIVATE, OATEN, ORRERY.
- |
-
-
-The rules for removing a suffix will be given in the form
-
-
-
-
- (condition) S1 → S2
-
-
-
-This means that if a word ends with the suffix S1, and the stem before S1
-satisfies the given condition, S1 is replaced by S2. The condition is
-usually given in terms of m, e.g.
-
-
-
-
- (m > 1) EMENT →
-
-
-
-Here S1 is ‘EMENT’ and S2 is null. This would map REPLACEMENT to REPLAC,
-since REPLAC is a word part for which m = 2.
-
-
-
-The ‘condition’ part may also contain the following:
-
-
-
-*S | | - | | the stem ends with S (and similarly for the other letters).
-
- |
*v* | | - | | the stem contains a vowel.
-
- |
*d | | - | | the stem ends with a double consonant (e.g. -TT, -SS).
-
- |
*o | | - | | the stem ends cvc, where the second c is not W, X or Y (e.g.
- -WIL, -HOP).
- |
-
-
-And the condition part may also contain expressions with and, or and
-not, so that
-
-
-
-
- (m>1 and (*S or *T))
-
-
-
-tests for a stem with m>1 ending in S or T, while
-
-
-
-
- (*d and not (*L or *S or *Z))
-
-
-
-tests for a stem ending with a double consonant other than L, S or Z.
-Elaborate conditions like this are required only rarely.
-
-
-
-In a set of rules written beneath each other, only one is obeyed, and this
-will be the one with the longest matching S1 for the given word. For
-example, with
-
-
-
- SSES | | → | | SS
- |
IES | | → | | I
- |
SS | | → | | SS
- |
S | | → | |
- |
-
-
-(here the conditions are all null) CARESSES maps to CARESS since SSES is
-the longest match for S1. Equally CARESS maps to CARESS (S1=‘SS’) and CARES
-to CARE (S1=‘S’).
-
-
-
-In the rules below, examples of their application, successful or otherwise,
-are given on the right in lower case. The algorithm now follows:
-
-
-
-Step 1a
-
-
-
- SSES | | → | | SS | | | | caresses | | → | | caress
- |
IES | | → | | I | | | | ponies | | → | | poni
- |
| | | | | | | | ties | | → | | ti
- |
SS | | → | | SS | | | | caress | | → | | caress
- |
S | | → | | | | | | cats | | → | | cat
- |
-
-
-Step 1b
-
-
-
- (m>0) EED | | → | | EE | | | | feed | | → | | feed
- |
| | | | | | | | agreed | | → | | agree
- |
(*v*) ED | | → | | | | | | plastered | | → | | plaster
- |
| | | | | | | | bled | | → | | bled
- |
(*v*) ING | | → | | | | | | motoring | | → | | motor
- |
| | | | | | | | sing | | → | | sing
- |
-
-
-If the second or third of the rules in Step 1b is successful, the following
-is done:
-
-
-
- AT | | → | | ATE | | | | conflat(ed) | | → | | conflate
- |
BL | | → | | BLE | | | | troubl(ed) | | → | | trouble
- |
IZ | | → | | IZE | | | | siz(ed) | | → | | size
- |
(*d and not (*L or *S or *Z))
- | | → | | single letter | | | | hopp(ing) | | → | | hop
- |
| | | | | | | | tann(ed) | | → | | tan
- |
| | | | | | | | fall(ing) | | → | | fall
- |
| | | | | | | | hiss(ing) | | → | | hiss
- |
| | | | | | | | fizz(ed) | | → | | fizz
- |
(m=1 and *o)
- | | → | | E | | | | fail(ing) | | → | | fail
- |
| | | | | | | | fil(ing) | | → | | file
- |
-
-
-The rule to map to a single letter causes the removal of one of the double
-letter pair. The -E is put back on -AT, -BL and -IZ, so that the suffixes
--ATE, -BLE and -IZE can be recognised later. This E may be removed in step
-4.
-
-
-
-Step 1c
-
-
-
- (*v*) Y | | → | | I | | | | happy | | → | | happi
- |
| | | | | | | | sky | | → | | sky
- |
-
-
-Step 1 deals with plurals and past participles. The subsequent steps are
-much more straightforward.
-
-
-
-Step 2
-
-
-
- (m>0) ATIONAL | | → | | ATE | | | | relational | | → | | relate
- |
(m>0) TIONAL | | → | | TION | | | | conditional | | → | | condition
- |
| | | | | | | | rational | | → | | rational
- |
(m>0) ENCI | | → | | ENCE | | | | valenci | | → | | valence
- |
(m>0) ANCI | | → | | ANCE | | | | hesitanci | | → | | hesitance
- |
(m>0) IZER | | → | | IZE | | | | digitizer | | → | | digitize
- |
(m>0) ABLI | | → | | ABLE | | | | conformabli | | → | | conformable
- |
(m>0) ALLI | | → | | AL | | | | radicalli | | → | | radical
- |
(m>0) ENTLI | | → | | ENT | | | | differentli | | → | | different
- |
(m>0) ELI | | → | | E | | | | vileli | | → | | vile
- |
(m>0) OUSLI | | → | | OUS | | | | analogousli | | → | | analogous
- |
(m>0) IZATION | | → | | IZE | | | | vietnamization | | → | | vietnamize
- |
(m>0) ATION | | → | | ATE | | | | predication | | → | | predicate
- |
(m>0) ATOR | | → | | ATE | | | | operator | | → | | operate
- |
(m>0) ALISM | | → | | AL | | | | feudalism | | → | | feudal
- |
(m>0) IVENESS | | → | | IVE | | | | decisiveness | | → | | decisive
- |
(m>0) FULNESS | | → | | FUL | | | | hopefulness | | → | | hopeful
- |
(m>0) OUSNESS | | → | | OUS | | | | callousness | | → | | callous
- |
(m>0) ALITI | | → | | AL | | | | formaliti | | → | | formal
- |
(m>0) IVITI | | → | | IVE | | | | sensitiviti | | → | | sensitive
- |
(m>0) BILITI | | → | | BLE | | | | sensibiliti | | → | | sensible
- |
-
-
-The test for the string S1 can be made fast by doing a program switch on
-the penultimate letter of the word being tested. This gives a fairly even
-breakdown of the possible values of the string S1. It will be seen in fact
-that the S1-strings in step 2 are presented here in the alphabetical order
-of their penultimate letter. Similar techniques may be applied in the other
-steps.
-
-
-
-Step 3
-
-
-
- (m>0) ICATE | | → | | IC | | | | triplicate | | → | | triplic
- |
(m>0) ATIVE | | → | | | | | | formative | | → | | form
- |
(m>0) ALIZE | | → | | AL | | | | formalize | | → | | formal
- |
(m>0) ICITI | | → | | IC | | | | electriciti | | → | | electric
- |
(m>0) ICAL | | → | | IC | | | | electrical | | → | | electric
- |
(m>0) FUL | | → | | | | | | hopeful | | → | | hope
- |
(m>0) NESS | | → | | | | | | goodness | | → | | good
- |
-
-
-Step 4
-
-
-
- (m>1) AL | | → | | | | | | revival | | → | | reviv
- |
(m>1) ANCE | | → | | | | | | allowance | | → | | allow
- |
(m>1) ENCE | | → | | | | | | inference | | → | | infer
- |
(m>1) ER | | → | | | | | | airliner | | → | | airlin
- |
(m>1) IC | | → | | | | | | gyroscopic | | → | | gyroscop
- |
(m>1) ABLE | | → | | | | | | adjustable | | → | | adjust
- |
(m>1) IBLE | | → | | | | | | defensible | | → | | defens
- |
(m>1) ANT | | → | | | | | | irritant | | → | | irrit
- |
(m>1) EMENT | | → | | | | | | replacement | | → | | replac
- |
(m>1) MENT | | → | | | | | | adjustment | | → | | adjust
- |
(m>1) ENT | | → | | | | | | dependent | | → | | depend
- |
(m>1 and (*S or *T)) ION
- | | → | | | | | | adoption | | → | | adopt
- |
(m>1) OU | | → | | | | | | homologou | | → | | homolog
- |
(m>1) ISM | | → | | | | | | communism | | → | | commun
- |
(m>1) ATE | | → | | | | | | activate | | → | | activ
- |
(m>1) ITI | | → | | | | | | angulariti | | → | | angular
- |
(m>1) OUS | | → | | | | | | homologous | | → | | homolog
- |
(m>1) IVE | | → | | | | | | effective | | → | | effect
- |
(m>1) IZE | | → | | | | | | bowdlerize | | → | | bowdler
- |
-
-
-The suffixes are now removed. All that remains is a little tidying up.
-
-
-
-Step 5a
-
-
-
- (m>1) E | | → | | | | | | probate | | → | | probat
- |
| | | | | | | | rate | | → | | rate
- |
(m=1 and not *o) E
- | | → | | | | | | cease | | → | | ceas
- |
-
-
-Step 5b
-
-
-
- (m > 1 and *d and *L)
- | | → | | single letter | | | | controll | | → | | control
- |
| | | | | | | | roll | | → | | roll
- |
-
-
-
-Now, turning it into Snowball.
-
-
-
-The Porter stemmer makes use of a measure, m, of the length of a word or
-word part. If C is a sequence of one or more consonants, and V a sequence
-of one or more vowels, any word part has the form
-
-
--
- [C](VC)<sup>m</sup>[V],
-
-
-
-which is to be read as an optional C, followed by m repetitions of VC,
-followed by an optional V. This defines m. So for crepuscular the
-measure would be 4.
-
-
-
- c r e p u s c u l a r
- | | | | |
- [C] V C V C V C V C
- 1 2 3 4
-
-
-
-Most of the rules for suffix removal involve leaving behind a stem whose
-measure exceeds some value, for example,
-
-
--
- (m > 0) eed → ee
-
-
-
-means ‘replace eed with ee if the stem before eed has measure
-m > 0’. Implementations of the Porter stemmer usually have a routine that
-computes m each time there is a possible candidate for removal.
-
-
-
-In fact the only tests on m in the Porter stemmer are m > 0, m > 1, and,
-at two interesting points, m = 1. This suggests that there are two
-critical positions in a word: the point at which, going from left to
-right, m > 0 becomes true, and then the point at which m > 1 becomes true.
-It turns out that m > 0 becomes true at the point after the first consonant
-following a vowel, and m > 1 becomes true at the point after the first
-consonant following a vowel following a consonant following a vowel.
-Calling these positions p1 and p2, we can determine them quite simply in
-Snowball:
-
-
-[% highlight("
- define v 'aeiouy'
-
- /* ... */
-
- do(
- gopast v gopast non-v setmark p1
- gopast v gopast non-v setmark p2
- )
-") %]
-
-
-The region to the right of p1 will be denoted by R1, the region to the
-right of p2 by R2:
-
-
-
- c r e p u s c u l a r
- | |
- p1 p2
- <--- R1 --->
- <-- R2 -->
-
-
-
-We can test for being in these regions with calls to R1 and R2, defined by,
-
-
-[% highlight("
- define R1 as $p1 <= cursor
- define R2 as $p2 <= cursor
-") %]
-
-
-and using these tests instead of computing m is acceptable, so long as the
-stemming process never alters the p1 and p2 positions, which is indeed true
-in the Porter stemmer.
-
-
-
-A particularly interesting feature of the stemmers presented here is the
-common use they make of the positions p1 and p2. The details of marking
-p1
-and p2 vary between the languages because the definitions of vowel and
-consonant vary. For example, French i preceded and followed by vowel
-should be treated as a consonant (inquiétude); Portuguese ã and õ
-should be treated as a vowel-consonant pair (São João). A third
-important position is pV, which tries to mark the position of the shortest
-acceptable verb stem. Its definition varies somewhat between languages.
-The Porter stemmer does not use a pV explicitly, but the idea appears when
-the verb endings ing and ed are removed only when preceded by a vowel.
-In English therefore pV would be defined as the position after the first
-vowel.
-
-
-
-The Porter stemmer is divided into five steps, step 1 is divided further
-into steps 1a, 1b and 1c, and step 5 into steps 5a and 5b. Step 1 removes
-the i-suffixes, and steps 2 to 4 the d-suffixes (*). Composite d-suffixes are
-reduced to single d-suffixes one at a time. So for example if a word ends
-icational, step 2 reduces it to icate and step 3 to ic. Three steps are
-sufficient for this process in English. Step 5 does some tidying up.
-
-
-
-One can see how easily the stemming rules translate into Snowball by
-comparing the definition of Step 1a from the 1980 paper,
-
-
-
- Step 1a:
- SSES → SS caresses → caress
- IES → I ponies → poni
- ties → ti
- SS → SS caress → caress
- S → cats → cat
-
-
-
-with its Snowball equivalent,
-
-
-[% highlight("
- define Step_1a as (
- [substring] among (
- 'sses' (<-'ss')
- 'ies' (<-'i')
- 'ss' ()
- 's' (delete)
- )
- )
-") %]
-
-
-The word to be stemmed is being scanned right to left from the end. The
-longest of 'sses', 'ies', 'ss' or 's' is searched for and defined as the
-slice. (If none are found, Step_1a signals f.) If 'sses' is found, it is
-replaced by 'ss', and so on. Of course, replacing 'ss' by 'ss' is a dummy
-action, so we can write
-
-
-[% highlight("
- 'ss' ()
-") %]
-
-
-instead of
-
-
-[% highlight("
- 'ss' (<-'ss')
-") %]
-
-
-Remember that delete just means <- ''.
-
-
-
-The really tricky part of the whole algorithm is step 1b,
-which may be worth looking at in detail. Here it is, without the
-example words on the far right,
-
-
-
- Step 1b:
- (m > 0) EED → EE
- (*v*) ED →
- (*v*) ING →
-
- If the second or third of the rules in Step 1b is successful, the
- following is done:
-
- AT → ATE
- BL → BLE
- IZ → IZE
- (*d and not (*L or *S or *Z)) → single letter
- (m = 1 and *o) → E
-
-
-
-The first part of the rule means that eed maps to ee if eed is in R1
-(which is equivalent to m > 0), or ed and ing are removed if they are
-preceded by a vowel. In Snowball this is simply,
-
-
-[% highlight("
- define Step_1b as (
- [substring] among (
- 'eed' (R1 <-'ee')
- 'ed'
- 'ing' (test gopast v delete)
- )
- )
-") %]
-
-
-But this must be modified by the second part of the rule. *d indicates a
-test for double letter consonant — bb, dd etc. *L, *S, *Z are tests
-for l, s, z. *o is a short vowel test — it is matched by
-consonant-vowel-consonant, where the consonant on the right is not w, x
-or y. If the short vowel test is satisfied, m = 1 is equivalent to the
-cursor being at p1. So the second part of the rule means, map at, bl, iz
-to ate, ble, ize; map certain double letters to single letters; and
-add e after a short vowel in words of one syllable.
-
-
-
-We first need two extra groupings,
-
-
-[% highlight("
- define v 'aeiouy'
- define v_WXY v + 'wxY' // v with 'w', 'x' and 'y'-consonant
- define v_LSZ v + 'lsz' // v with 'l', 's', 'z'
-") %]
-
-
-and a test for a short vowel,
-
-
-[% highlight("
- define shortv as ( non-v_WXY v non-v )
-") %]
-
-
-(The v_WXY test comes first because we are scanning backwards, from right to
-left.)
-
-
-
-The double to single letter map can be done as follows: first define the
-slice as the next non-v_LSZ and copy it to a string, ch, as a single
-character,
-
-
-[% highlight("
- strings ( ch )
-
- /* ... */
-
- [non-v_LSZ] ->ch
-") %]
-
-
-A further test, ch, tests that the next letter of the string is the same
-as the one in ch, and if this gives signal t, delete deletes the slice,
-
-
-[% highlight("
- [non-v_LSZ] ->ch ch delete
-") %]
-
-
-Step_1b can then be written like this,
-
-
-[% highlight("
- define Step_1b as (
- [substring] among (
- 'eed' (R1 <-'ee')
- 'ed'
- 'ing' (
- test gopast v delete
- (test among('at' 'bl' 'iz') <+ 'e')
- or
- ([non-v_LSZ]->ch ch delete)
- or
- (atmark p1 test shortv <+ 'e')
- )
- )
- )
-") %]
-
-
-But we can improve the appearance, and speed, of this by turning the
-second part of the rule into another among command, noting that the only
-letters that need undoubling are b, d, f, g, m, n, p, r
-and t,
-
-
-[% highlight("
- define Step_1b as (
- [substring] among (
- 'eed' (R1 <-'ee')
- 'ed'
- 'ing' (
- test gopast v delete
- test substring among(
- 'at' 'bl' 'iz'
- (<+ 'e')
- 'bb' 'dd' 'ff' 'gg' 'mm' 'nn' 'pp' 'rr' 'tt'
- // ignoring double c, h, j, k, q, v, w, and x
- ([next] delete)
- '' (atmark p1 test shortv <+ 'e')
- )
- )
- )
- )
-") %]
-
-
-Note the null string in the second among, which acts as a default case.
-
-
-
-The Porter stemmer in Snowball is given below. This is an exact
-implementation of the algorithm described in the 1980 paper, unlike the
-other implementations distributed by the author, which have, and have
-always had, three small points of difference (clearly indicated) from the
-original algorithm. Since all other implementations of the algorithm seen
-by the author are in some degree inexact, this may well be the first ever
-correct implementation.
-
-
-The full algorithm in Snowball
-
-[% highlight_file('porter') %]
-
-[% footer %]
diff --git a/algorithms/portuguese/stemmer.tt b/algorithms/portuguese/stemmer.tt
deleted file mode 100644
index 945a37e..0000000
--- a/algorithms/portuguese/stemmer.tt
+++ /dev/null
@@ -1,175 +0,0 @@
-[% header('Portuguese stemming algorithm') %]
-
-Links to resources
-
-
-
-[% algorithm_vocab([40, 'boa', 'quiabo']) %]
-
-The stemming algorithm
-
-
-Letters in Portuguese include the following accented forms,
-
-
--
- á é í ó ú â ê ô ç ã õ ü
-
-The following letters are vowels:
--
- a e i o u á é í ó ú â ê ô
-
-And the two nasalised vowel forms,
--
- ã õ
-
-
-
-should be treated as a vowel followed by a consonant.
-
-
-
-ã and õ are therefore replaced by a~ and o~ in the word, where ~ is a
-separate character to be treated as a consonant. And then —
-
-
-
-R2
-(see the note on R1 and R2)
-and RV have the same definition as in the
- Spanish stemmer.
-
-
-
-Always do step 1.
-
-
-
-Step 1: Standard suffix removal
-
-
--
- Search for the longest among the following suffixes, and perform the
- action indicated.
-
- - eza ezas ico ica icos icas ismo ismos
- ável ível ista istas oso osa
- osos osas amento amentos imento imentos
- adora ador aça~o adoras adores aço~es
- ante antes ância
-
- delete if in R2
-
- logia logias
-
- replace with log if in R2
-
- ução uções
-
- replace with u if in R2
-
- ência ências
-
- replace with ente if in R2
-
- amente
-
- delete if in R1
-
- if preceded by iv, delete if in R2 (and if further preceded by at,
- delete if in R2), otherwise,
-
- if preceded by os, ic or ad, delete if in R2
-
- mente
-
- delete if in R2
-
- if preceded by ante, avel or ível, delete if in R2
-
- idade idades
-
- delete if in R2
-
- if preceded by abil, ic or iv, delete if in R2
-
- iva ivo ivas ivos
-
- delete if in R2
-
- if preceded by at, delete if in R2
-
- ira iras
-
- replace with ir if in RV and preceded by e
-
-
-
-
-Do step 2 if no ending was removed by step 1.
-
-
-
-Step 2: Verb suffixes
-
-
--
- Search for the longest among the following suffixes in RV, and if found,
- delete.
-
-
- ada ida ia aria eria iria ará ara erá era irá ava asse esse
- isse aste este iste ei arei erei irei am iam ariam eriam iriam
- aram eram iram avam em arem erem irem assem essem issem ado ido
- ando endo indo ara~o era~o ira~o ar er ir as adas idas ias arias
- erias irias arás aras erás eras irás avas es ardes erdes
- irdes ares eres ires asses esses isses astes estes istes is ais
- eis íeis aríeis eríeis iríeis áreis areis éreis ereis
- íreis ireis ásseis ésseis ísseis áveis ados idos ámos
- amos íamos aríamos eríamos iríamos áramos éramos
- íramos ávamos emos aremos eremos iremos ássemos êssemos
- íssemos imos armos ermos irmos eu iu ou ira
- iras
-
-If the last step to be obeyed — either step 1 or 2 — altered the word,
-do step 3.
-
-Step 3
--
- Delete suffix i if in RV and preceded by c
-
-
-
-Alternatively, if neither step 1 nor step 2 altered the word, do step 4.
-
-
-
-Step 4: Residual suffix
-
-
--
- If the word ends with one of the suffixes
-
-
- os a i o á í ó
-
- in RV, delete it
-
-
-
-Always do step 5
-
-
-
-Step 5:
-
-
--
-
- If the word ends with one of
-
- -
- e é ê
-
-
- in RV, delete it, and if preceded by gu (or ci) with the u (or i) in RV,
- delete the u (or i).
-
-
-
- Or if the word ends ç remove the cedilla
-
-
-
-
-And finally:
-
-
--
- Turn a~, o~ back into ã, õ
-
-
-The same algorithm in Snowball
-
-[% highlight_file('portuguese') %]
-
-[% footer %]
diff --git a/algorithms/romance.tt b/algorithms/romance.tt
deleted file mode 100644
index 2c1d703..0000000
--- a/algorithms/romance.tt
+++ /dev/null
@@ -1,106 +0,0 @@
-[% header('Romance language stemmers') %]
-
-Links to resources
-
-
-
-
-The Romance languages have a wealth of different i-suffixes (*) among the verb
-forms, and relatively few for the other parts of speech. In addition to
-this, many verbs exhibit irregularities. Many also have short stems,
-leading to dangers of over-stemming. The verb, therefore, tends to
-dominate initial thinking about stemming in these languages.
-
-
-
-An algorithmic stemmer can usually reduce the multiple forms of a verb to at
-most two or three, and often just one. This is probably
-adequate for standard IR use, where the verb is used rather less than other
-parts of speech in short queries.
-
-
-
-In French the verb endings ent and ons cannot be removed without
-unacceptable overstemming. The ons form is rarer, but ent forms
-are quite common, and will appear regularly throughout a stemmed vocabulary.
-
-
-
-In Italian, the final vowel of nouns and adjectives indicates number and
-gender (amico is male friend, amica is female friend) and its removal is a
-necessary part of stemming, but the final vowel sometimes separates words
-of different meanings (banco is bench, banca is bank), which leads to some
-over-stemming.
-
-
-
-The d-suffixes of all four languages follow a similar pattern. They can be
-tabulated as follows,
-
-
-
-
- | | | | French | | Spanish | | Portug. | | Italian
- |
- |
noun | | ANCE | | ance | | anza | | eza | | anza
- |
adjective | | IC | | ique | | ico | | ico | | ico
- |
noun | | ISM | | isme | | ismo | | ismo | | ismo
- |
adjective | | ABLE | | able | | able | | ável | | abile
- |
adjective | | IBLE | | - | | ible | | ível | | ibile
- |
noun | | IST | | iste | | ista | | ista | | ista
- |
adjective | | OUS | | eux | | oso | | oso | | oso
- |
noun | | MENT | | ment | | amiento | | amento | | mente
- |
noun | | ATOR | | ateur | | ador | | ador | | attore
- |
noun | | ATRESS | | atrice | | - | | - | | atrice
- |
noun | | ATION | | ation | | ación | | ação | | azione
- |
noun | | LOGY | | logie | | logía | | logia | | logia
 |
noun | | USION | | usion | | ución | | ução | | uzione
- |
noun | | ENCE | | ence | | encia | | ência | | enza
- |
adjective | | ENT | | ent | | ente | | ente | | ente
-
- |
noun | | ANCE | | ance | | ancia | | ância | | anza
- |
noun | | ANT | | ant | | ante | | ante | | ante
-
- |
adverb | | LY | | (e)ment | | (a)mente | | (a)mente | |
-(a)mente
- |
noun | | ITY | | ité | | idad | | idade | | ità
- |
adjective | | IVE | | if | | ivo | | ivo | | ivo
- |
verb | | ATE | | at | | at | | at | | at
- |
-
-
-Equivalent English forms are shown in upper case. In English, ATE is a valid ending, but
-in the Romance languages it only exists in combinations. The endings can appear in a
-number of styles. In Italian, oso can also be osa, osi or ose, French
-ique becomes ic in combinations.
-
-
-
-The important combining forms are summarised in the following picture:
-
-
-
-
-
-In English, ABLE combines with LY to form ABLY. So in French, for example,
-able combines with (e)ment to form ablement.
-In some languages particular combinations are rare. In Italian, for example,
-ANT + LY, which would be the ending antemente, is so rare that it does not
-figure in the stemming algorithm.
-According to the picture, we
-should encounter the forms ICATIVELY and ICATIVITY, and dictionaries
-instance a few English words with these endings (communicatively for
-example).
-But in practice three is the maximum number of derivational
-suffixes that one need consider in combination.
-
-
-[% footer %]
diff --git a/algorithms/romanian/stemmer.tt b/algorithms/romanian/stemmer.tt
deleted file mode 100644
index 3e70031..0000000
--- a/algorithms/romanian/stemmer.tt
+++ /dev/null
@@ -1,224 +0,0 @@
-[% header('Romanian stemming algorithm') %]
-
-Links to resources
-
-
-
-[% algorithm_vocab([40, 'abruptă', 'ocol']) %]
-
-
-(For the background to this work, see the
-credits page. Following earlier misgivings on the wisdom
-of removing IST/ISM endings, in this stemmer they are now conflated to a single
-form. It can easily be modified to bring it in line with the other Romance
-stemmers: see the internal comments marked ‘IST’.
-
-
-
-It is assumed that hyphenated forms are split into separate words prior to
-stemming.)
-
-
-The stemming algorithm
-
-
-Letters in Romanian include the following accented forms,
-
-
--
- ă â î ș ț
-
-
-
-The following letters are vowels:
-
-
--
- a ă â e i î o u
-
-
-
-Before full Unicode support was widespread it was common to use ş and
-ţ (cedilla instead of comma-below) in Romanian text as these characters
-were more readily available in 8-bit character sets. The original version of
-this algorithm only recognised the cedilla forms, but the current version
-instead normalises the old forms as a first step: replace ş by
-ș and ţ by ț.
-
-
-
-Then, i and u between vowels are put into upper case
-(so that they are treated as consonants).
-
-
-
-R1, R2
-(see the note on R1 and R2)
-and RV then have the same definition as in the
- Spanish stemmer.
-
-
-
-Always do steps 0, 1, 2 and 4. (Step 3 is conditional on steps 1 and 2.)
-
-
-
-Step 0: Removal of plurals (and other simplifications)
-
-
--
- Search for the longest among the following suffixes, and, if
- it is in R1, perform the
- action indicated.
-
- - ul ului
-
- delete
-
- aua
-
- replace with a
-
- ea ele elor
-
- replace with e
-
- ii iua iei iile iilor ilor
-
- replace with i
-
- ile
-
- replace with i if not preceded by ab
-
- atei
-
- replace with at
-
- ație ația
-
- replace with ați
-
-
-
-
-Step 1: Reduction of combining suffixes
-
-
--
- Search for the longest among the following suffixes, and, if
- it is in R1, perform the replacement action indicated.
- Then repeat this step until no replacement occurs.
-
- - abilitate abilitati abilităi abilități
-
- replace with abil
-
- ibilitate
-
- replace with ibil
-
- ivitate ivitati ivităi ivități
-
- replace with iv
-
- icitate icitati icităi icități
- icator icatori
- iciv iciva icive icivi icivă
- ical icala icale icali icală
-
- replace with ic
-
- ativ ativa ative ativi ativă ațiune
- atoare ator atori
- ătoare ător ători
-
- replace with at
-
- itiv itiva itive itivi itivă ițiune
- itoare itor itori
-
- replace with it
-
-
-
-
-Step 2: Removal of ‘standard’ suffixes
-
-
--
- Search for the longest among the following suffixes, and, if
- it is in R2, perform the action indicated.
-
- - at ata ată ati ate
- ut uta ută uti ute
- it ita ită iti ite
- ic ica ice ici ică
- abil abila abile abili abilă
- ibil ibila ibile ibili ibilă
- oasa oasă oase os osi oși
- ant anta ante anti antă
- ator atori
- itate itati ităi ități
- iv iva ive ivi ivă
-
- delete
-
- iune iuni
-
- delete if preceded by ț, and replace the ț by t.
-
- ism isme
- ist ista iste isti istă iști
-
- replace with ist
-
-
-
-
-Do step 3 if no suffix was removed either by step 1 or step 2.
-
-
-
-Step 3: Removal of verb suffixes
-
-
--
- Search for the longest suffix in region RV among the following,
- and perform the action indicated.
-
- - are ere ire âre
- ind ând
- indu ându
- eze
- ească
- ez ezi ează esc ești
- ește
- ăsc ăști
- ăște
- am ai au
- eam eai ea eați eau
- iam iai ia iați iau
- ui
- ași arăm arăți ară
- uși urăm urăți ură
- iși irăm irăți iră
- âi âși ârăm ârăți âră
- asem aseși ase aserăm aserăți aseră
- isem iseși ise iserăm iserăți iseră
- âsem âseși âse âserăm âserăți âseră
- usem useși use userăm userăți useră
-
-
- delete if preceded in RV by a consonant or u
-
- ăm ați
- em eți
- im iți
- âm âți
- seși serăm serăți seră
- sei se
- sesem seseși sese seserăm seserăți seseră
-
- delete
-
-
-
-
-Step 4: Removal of final vowel
-
-
--
-Search for the longest among the suffixes
-
-
- a e i ie ă
-
-and, if it is in RV, delete it.
-
-
-
-And finally:
-
-
--
- Turn I, U back into i, u.
-
-
-The same algorithm in Snowball
-
-[% highlight_file('romanian') %]
-
-[% footer %]
diff --git a/algorithms/russian/stemmer.tt b/algorithms/russian/stemmer.tt
deleted file mode 100644
index 9e7b408..0000000
--- a/algorithms/russian/stemmer.tt
+++ /dev/null
@@ -1,327 +0,0 @@
-[% header('Russian stemming algorithm') %]
-
-Links to resources
-
-
-[% algorithm_lis('russian', 'Russian') %]
-
-
-
-The Snowball stemmer represents the Cyrillic alphabet with ASCII characters,
-following the standard Library of Congress transliteration scheme.
-
-
-
-[% algorithm_vocab([60, 'в', 'п']) %]
-
-The stemming algorithm
-
-
-i-suffixes (*) of Russian tend to be quite regular, with irregularities of
-declension involving a change to the stem. Irregular forms therefore
-usually just generate two or more possible stems. Stems in Russian can
-be very short, and many of the suffixes are also particle words that make
-‘natural stopwords’, so a tempting way of running the stemmer is to set a
-minimum stem length of zero, and thereby reduce to null all words which
-are made up entirely of suffix parts. We have been a little more cautious,
-and have insisted that a minimum stem contains one vowel.
-
-
-
-The 32 letters of the Russian alphabet are as follows, with the
-transliterated forms that we will use here shown in brackets:
-
-
-
- а (a)
- | | б (b)
- | | в (v)
- | | г (g)
- | | д (d)
- | | е (e)
- | | ж (zh)
- | | з (z)
-
- |
и (i)
- | | й (ì)
- | | к (k)
- | | л (l)
- | | м (m)
- | | н (n)
- | | о (o)
- | | п (p)
-
- |
р (r)
- | | с (s)
- | | т (t)
- | | у (u)
- | | ф (f)
- | | х (kh)
- | | ц (ts)
- | | ч (ch)
-
- |
ш (sh)
- | | щ (shch)
- | | ъ (")
- | | ы (y)
- | | ь (')
- | | э (è)
- | | ю (iu)
- | | я (ia)
-
- |
-
-
-There is a 33rd letter, ё (e"), but it is rarely used and often
-replaced by е in informal writing. The original algorithm here assumed it
-had already been mapped to е (e); since 2018-03-16 the Snowball
-implementation we provide performs this mapping for you.
-
-
-
-The following are vowels:
-
-
--
- а (a) е (e) и (i) о (o) у (u) ы (y)
- э (è) ю (iu) я (ia)
-
-
-
-In any word, RV is the region after the first vowel, or the end of the word
-if it contains no vowel.
-
-
-
-R1 is the region after the first non-vowel following a vowel, or the end of
-the word if there is no such non-vowel.
-
-
-
-R2 is the region after the first non-vowel following a vowel in R1, or the
-end of the word if there is no such non-vowel.
-
-
-
-For example:
-
-
-
- p r o t i v o e s t e s t v e n n o m
- |<------ RV ------>|
- |<----- R1 ------>|
- |<----- R2 ------>|
-
-
-
-(See note on R1 and R2.)
-
-
-
-We now define the following classes of ending:
-
-
-
-PERFECTIVE GERUND:
-
-
--
-
- group 1: в (v) вши (vshi) вшись (vshis')
-
-
-
- group 2: ив (iv) ивши (ivshi) ившись (ivshis')
- ыв (yv) ывши (yvshi) ывшись (yvshis')
-
-
-
-
-group 1 endings must follow а (a) or я (ia)
-
-
-
-ADJECTIVE:
-
-
--
- ее (ee) ие (ie) ые (ye) ое (oe) ими (imi) ыми
- (ymi) ей (eì) ий (iì) ый (yì) ой (oì) ем
- (em) им (im) ым (ym) ом (om) его (ego) ого (ogo)
- ему (emu) ому (omu) их (ikh) ых (ykh) ую (uiu)
- юю (iuiu) ая (aia) яя (iaia)
- ою (oiu)
- ею (eiu)
-
-
-
-PARTICIPLE:
-
-
--
-
- group 1: ем (em) нн (nn) вш (vsh) ющ (iushch) щ (shch)
-
-
-
- group 2: ивш (ivsh) ывш (yvsh) ующ (uiushch)
-
-
-
-
-group 1 endings must follow а (a) or я (ia)
-
-
-
-REFLEXIVE:
-
-
--
-
- ся (sia) сь (s')
-
-
-
-
-VERB:
-
-
--
-
- group 1: ла (la) на (na) ете (ete) йте (ìte) ли (li)
- й (ì) л (l) ем (em) н (n) ло (lo) но (no) ет
- (et) ют (iut) ны (ny) ть (t') ешь (esh') нно (nno)
-
-
-
- group 2: ила (ila) ыла (yla) ена (ena) ейте (eìte)
- уйте (uìte) ите (ite) или (ili) ыли
- (yli) ей (eì) уй (uì) ил (il) ыл (yl) им (im)
- ым (ym) ен (en) ило (ilo) ыло (ylo) ено (eno) ят
- (iat) ует (uet) уют (uiut) ит (it) ыт (yt) ены
- (eny) ить (it') ыть (yt') ишь (ish')
- ую (uiu) ю (iu)
-
-
-
-
-group 1 endings must follow а (a) or я (ia)
-
-
-
-NOUN:
-
-
--
-
-а (a) ев (ev) ов (ov) ие (ie) ье ('e) е (e) иями
-(iiami) ями (iami) ами (ami) еи (ei) ии (ii) и (i)
-ией (ieì) ей (eì) ой (oì) ий (iì) й (ì)
-иям (iiam) ям (iam) ием (iem) ем (em) ам (am) ом
-(om) о (o) у (u) ах (akh) иях (iiakh) ях (iakh) ы
-(y) ь (') ию (iiu) ью ('iu) ю (iu) ия (iia) ья
-('ia) я (ia)
-
-
-
-
-SUPERLATIVE:
-
-
--
-
- ейш (eìsh) ейше (eìshe)
-
-
-
-
-These are all i-suffixes. The list of d-suffixes is very short,
-
-
-
-DERIVATIONAL:
-
-
--
-
- ост (ost) ость (ost')
-
-
-
-
-Define an ADJECTIVAL ending as an ADJECTIVE ending optionally preceded
-by a PARTICIPLE ending.
-
-
--
- For example, in
-
- бегавшая | | = | | бега | | + | | вш | | + | | ая
- |
(begavshaia | | = | | bega | | + | | vsh | | + | | aia)
- |
- ая (aia) is an adjective ending, and вш (vsh) a participle ending of group 1
- (preceded by the final а (a) of бега (bega)), so вшая (vshaia) is an
- adjectival ending.
-
-
-
-In searching for an ending in a class, always choose the longest one
-from the class.
-
-
--
- So in searching for a NOUN ending for величие (velichie), choose ие (ie) rather than
- е (e).
-
-
-
-Undouble н (n) means, if the word ends нн (nn), remove the last letter.
-
-
-
-Here now are the stemming rules.
-
-
-
-All tests take place in the RV part of the word.
-
-
--
- So in the test for perfective gerund, the а (a) or я (ia) which the group 1
- endings must follow must itself be in RV. In other words the letters
- before the RV region are never examined in the stemming process.
-
-
-
-Do each of steps 1, 2, 3 and 4.
-
-
-
-Step 1:
-Search for a PERFECTIVE GERUND ending. If one is found remove it, and that
-is then the end of step 1. Otherwise try and remove a REFLEXIVE ending,
-and then search in turn for (1) an ADJECTIVAL, (2) a VERB or (3) a
-NOUN ending. As soon as one of the endings (1) to (3) is found remove it,
-and terminate step 1.
-
-
-
-Step 2: If the word ends with и (i), remove it.
-
-
-
-Step 3: Search for a DERIVATIONAL ending in R2 (i.e. the entire ending
-must lie in R2), and if one is found, remove it.
-
-
-
-Step 4: (1) Undouble н (n), or, (2) if the word ends with a SUPERLATIVE ending,
-remove it and undouble н (n), or (3) if the word ends ь (') (soft sign) remove it.
-
-
-The same algorithm in Snowball
-
-[% highlight_file('russian') %]
-
-[% footer %]
diff --git a/algorithms/scandinavian.tt b/algorithms/scandinavian.tt
deleted file mode 100644
index 94f6d13..0000000
--- a/algorithms/scandinavian.tt
+++ /dev/null
@@ -1,34 +0,0 @@
-[% header('Scandinavian language stemmers') %]
-
-Links to resources
-
-
-
-
-The stemmers for these three Scandinavian languages are all very simple,
-and quite similar to each other. But between the languages there is a difference
-in which endings can be removed without difficulty, even though the endings
-are very similar. For example, in Norwegian
-the ending ede can be removed safely, but not in Danish.
-
-
-
-To the definite article (the in English, der etc in German) there
-corresponds
-a noun ending in the Scandinavian languages. This ending cannot always be removed
-with certainty. In Swedish, for example, the en form is removed, but not the
-t or n form,
-
-
-
- husen | | | | hus
- |
flickan | | → | | flickan
- |
äpplet | | | | äpplet
-
- |
-
-[% footer %]
diff --git a/algorithms/serbian/stemmer.tt b/algorithms/serbian/stemmer.tt
deleted file mode 100644
index c7298ba..0000000
--- a/algorithms/serbian/stemmer.tt
+++ /dev/null
@@ -1,273 +0,0 @@
-[% header('Serbian stemming algorithm') %]
-
-Links to resources
-
-
-[% algorithm_lis('serbian', 'Serbian') %]
-
-
-[% algorithm_vocab([
-'ambasade',
-'ambasadi',
-'abdominalna',
-'abdominalno',
-'abdominalnih',
-'abdominalnim',
-'abdominalnog',
-'abdominalnoj',
-'abdominalnom',
-'abdominalnu',
-'abeceda',
-'abecede',
-'abecedi',
-'abecedni',
-'abecednih',
-'abecednim',
-'abecedno',
-'abecednog',
-'abecednom',
-'abecedom',
-'abecedu',
-'abecendom',
-'ablendovanje',
-'ablenduje',
-'ablenduju',
-'ablendujući',
-'abnormalan',
-'abnormalije',
-'abnormalijom',
-'abnormalna',
-'abnormalne',
-'abnormalni',
-'abnormalnih',
-'abnormalnim',
-'abnormalno',
-'abnormalnog',
-'abnormalnom',
-'abnormalnost',
-'abnormalnosti',
-'abnormalnostima',
-'abnormalnu',
-], [
-'obustavila',
-'obustavile',
-'obustavili',
-'obustavilo',
-'obustavio',
-'ocenjivala',
-'ocenjivali',
-'ocenjivan',
-'ocenjivana',
-'ocenjivane',
-'ocenjivani',
-'ocenjivano',
-'ocenjivanja',
-'ocenjivanje',
-'ocenjivanju',
-'ocenjivao',
-'ocenjivati',
-'ocenjuje',
-'ocenjujem',
-'ocenjujemo',
-'ocenjuješ',
-'ocenjujete',
-'ocenjuju',
-'ocenjujući',
-'očajan',
-'očajna',
-'očajne',
-'očajni',
-'očajno',
-'padobrana',
-'padobranaca',
-'padobranci',
-'padobrancima',
-'padobranom',
-'padobranskim',
-'padobransku',
-'padobranu',
-'paganska',
-'paganske',
-'paganski',
-'paganskih',
-]) %]
-
-The stemming algorithm
-
-
-The Serbian language is a Slavic language (Indo-European) of the South Slavic
-subgroup. It is highly inflected and uses similar rules for morphological
-derivation and flexion as other Slavic languages, especially ones derived from
-the Serbo-Croatian language used in the former Yugoslavia. Because of this
-highly inflected characteristic a stemmer for Serbian language will have many
-more rules than stemmers for less inflected languages.
-
-
-
-Serbian Stemmer described in this document is based on the Croatian
-Stemmer which is published under the GNU Lesser General Public License.
-Mark Regions, Morphological Changes (Step_1) and Stemming
-(Step_2) routines are based on the Croatian Stemming Algorithm. In
-addition, some of the existing rules for Morphological Changes and Stemming
-(Step_1 and Step_2 among lists) have been modified and new rules have
-been added for the needs of the Serbian Stemmer.
-
-
-
-Latin alphabet in Serbian includes the following letters with diacritics:
-
-
--
- č ć đ š ž
-
-
-
-The following letters are vowels:
-
-
--
- a e i o u
-
-
-
-There is also letter - r - that isn't a vowel but it is sometimes used for syllabification.
-
-
-Main Routines of Serbian Stemming Algorithm are:
-
-
-- Conversion of Cyrillic alphabet to Latin alphabet
-
-The Serbian language uses both Cyrillic and Latin alphabets, but
-these days most people use the Latin alphabet on their PCs, Phones, etc. This
-algorithm is developed mostly for the purposes of the Information Retrieval,
-therefore the first thing it does is to convert Cyrillic letters to
-Latin.
-
-
-
-- Prelude
-
-In Serbian language there are two dialects: Ekavian and
-Ijekavian. For example words:
-
-- senka (Ekavian)
-
- sjenka (Ijekavian)
-
-have the same meaning (Shadow), also words:
-
-- mleko (Ekavian)
-
- mlijeko (Ijekavian)
-
-have the same meaning (Milk) but are spelled differently, and because
-the most widely used dialect in Serbia is Ekavian, the next thing to do is to
-replace the Ijekavian forms with their Ekavian equivalents.
-
-
-
-These days it is also common, although not valid, to use combination of letters
-"d" and "j" instead of a single letter "đ". For example
-people will more often write "Novak Djoković" instead of "Novak
-Đoković" and because this algorithm is developed with Information Retrieval
-in mind they should be treated as the same terms.
-
-
-- Mark Regions
-
-R1 is either:
-
-- a region after the first vowel if there are at least two letters outside
-of it, otherwise it is a region after the first non-vowel following a vowel,
-
- a region after the first "r" if there are at least two letters
-outside of it, otherwise it is a region after the first non-"r"
-following an "r".
-
-
-Note that every suffix which the stemmer can remove contains at least one
-vowel, so in the degenerate case of an input which contains no vowels there
-is nothing to be done. The Snowball implementation of this stemmer sets
-R1 to be a zero length region at the end of the word if the input
-contains no vowels and no "r".
-
-
-In Serbian language there are some words in which "r" letter is used for
-syllabification and in such words vowels can appear at the very end - for
-example word "grmlje".
-
-
-
-So before the algorithm decides what R1 will be, it needs to check whether and
-where the letter "r" occurs and where the first vowel is. If it finds an "r"
-that occurs before the first vowel with at least one letter between them,
-this means that "r" is used for syllabification and R1 is given by the second
-("r"-based) definition above; otherwise by the first (vowel-based) definition.
-
-
-
-For example:
-
-- "tr|go|va|čki" - in this word "tr" is the first syllable
- which means that "r" is used for syllabification and R1 =
- "govački"
-
-
- "tre|ne|rka" - in this word there is a letter "r" before the
- first vowel but there aren't any letters between them which means that
- "r" isn't used for syllabification and R1 = "nerka".
-
-
- "r|ta|njski" - in this word "r" is the first syllable but if
- we use "tanjski" as R1 it won't leave enough letters outside
- of it, so we need to shrink it down to a region after the first
- non-"r" following an "r" which is in this case =
- "anjski".
-
-
- "a|vi|on" - similar to the previous case but with a vowel instead
- of an "r".
-
-
-Inside Mark Regions routine there is a test routine that is used to
-check for letters with diacritics and is used later to apply certain rules in
-stemming. Result of this test routine is stored inside no_diacritics flag.
-This test routine is used because people these days tend to use letters without
-diacritics (instead of the proper ones with diacritics) and we need to take
-this into account also.
-
-
-
-- Morphological Changes
-
-Very last thing to do, before any stemming is done, are morphological changes.
-These changes are applied so that we get the same stems for different forms of a
-word.
-
-
-
-For example words:
-
-- "pravilan" (Masculine, Singular)
-
- "pravilna" (Feminine, Singular)
-
- "pravilno" (Neuter, Singular)
-
-should have the same stem. To get that result the algorithm will first change
-word "pravilan" (Masculine, Singular) to "pravilni" (Masculine,
-Plural) and after that the word will be stemmed.
-
-
-
-- Stemming
-
-There are two steps for stemming. The first contains most of the rules and is
-the primary stemming routine and the second one will try to stem the word only
-if the first one failed to do so - whether it was because there were no rules
-that could be applied or the rule overlapped the R1 region. The second
-step contains a few rules that will do proper stemming for most words that
-couldn't be stemmed using the rules from the first step.
-
-
-
-
-The full algorithm in Snowball
-
-[% highlight_file('serbian') %]
-
-[% footer %]
diff --git a/algorithms/spanish/stemmer.tt b/algorithms/spanish/stemmer.tt
deleted file mode 100644
index 2098c3b..0000000
--- a/algorithms/spanish/stemmer.tt
+++ /dev/null
@@ -1,205 +0,0 @@
-[% header('Spanish stemming algorithm') %]
-
-Links to resources
-
-
-
-[% algorithm_vocab([40, 'che', 'torá']) %]
-
-The stemming algorithm
-
-
-Letters in Spanish include the following accented forms,
-
-
--
- á é í ó ú ü ñ
-
-
-
-The following letters are vowels:
-
-
--
- a e i o u á é í ó ú ü
-
-
-
-R2 is defined in the usual way —
-see the note on R1 and R2.
-
-
-
-RV is defined as follows (and this is not the same as the
- French stemmer
-definition):
-
-
-
-If the second letter is a consonant, RV is the region after the next
-following vowel, or if the first two letters are vowels, RV is the region
-after the next consonant, and otherwise (consonant-vowel case) RV is the
-region after the third letter. But RV is the end of the word if these
-positions cannot be found.
-
-
-
-For example,
-
-
-
- m a c h o o l i v a t r a b a j o á u r e o
- |...| |...| |.......| |...|
-
-
-
-Always do steps 0 and 1.
-
-
-
-Step 0: Attached pronoun
-
-
--
- Search for the longest among the following suffixes
-
-
- me se sela selo selas selos la le lo las les los nos
-
- and delete it, if it comes after one of
- -
- (a) iéndo ándo ár ér ír
- (b) ando iendo ar er ir
- (c) yendo following u
-
-
- in RV. In the case of (c), yendo must lie in RV, but the preceding
- u can be outside it.
-
-
-
- In the case of (a), deletion is followed by removing the acute accent
- (for example, haciéndola → haciendo).
-
-
-
-
-Step 1: Standard suffix removal
-
-
--
- Search for the longest among the following suffixes, and perform the
- action indicated.
-
- - anza anzas ico ica icos icas ismo ismos able ables ible ibles ista
- istas oso osa osos osas amiento amientos imiento
- imientos
-
- delete if in R2
-
- adora ador ación adoras adores aciones ante antes ancia ancias
-
- delete if in R2
-
- if preceded by ic, delete if in R2
-
- logía logías
-
- replace with log if in R2
-
- ución uciones
-
- replace with u if in R2
-
- encia encias
-
- replace with ente if in R2
-
- amente
-
- delete if in R1
-
- if preceded by iv, delete if in R2 (and if further preceded by at,
- delete if in R2), otherwise,
-
- if preceded by os, ic or ad, delete if in R2
-
- mente
-
- delete if in R2
-
- if preceded by ante, able or ible, delete if in R2
-
- idad idades
-
- delete if in R2
-
- if preceded by abil, ic or iv, delete if in R2
-
- iva ivo ivas ivos
-
- delete if in R2
-
- if preceded by at, delete if in R2
-
-
-
-
-Do step 2a if no ending was removed by step 1.
-
-
-
-Step 2a: Verb suffixes beginning y
-
-
--
- Search for the longest among the following suffixes in RV, and if found,
- delete if preceded by u.
-
-
- ya ye yan yen yeron yendo yo yó yas yes yais
- yamos
-
- (Note that the preceding u need not be in RV.)
-
-
-
-Do Step 2b if step 2a was done, but failed to remove a suffix.
-
-
-
-Step 2b: Other verb suffixes
-
-
--
- Search for the longest among the following suffixes in RV, and perform the
- action indicated.
-
- - en es éis emos
-
- delete, and if preceded by gu delete the u (the gu need not be in
- RV)
-
- arían arías arán arás aríais aría aréis aríamos aremos
- ará aré
- erían erías erán erás eríais ería eréis eríamos eremos
- erá eré
- irían irías irán irás iríais iría iréis iríamos iremos
- irá iré
- aba ada ida ía ara iera ad ed id ase iese aste iste an aban ían
- aran ieran asen iesen aron ieron ado ido ando iendo ió ar er ir as
- abas adas idas ías aras ieras ases ieses ís áis abais íais
- arais ierais aseis ieseis asteis isteis ados idos amos ábamos
- íamos imos áramos iéramos iésemos ásemos
-
- delete
-
-
-
-
-Always do step 3.
-
-
-
-Step 3: residual suffix
-
-
--
- Search for the longest among the following suffixes in RV, and perform the
- action indicated.
-
- - os a o á í ó
-
- delete if in RV
-
- e é
-
- delete if in RV, and if preceded by gu with the u in RV delete the u
-
-
-
-
-And finally:
-
-
--
- Remove acute accents
-
-
-The same algorithm in Snowball
-
-[% highlight_file('spanish') %]
-
-[% footer %]
diff --git a/algorithms/swedish/stemmer.tt b/algorithms/swedish/stemmer.tt
deleted file mode 100644
index aaf8c82..0000000
--- a/algorithms/swedish/stemmer.tt
+++ /dev/null
@@ -1,107 +0,0 @@
-[% header('Swedish stemming algorithm') %]
-
-Links to resources
-
-
-
-[% algorithm_vocab([40, 'jakt', 'klo']) %]
-
-The stemming algorithm
-
-
-The Swedish alphabet includes the following additional letters,
-
-
--
- ä å ö
-
-
-
-The following letters are vowels:
-
-
--
- a e i o u y ä å ö
-
-
-
-R2 is not used: R1 is defined in the same way as in the
-German stemmer.
-(See the note on R1 and R2.)
-
-
-
-Define a valid s-ending as one of
-
-
--
-b c d f g h j k
-l m n o p r t v
-y
-
-
-
-Do each of steps 1, 2 and 3.
-
-
-
-Step 1:
-
-
--
- Search for the longest among the following suffixes in R1, and
- perform the action indicated.
-
- - (a)
- a arna erna heterna orna ad e ade
- ande arne are aste en anden aren heten
- ern ar er heter or as arnas ernas
- ornas es ades andes ens arens hetens erns
- at andet het ast
-
- delete
-
- (b)
- s
-
- delete if preceded by a valid s-ending
-
- (Of course the letter of the valid s-ending is
- not necessarily in R1)
-
-
-
-Step 2:
-
-
--
- Search for one of the following suffixes in R1, and if found
- delete the last letter.
-
-
- dd gd nn dt gt kt tt
-
- (For example, friskt → frisk, fröknarnn →
fröknarn)
-
-
-
-Step 3:
-
-
--
- Search for the longest among the following suffixes in R1, and
- perform the action indicated.
-
- - lig ig els
-
- delete
-
- löst
-
- replace with lös
-
- fullt
-
- replace with full
-
-
-
-The same algorithm in Snowball
-
-[% highlight_file('swedish') %]
-
-[% footer %]
diff --git a/algorithms/turkish/stemmer.tt b/algorithms/turkish/stemmer.tt
deleted file mode 100644
index 3a8c2a4..0000000
--- a/algorithms/turkish/stemmer.tt
+++ /dev/null
@@ -1,47 +0,0 @@
-[% header('Turkish stemming algorithm') %]
-
-Links to resources
-
-
-
-
-The Turkish stemming algorithm was provided by Evren Kapusuz Cilden. It stems
-only noun and nominal verb suffixes because noun stems are more important for
-information retrieval, and only handling these simplifies the algorithm
-significantly.
-
-
-
-In her paper (linked above) Evren explains
-
-
-
-
-The stemmer can be enhanced to stem all kinds of verb suffixes. In Turkish,
-there are over fifty suffixes that can be affixed to verbs [2]. The
-morphological structure of verb suffixes is more complicated than noun
-suffixes. Despite this, one can use the methodology presented in this paper to
-enhance the stemmer to find stems of all kinds of Turkish words.
-
-
-
-where [2] is a reference to the following paper:
-
--
-
-Gulsen Eryigit and Esref Adali.
-An Affix Stripping Morphological Analyzer for Turkish
-Proceedings of the IASTED International
-Conference
-ARTIFICIAL INTELLIGENCE AND APPLICATIONS, February 16-18,2004, Innsbruck, Austria.
-
-
-
-The algorithm in Snowball
-
-[% highlight_file('turkish') %]
-
-[% footer %]
diff --git a/algorithms/yiddish/stemmer.tt b/algorithms/yiddish/stemmer.tt
deleted file mode 100644
index 136e9b0..0000000
--- a/algorithms/yiddish/stemmer.tt
+++ /dev/null
@@ -1,202 +0,0 @@
-[% header('Yiddish stemming algorithm') %]
-
-Links to resources
-
-
-
-[% algorithm_vocab([
-'אַװעקבלאָנדזשען',
-'אַװעקבלאָנדזשענדיק',
-'אַװעקבלאָנדזשענדיקן',
-'אַװעקבלאָנדזשענדיקס',
-'אַװעקבלאָנדזשענדיקע',
-'אַװעקבלאָנדזשענדיקער',
-'אַװעקגײן',
-'אַװעקגײנדיק',
-'אַװעקגײנדיקן',
-'אַװעקגײנדיקס',
-'אַװעקגײנדיקע',
-'אַװעקגײנדיקער',
-'אַװעקגנבֿענען',
-'אַװעקגנבֿענענדיק',
-'אַװעקגנבֿענענדיקן',
-'אַװעקגנבֿענענדיקס',
-'אַװעקגנבֿענענדיקע',
-'אַװעקגנבֿענענדיקער',
-'אַװעקגעבלאָנדזשעט',
-'אַװעקגעבלאָנדזשעטן',
-'אַװעקגעבלאָנדזשעטס',
-'אַװעקגעבלאָנדזשעטע',
-'אַװעקגעבלאָנדזשעטער',
-'אַװעקגעבן',
-'אַװעקגעבנדיק',
-'אַװעקגעבנדיקן',
-'אַװעקגעבנדיקס',
-'אַװעקגעבנדיקע',
-'אַװעקגעבנדיקער',
-'אַװעקגעגאַנגען',
-'אַװעקגעגאַנגענס',
-'אַװעקגעגאַנגענע',
-'אַװעקגעגאַנגענעם',
-'אַװעקגעגאַנגענער',
-'אַװעקגעגנבֿעט',
-], [
-'אַבֿידות',
-'אַבסטראַקטסטער',
-'אַדורכבײַסנדיקער',
-'אַדורכגעביסן',
-'אַדורכגעשמועסט',
-'אַדורכפֿירנדיק',
-'אַװעקגעגאַנגען',
-'אַװעקגעגאַנגענעם',
-'אַװעקגענומענער',
-'אמתדיק',
-'אמתדיקן',
-'אמתדיקע',
-'אמתדיקער',
-'באַהאַלטן',
-'ביכער',
-'געאַכלט',
-'געאײַלט',
-'געאײַלן',
-'געבאָדענעם',
-'געבאָטענעם',
-'געשדכנטע',
-'עראָפּלאַנען',
-'פֿאַרגאַנגענהײט',
-'פֿאָרױסגעגאַנגענע',
-'קינדהײט',
-'װילן',
-'װילסט',
-]) %]
-
-The stemming algorithm
-
-Groupings
-We set up the following groupings:
-
-
- - Niked
- - All niked used in Yiddish and Hebrew
- - AlefBeys
- -
- All actual letters in the Hebrew alphabet, including:
-
- - The alphabet itself: א ב ג ד ה ו ז ח ט י כ ל מ נ ס ע פ צ ק ר ש ת
- - Final consonants: ך ם ן ף ץ
- - Ligatures: װ ױ ײ
-
-
- - Vowel
- - א ו י ע ױ ײ
- - Consonant
- - AlefBeys - Vowel
-
-
-Pre-processing
-
- - We replace two ו, where the second one is not וּ, with װ.
- - We replace ו י, where the י is not a יִ, with ױ.
- - We replace two י, where the second one is not a יִ, with ײ.
- - We replace final forms (e.g. ץ) with their normal form (e.g. צ).
- - We remove all niked.
-
-
-Marking regions
-
- Only a single marker is used: P1.
- To begin with, this is set at the end of the word.
-
-
-
- - If the word begins with גע (except for געלט and געבן) it is replaced with "GE" and the cursor is advanced.
- -
- Next, if the word begins with any verbal prefix, the cursor is advanced past this prefix.
- Prefixes include (niked added for clarity, not included in algorithm):
-
-
- - Free stressed: אַדורכ, דורכ, אַהינ, אַהער, אַװעק, מיט, אַנטקעגנ, אַקעגנ, אַנידער, אַראָפּ, אַרױס, אַרױפ, אַרומ, אַרײנ, אַרונטער, אַריבער, נאָכ, פאַרבײַ, אַהײמ, אַפיר, פאַרױס, פונאַנדער, צוזאַמענ, צונױפ, צוריק.
- - Stressed: אױס, אױפ, אומ, אונטער, איבער, אײֲנ, אָנ, אָפּ, בײַ, פאָר, צו.
- - Unstressed: אַנט, באַ, דער, צע
-
- - If the verbal prefix is followed by גע (except for געבן), it is replaced with "GE" and the cursor is advanced (e.g. אַװעקגעגאַנגען).
- - If the verbal prefix is followed by צו (except for צוגן, צוקט or צוקן with nothing afterwards), it is replaced with "TSU" and the cursor is advanced (e.g. אַרומצוגײן).
-
-
-We are now at the start of the main portion of the word (past any verbal prefix and past participle marker).
-
-
- - The following valid Yiddish three-consonant sequences are skipped: שפר, שטר, שטש, דזש.
- - If there is a sequence of three consonants, the cursor is advanced past them, and P1 is marked.
- - Otherwise, the cursor is advanced to the first vowel, and then up to the first non-vowel, minus 1, and P1 is marked.
- - If P1 is not at least 3 letters beyond the main portion, it is advanced past the 3rd letter.
-
-
-Backwards mode
-
-Unless otherwise stated, all deletes ensure we are beyond P1.
-In each pass, at the first level of bullets, the longest matching suffix always wins.
-
-First pass:
-
- - Delete plural/adjective endings: ער, ערס, ן, ס, ען, נס, ענערס, ענס, עס.
-
- - Exceptions: יע is not deleted (e.g. אגיטאַציע), יעס becomes יע
-
- - Delete plural/adjective endings: ענעם, ענער, ענע, ענס
-
- - If preceded by an irregular past participle ending in ן, replace it with the stem, e.g. געגאַנגען becomes גײ.
-
- - Delete the verb/past participle ending: ט.
-
- - Because of the above, also delete noun/adjectives ending in טן, טע, טער, טס so that they stem identically to the equivalent noun with the ט.
- - Similarly for past participles: -tns, -tene, -tenem, -tener טנס, טענע, טענעם, טענער
- - If the ט was before P1, we try to perform the same action while leaving the ט in place
- - At the same time, if preceded by an irregular past participle ending in ט, replace it with the stem, e.g. געבראַכט becomes ברענג.
-
- - Delete the past participle endings: עט, עטן, עטס, עטע, עטער
-
- Anything ending with גײן is transformed to גײ.
- - Anything ending in an irregular past participle is corrected to the stem.
- - Delete noun endings: ונג, הײט, קײט, יקײט, שאַפֿט.
- - Delete noun endings: יזם, יזמס.
- - Delete Hebraic plural ending: ים
- - Replace the Hebraic plural ending ות with ה.
- - Delete the diminutive endings: עלעך, עלע, לעך, עלעס, עלען.
- - Delete the noun endings: יסט, יסטן.
-
- - Exceptions to the above: words ending in גיסט or שיסט.
-
- - Delete the verb ending סטו.
- - Delete the superlative endings: סטער, סטע, סטן.
- - Delete the verb ending: סט.
-
-
-Second pass - after the first pass, do the following to the remaining stem:
-
- - Delete noun endings: ונג, הײט, קײט, יקײט, שאַפֿט.
- - Delete the diminutive ending ל if it follows a consonant.
-
-
-Third pass - after the second pass, do the following to the remaining stem:
-
- - Delete the adjective endings יק, יג, ניק, דיק, יש, for words like אָפהענגיקײט.
-
- - Exceptions to the above: words ending in גליק or בליק.
-
- - Delete the present participle ending: נדיק
- -
- Delete the present participle ending ענדיק if it follows נג, נק, נ, מ, a consonant and ל or a vowel.
- Otherwise, delete just the נדיק portion.
-
-
-
-Finally, all remaining GE and TSU are deleted.
-
-The same algorithm in Snowball
-
-[% highlight_file('yiddish') %]
-
-[% footer %]
diff --git a/codesets/guide.tt b/codesets/guide.tt
deleted file mode 100644
index eb02f9f..0000000
--- a/codesets/guide.tt
+++ /dev/null
@@ -1,93 +0,0 @@
-[% header('Character codes') %]
-
-
-Snowball (since version 2.0) supports specifying non-ASCII characters using
-the standard Unicode notation U+XXXX
where XXXX is a string of
-hex digits. However, this doesn't make for very readable source code, so the
-Snowball scripts on this site define more mnemonic representations of the
-non-ASCII characters which they use - for example, the German stemmer includes
-the lines
-
-
-[% highlight('
- /* special characters */
-
- stringdef a" ' _ "'{U+00E4}'" _ '
- stringdef o" ' _ "'{U+00F6}'" _ '
- stringdef u" ' _ "'{U+00FC}'
- stringdef ss '{U+00DF}'
-") %]
-
-
-(In Unicode, hex values E4, F6, FC and DF are the numeric values
-of characters ä, ö, ü and ß respectively.)
-
-
-
-Then the code which follows uses [% highlight_inline("'{a" _ '"' _ "}'") %] when it wants
-ä, etc.
-
-
-
-Using literal Unicode characters in strings in the source file may work in some
-cases, but isn't really supported - the snowball compiler doesn't (currently at
-least) have the concept of "source character set", so at best you'll limit
-which programming languages your stemmer can be used with.
-
-
-
-If you wish to describe other Latin-alphabet based codesets for use in stemmers
-we recommend using the following conventions:
-
-
-
- accent | | ASCII form | | example
- |
acute | | single quote | | e' for é
- |
grave | | grave | | a` for à
- |
umlaut | | double quote | | u" for ü
- |
circumflex | | circumflex | | i^ for î
- |
cedilla | | letter c | | cc for ç
- |
tilde | | tilde | | n~ for ñ
- |
ring | | letter o | | ao for å
- |
line through | | solidus | | o/ for ø
-
- |
breve | | plus | | a+ for ă
- |
double acute | | letter q | | oq for ő
- |
comma below | | , | | t, for ț
- |
-
-
-And, should they ever arise, use r
for left and right
-hook (as in Polish), and v
for hacek (as in Czech).
-
-
-
-The ‘line-through’ accent covers a number of miscellaneous cases: the
-Scandinavian o/
, Icelandic d/
and Polish l/
.
-
-
-
-Use ae
and ss
for æ ligature and the German
-ß, with
-upper case forms AE
and SS
. Use th
for Icelandic thorn.
-
-
-
-We used to recommend ,
for cedilla, but we need a way to
-represent comma-below for Romanian, so we've repurposed ,
-for that and now recommend c
for cedilla instead.
-
-
-
-If you're writing a new stemmer, see below for a file of suitable
-stringdef
lines you can cut and paste into your code.
-
-
-Links
-
-
-
-[% footer %]
diff --git a/compiler/snowman.tt b/compiler/snowman.tt
deleted file mode 100644
index 442af83..0000000
--- a/compiler/snowman.tt
+++ /dev/null
@@ -1,1558 +0,0 @@
-[% header('Snowball Manual') %]
-
-Links to resources
-
-
-
-Snowball definition
-
-
-Snowball is a small string-handling language, and its name was chosen as a
-tribute to SNOBOL (Farber 1964, Griswold 1968 —
-see the references at the end of the
-introduction),
-with which it shares the
-concept of string patterns delivering signals that are used to control the
-flow of the program.
-
-
-1 Data types
-
-
-The basic data types handled by Snowball are strings of characters, signed
-integers, and boolean truth values, or more simply strings, integers
-and booleans. Snowball supports Unicode characters, which may be represented
-as UTF-8, 8-bit characters, or 16-bit wide characters (depending on the
-programming language code is being generated for - for C, all these options are
-supported).
-
-
-2 Names
-
-
-A name in Snowball starts with an ASCII letter, followed by zero or more ASCII
-letters, digits and underscores. A name can be of type string,
-integer, boolean, routine, external or
-grouping. All names must be declared. A declaration has the form
-
-
-
- Ts ( ... )
-
-
-
-where symbol T
is one of string
, integer
etc, and the region in
-brackets contains a list of names separated by whitespace. For example,
-
-
-[% highlight("
- integers ( p1 p2 )
- booleans ( Y_found )
-
- routines (
- shortv
- R1 R2
- Step_1a Step_1b Step_1c Step_2 Step_3 Step_4 Step_5a Step_5b
- )
-
- externals ( stem )
-
- groupings ( v v_WXY v_LSZ )
-") %]
-
-
-p1
and p2
are integers, Y_found
is boolean, and so on. Snowball is quite
-strict about the declarations, so all the names go in the same name space,
-no name may be declared twice, all used names must be declared, no two
-routine definitions can have the same name, etc. Names declared and
-subsequently not used are merely reported in a warning message.
-
-
-A name may not be one of the reserved words of Snowball. Additionally, names
-for externals must be valid function/method names in the language being
-generated in most cases, which generally means they can't be reserved words
-in that language (e.g. [% highlight_inline("externals (null)") %] will generate
-invalid Java code containing a method public boolean null()
.)
-For internal symbols we add a prefix to avoid this issue, but an external
-has to provide an external interface. When generating C code, the
--eprefix
option provides a potential solution to this problem.
-
-
-
-Names in Snowball are case-sensitive, but external names which differ only in
-case will cause a problem for languages with case-insensitive identifiers (such
-as Pascal). This issue is avoided for internal symbols in such languages by
-encoding case difference via an added prefix.
-
-
-
-So for portability a little care is needed when choosing names for externals.
-The convention when using Snowball to implement stemming algorithms is to have
-a single external named stem
, which should be safe.
-
-
-3 Literals
-
-3.1 Integer Literals
-
-
-A literal integer is an ASCII digit sequence, and is always interpreted as
-decimal.
-
-
-3.2 String Literals
-
-
-A literal string is written between single quotes, for example,
-
-
-[% highlight("
- 'aeiouy'
-") %]
-
-
-Two special insert characters for use in literal strings are defined by
-the directive [% highlight_inline("stringescapes AB") %], for example,
-
-
-[% highlight("
- stringescapes {}
-") %]
-
-
-Conventionally {
and }
are used as the insert
-characters, and we would recommend following this convention unless you want to
-use these as literal characters in your strings a lot. However,
- A
and B
can be any printing
-characters, except that A
can't be a single quote.
-(If A
and B
are the same then
- A
itself can never be escaped.)
-
-
-
-A subsequent occurrence of the stringescapes
directive redefines
-the insert characters (but any string macros already defined with
-stringdef
remain defined).
-
-
-
-Within insert characters, the following sequences are understood:
-
-
-
-
-User-defined string macros which can be specified using
-stringdef
. Macro m
is defined in the
-form stringdef m 'S'
, where 'S'
is a
-string, and m
a sequence of one or more printing
-characters. Thereafter, {m}
inside a string causes
- S
to be substituted in place of m
.
-
-
-
-New in Snowball 2.0: Unicode codepoints can be specified using the syntax
-U+
followed by one or more hex digits - for example,
-[% highlight_inline("'{U+FFFD}'") %]. These are automatically handled
-appropriately in all cases except if you want to generate C code to handle a
-single byte character set other than ISO-8859-1. Such cases are handled by
-defining string macros for the U+
codes in the character set,
-after which the same Snowball source can be used. You can't mix use of
-U+
codes defined as string macros and with their default
-meanings in the same compilation. When U+
codes are defined
-as string macros, snowball will upper case the characters after the
-+
if there's no macro defined with the case as given.
-
-
-
-By default {'}
will substitute '
and
-{{}
will substitute {
, although macros '
and {
may subsequently be
-redefined.
-
-
-
-A further feature is that {W}
inside
-a string, where W
is a
-sequence of whitespace characters including one or more newlines, is
-ignored. This enables long strings to be written over a number of lines.
-
-
-
-
-For example,
-
-
-[% highlight("
- stringescapes {}
-
- /* Spanish diacritics */
-
- stringdef a' '{U+00E1}' // a-acute
- stringdef e' '{U+00E9}' // e-acute
- stringdef i' '{U+00ED}' // i-acute
- stringdef o' '{U+00F3}' // o-acute
- stringdef u' '{U+00FA}' // u-acute
- stringdef u\" '{U+00FC}' // u-diaeresis
- stringdef n~ '{U+00F1}' // n-tilde
-
- /* All the characters in Spanish used to represent vowels */
-
- define v 'aeiou{a'}{e'}{i'}{o'}{u'}{u\"}'
-") %]
-
-4 Routines
-
-
-A routine definition has the form
-
-
-[% highlight("
- define R as C
-") %]
-
-
-where R
is the routine name and C
is a command, or bracketed group of
-commands. So a routine is defined as a sequence of zero or more commands.
-Snowball routines do not (at present) take parameters. For example,
-
-
-[% highlight("
- define Step_5b as ( // this defines Step_5b
- ['l'] // three commands here: [, 'l' and ]
- R2 'l' // two commands, R2 and 'l'
- delete // delete is one command
- )
-" _ '
- define R1 as $p1 <= cursor
- /* R1 is defined as the single command "$p1 <= cursor" */
-') %]
-
-
-A routine is called simply by using its name, R
, as a command.
-
-
-5 Commands and signals
-
-
-The flow of control in Snowball is arranged by the implicit use of
-signals, rather than the explicit use of constructs like the if
,
-else
, break
of C. The scheme is designed for handling strings, but is
-perhaps easier to introduce using integers. Suppose x
, y
, z
... are
-integers. The command
-
-
-[% highlight('
- $x = 1
-') %]
-
-
-sets x
to 1. The command
-
-
-[% highlight('
- $x > 0
-') %]
-
-
-tests if x
is greater than zero. Both commands give a signal t or f,
-(true or false), but while the second command gives t if x
is greater
-than zero and f otherwise, the first command always gives t. In Snowball,
-every command gives a t or f signal. A sequence of commands can be turned
-into a single command by putting them in a list surrounded by round
-brackets:
-
-
-
- ( C1 C2 C3 ... Ci Ci+1 ... )
-
-
-
-When this is obeyed, Ci+1
will be obeyed if each of the preceding C1
...
-Ci
give t, but as soon as a Ci
gives f, the subsequent Ci+1 Ci+2
...
-are ignored, and the whole sequence gives signal f. If all the Ci
give t,
-however, the bracketed command sequence also gives t. So,
-
-
-[% highlight('
- $x > 0 $y = 1
-') %]
-
-
-sets y
to 1 if x
is greater than zero. If x
is less than or equal to zero
-the two commands give f.
-
-
-
-If C1
and C2
are commands, we can build up the larger commands,
-
-
-
- C1 or C2
- - — Do
C1
. If it gives t ignore C2
, otherwise do C2
. The resulting
- signal is t if and only if C1
or C2
gave t.
- C1 and C2
- - — Do
C1
. If it gives f ignore C2
, otherwise do C2
. The resulting
- signal is t if and only if C1
and C2
gave t.
- not C
- - — Do
C
. The resulting signal is t if C
gave f, otherwise f.
- try C
- - — Do
C
. The resulting signal is t whatever the signal of C
.
- fail C
- - — Do
C
. The resulting signal is f whatever the signal of C
.
-
-
-
-So for example,
-
-
-
- - [% highlight_inline('($x > 0 $y = 1) or ($y = 0)') %]
-
- — sets
y
to 1 if x
is greater than zero, otherwise to zero.
-
- - [% highlight_inline('try( ($x > 0) and ($z > 0) $y = 1)') %]
-
- — sets
y
to 1 if both x
and z
are greater than 0, and gives t.
-
-
-
-This last example is the same as
-
-
-[% highlight('
- try($x > 0 $z > 0 $y = 1)
-') %]
-
-
-so that and
seems unnecessary here. But we will see that and
has a
-particular significance in string commands.
-
-
-
-When a ‘monadic’ construct like not
, try
or fail
is not followed by a
-round bracket, the construct applies to the shortest following valid command.
-So for example
-
-
-[% highlight('
- try not $x < 1 $z > 0
-') %]
-
-
-would mean
-
-
-[% highlight('
- try ( not ( $x < 1 ) ) $z > 0
-') %]
-
-
-because [% highlight_inline('$x < 1') %] is the shortest valid command following not
, and then
-not $x < 1
is the shortest valid command following try
.
-
-
-
-The ‘dyadic’ constructs like and
and or
must sit in a bracketed list
-of commands anyway, for example,
-
-
-
- ( C1 C2 and C3 C4 or C5 )
-
-
-
-And then in this case C2
and C3
are connected by the and
; C4
and C5
are
-connected by the or
. So
-
-
-[% highlight('
- $x > 0 not $y > 0 or not $z > 0 $t > 0
-') %]
-
-
-means
-
-
-[% highlight('
- $x > 0 ((not ($y > 0)) or (not ($z > 0))) $t > 0
-') %]
-
-
-and
and or
are equally binding, and bind from left to right,
-so C1 or C2 and C3
means (C1 or C2) and C3
etc.
-
-
-6 Integer commands
-
-
-There are two sorts of integer commands - assignments and comparisons. Both
-are built from Arithmetic Expressions (AEs).
-
-
-Arithmetic Expressions (AEs)
-
-
-An AE consists of integer names, literal numbers and a few other things
-connected by dyadic +
, -
, *
and /
, and monadic -
, with the same
-binding powers and semantics as C. As well as integer names and literal
-numbers, the following may be used in AEs:
-
-
-
-minint | | — the minimum negative number
- |
maxint | | — the maximum positive number
- |
cursor | | — the current value of the string cursor
- |
limit | | — the current value of the string limit
- |
size | | — the size of the string, in "slots"
- |
sizeof s | | — the number of "slots" in s , where s is the name of a string or (since Snowball 2.1) a literal string
- |
New in Snowball 2.0: |
-len | | — the length of the string, in Unicode characters
- |
lenof s | | — the number of Unicode characters in s , where s is the name of a string or (since Snowball 2.1) a literal string
- |
-
-
-[% highlight_inline('size') %] and [% highlight_inline('sizeof') %] count in
-"slots" - see the "Character representation" section below for details.
-
-
-
-The cursor and limit concepts are explained below.
-
-
-Integer assignments
-
-
-An integer assignment has the form
-
-
-
- $X assign_op AE
-
-
-
-where X
is an integer name and assign_op is one of the five assignments
- =
, +=
, -=
, *=
, or /=
.
-The meanings are the same as in C.
-
-
-
-For example,
-
-
-[% highlight('
- $p1 = limit // set p1 to the string limit
-') %]
-
-
-Integer assignments always give the signal t.
-
-
-Integer comparisons
-
-
-An integer comparison has the form
-
-
-
- $X rel_op AE
-
-
-
-or (since Snowball 2.0):
-
-
-
- $(AE1 rel_op AE2)
-
-
-
-where X
is an integer name and rel_op is one of the six tests
- ==
, !=
, >=
,
- >
, <=
, or <
.
-Again, the meanings are the same as in C.
-
-
-
-Examples of integer comparisons are,
-
-
-[% highlight('
- $p1 <= cursor // signal is f if the cursor is before position p1
- $(len >= 3) // signal is f unless the string is at least 3 characters long
-') %]
-
-
-The second form is more general since an integer name is a valid AE, but it
-also allows comparisons which don't involve integer variables. Before support
-for this was added the second example could only be achieved by assigning
-len
to a variable and then testing that variable instead.
-
-
-7 String commands
-
-
-If s
is a string name, a string command has the form
-
-
-[% highlight('
- $s C
-') %]
-
-
-where C
is a command that operates on the string. Strings can be processed
-left-to-right or right-to-left, but we will describe only the
-left-to-right case for now. The string has a cursor, which we will
-denote by c, and a limit point, or limit, which we will denote by l. c
-advances towards l in the course of a string command, but the various
-constructs and
, or
, not
etc have side-effects which keep moving it
-backwards. Initially c is at the start and l the end of the string. For
-example,
-
-
-
- 'a|n|i|m|a|d|v|e|r|s|i|o|n'
- | |
- c l
-
-
-
-c, and l, mark the boundaries between characters, and not
-characters themselves. The characters between c and l will be denoted by
-c:l.
-
-
-
-If C
gives t, the cursor c will have a new, well-defined value. But if C
-gives f, c is undefined. Its later value will in fact be determined by the
-outer context of commands in which C
came to be obeyed, not by C
itself.
-
-
-
-Here is a list of the commands that can be used to operate on strings.
-
-
-a) Setting a value
-
-
-= S
- - where
S
is the name of a string or a literal string. c:l is set equal
- to S
, and l is adjusted to point to the end of the copied string. The
- signal is t. For example,
-
-[% highlight('
- $x ' _ " = 'animadversion' /* literal string */" _ '
- $y = x /* string name */
-') %]
-
-
-
-b) Basic tests
-
-
-S
- - here and below,
S
is the name of a string or a literal string. If c:l
- begins with the substring S
, c is repositioned to the end of this
- substring, and the signal is t. Otherwise the signal is f. For example,
-
-[% highlight('
- $x ' _ "'anim' /* gives t, assuming the string is 'animadversion' */" _ '
- $x ' _ "('anim' 'ad' 'vers')" _ '
- /* ditto */
-
- $t ' _ "= 'anim'" _ '
- $x t /* ditto */
-') %]
-
- true
, false
- true
is a dummy command that generates signal t. false
generates
- signal f. They are sometimes useful for emphasis,
-
-[% highlight("
- define start_off as true // nothing to do
- define exception_list as false // put in among(...) list later
-") %]
-
- true
is equivalent to ()
-C1 or C2
- - This is like the case for integers described above, but the extra
- touch is that if
C1
gives f, c is set back to its old position after
- C1
has given f and before C2
is tried, so that the test takes place on
- the same point in the string. So we have
-
-[% highlight('
- $x ' _ "('anim' /* signal t */
- 'ation' /* signal f */
- ) or
- ( 'an' /* signal t - from the beginning */
- )
-") %]
-
- C1 and C2
- - And similarly c is set back to its old position after
C1
has given t
- and before C2
is tried. So,
-
-[% highlight('
- $x ' _ "'anim' and 'an' /* signal t */" _ '
- $x ' _ "('anim' 'an') /* signal f, since 'an' and 'ad' mis-match */
-") %]
-
- not C
-try C
- - These are like the integer tests, with the added feature that c is set
- back to its old position after an f signal is turned into t. So,
-
-[% highlight('
- $x ' _ "(not 'animation' not 'immersion')
- /* both tests are done at the start of the string */
-" _ '
- $x ' _ "(try 'animus' try 'an'
- 'imad')
- /* - gives t */
-") %]
-
-
- try C | | is equivalent to | | C or true
- |
- test C
- - This does command
C
but without advancing c. Its signal is the same as
- the signal of C
, but following signal t, c is set back to its old
- value.
-
- test C | | is equivalent to | | not not C
- |
test C1 C2 | | is equivalent to | | C1 and C2
- |
- fail C
- - This does
C
and gives signal f. It is equivalent to C false
. Like
- false
it is useful, but only rarely.
-
- do C
- - This does
C
, puts c back to its old value and gives signal t. It is
- very useful as a way of suppressing the side effect of f signals and
- cursor movement.
-
- do C | | is equivalent to | | try test C
- |
| | or | | test try C
- |
- goto C
- - c is moved right until obeying
C
gives t. But if c cannot be moved
- right because it is at l the signal is f. c is set back to the position
- it had before the last obeying of C
, so the effect is to leave c before
- the pattern which matched against C
.
-
-[% highlight('
- $x goto' _ " 'ad' /* positions c after 'anim' */" _ '
- $x goto' _ " 'ax' /* signal f */
-") %]
-
- gopast C
- - Like goto, but c is not set back, so the effect is to leave c after
- the pattern which matched against
C
.
-
-[% highlight('
- $x gopast' _ " 'ad' /* positions c after 'animad' */
-") %]
-
- repeat C
- C
is repeated until it gives f. When this happens c is set back to the
- position it had before the last repetition of C
, and repeat C
gives
- signal t. For example,
-
-[% highlight('
- $x repeat gopast' _ " 'a' /* position c after the last 'a' */
-") %]
-
-loop AE C
- - This is like
C C ... C
written out AE times, where AE is an arithmetic
- expression. For example,
-
-[% highlight('
- $x loop 2 gopast' _ " ('a' or 'e' or 'i' or 'o' or 'u')
- /* position c after the second vowel */
-") %]
-
- The equivalent expression in C has the shape,
-
-[% highlight("
- { int i;
- int limit = AE;
- for (i = 0; i < limit; i++) C;
- }
-", "c") %]
-
- atleast AE C
- - This is equivalent to
loop AE C repeat C
.
-
- hop AE
- - moves c AE character positions towards l, but if AE is negative, or if
- there are fewer than AE characters between c and l the signal is f.
- For example,
-
-[% highlight("
- test hop 3
-") %]
-
- tests that c:l contains more than 2 characters.
-
-
next
- - is equivalent to
hop 1
.
-
-
-c) Moving text about
-
-
-We have seen in (a) that $x = y
, when x
and y
are strings, sets c:l of x
-to the value of y
. Conversely
-
-
-[% highlight('
- $x => y
-') %]
-
-
-sets the value of y
to the c:l region of x
.
-
-
-
-A more delicate mechanism for pushing text around is to define a substring,
-or slice of the string being tested. Then
-
-
-
-- [% highlight_inline('[') %]
-
- sets the left-end of the slice to c,
-
- [% highlight_inline(']') %]
-
- sets the right-end of the slice to c,
-
- [% highlight_inline("-> s") %]
-
- moves the slice to variable
s
,
- - [% highlight_inline("<- S") %]
-
- replaces the slice with variable (or literal)
S
.
-
-
-
-For example
-
-
-[% highlight("
- /* assume x holds 'animadversion' */" _ '
- $x ( [ ' _ " // '[animadversion' - [ set as indicated
- loop 2 gopast 'a'
- // '[anima|dversion' - c is marked by '|'
- ] // '[anima]dversion' - ] set as indicated
- -> y // y is 'anima'
- )
-") %]
-
-
-For any string, the slice ends should be assumed to be unset until they are
-set with the two commands [
, ]
. Thereafter the slice ends will retain
-the same values until altered.
-
-
-
-- [% highlight_inline("delete") %]
-
- is equivalent to [% highlight_inline("<- ''") %]
-
-
-
-This next example deletes all vowels in x,
-
-
-[% highlight("
- define vowel ('a' or 'e' or 'i' or 'o' or 'u')
- /* ... */" _ '
- $ x repeat ( gopast([vowel]) delete )
-') %]
-
-
-As this example shows, the slice markers [
and ]
often appear as
-pairs in a bracketed style, which makes for easy reading of the Snowball
-scripts. But it must be remembered that, unusually in a computer
-programming language, they are not true brackets.
-
-
-
-More simply, text can be inserted at c.
-
-
-
-- [% highlight_inline("insert S") %]
-
- insert variable or literal
S
before c, moving c to the right of the
- insert. <+
is a synonym for insert
.
-
- - [% highlight_inline("attach S") %]
-
- the same, but leave c at the left of the insert.
-
-
-d) Marks
-
-
-The cursor, c, (and the limit, l) can be thought of as having a numeric
-value, from zero upwards:
-
-
-
- | a | n | i | m | a | d | v | e | r | s | i | o | n |
- 0 1 2 3 4 5 6 7 8 9 10 11 12 13
-
-
-
-It is these numeric values of c and l which are accessible through
-cursor
and limit
in arithmetic expressions.
-
-
-
-- [% highlight_inline("setmark X") %]
-
- sets
X
to the current value of c, where X
is an integer variable.
- It's equivalent to: [% highlight_inline("$X = cursor") %]
-
- - [% highlight_inline("tomark AE") %]
-
- moves c forward to the position given by AE,
-
-
- [% highlight_inline("atmark AE") %]
-
- tests if c is at position AE (t or f signal).
- It's equivalent to: [% highlight_inline("$(cursor == AE)") %]
-
-
-
-In the case of [% highlight_inline("tomark AE") %], a similar fail condition occurs as with [% highlight_inline("hop AE") %].
-If c is already beyond AE, or if position l is before position AE, the
-signal is f.
-
-
-
-In the stemming algorithms, certain regions of the word are defined by
-setting marks, and later the failure condition of [% highlight_inline("tomark") %] is used to see if
-c is inside a particular region.
-
-
-
-Two other commands put c at l, and test if c is at l,
-
-
-
-- [% highlight_inline("tolimit") %]
-
- moves c forward to l (signal t always),
-
-
- [% highlight_inline("atlimit") %]
-
- tests if c is at l (t or f signal).
-
-
-e) Changing l
-
-
-In this account of string commands we see c moving right towards l, while
-l stays fixed at the end. In fact l can be reset to a new position between
-c and its old position, to act as a shorter barrier for the movement of c.
-
-
-
-setlimit C1 for C2
- C1
is obeyed, and if it gives f the signal from setlimit
- is f with no further action.
-
-
-
- Otherwise, the final value of c becomes the new
- position of l. c is then set back to its old value before C1
was
- obeyed, and C2
is obeyed. Finally l is set back to its old position,
- and the signal of C2
becomes the signal of setlimit
.
-
-
-
- So the signal is f if either C1
or C2
gives f, otherwise t.
- For example,
-
-
-[% highlight('
- $x ( setlimit goto' _ " 's' // 'animadver}sion' new l as marked '}'
- for // below, '|' marks c after each goto
- ( goto 'a' and // '|animadver}sion'
- goto 'e' and // 'animadv|er}sion'
- goto 'i' and // 'an|imadver}sion'
- )
- )
-") %]
-
-
- This checks that x has characters ‘a’, ‘e’ and ‘i’ before the first
- ‘s’.
-
-
-
-
-f) Backward processing
-
-
-String commands have been described with c to the left of l and moving
-right. But the process can be reversed.
-
-
-
-- [% highlight_inline("backwards C") %]
-
- c and l are swapped over, and c moves left towards l.
C
is obeyed, the
- signal given by C
becomes the signal of backwards C
, and c and l are
- swapped back to their old values (except that l may have been adjusted
- because of deletions and insertions). C
cannot contain another
- [% highlight_inline("backwards") %] command.
-
- - [% highlight_inline("reverse C") %]
-
- A similar idea, but here c simply moves left instead of moving right,
- with the beginning of the string as the limit, l.
C
can contain other
- [% highlight_inline("reverse") %] commands, but it cannot contain commands to do deletions or
- insertions — it must be used for testing only. (Without this
- restriction Snowball's semantics would become very untidy.)
-
-
-
-Forward and backward processing are entirely symmetric, except that forward
-processing is the default direction, and literal strings are always
-written out forwards, even when they are being tested backwards. So the
-following are equivalent,
-
-
-[% highlight('
- $x (' _ "
- 'ani' 'mad' 'version' atlimit
- )
-" _ '
- $x backwards (' _ "
- 'version' 'mad' 'ani' atlimit
- )
-") %]
-
-
-If a routine is defined for backwards mode processing, it must be included
-inside a backwardmode(...)
declaration.
-
-
-g) substring and among
-
-
-The use of [% highlight_inline("substring") %] and [% highlight_inline("among") %] is central to the implementation of the
-stemming algorithms. It is like a case switch on strings. In its simpler
-form,
-
-
-
- substring among('S1' 'S2' 'S3' ...)
-
-
-
-searches for the longest matching substring 'S1'
or 'S2'
or 'S3'
... from
-position c. (The 'Si'
must all be different.) So this has the same
-semantics as
-
-
-
- ('S1' or 'S2' or 'S3' ...)
-
-
-
-— so long as the 'Si'
are written out in decreasing order of length.
-
-
-
-substring
may be omitted, in which case it is attached to its following
-among
, so
-
-
-[% highlight("
- among(/*...*/)
-") %]
-
-
-without a preceding [% highlight_inline("substring") %] is equivalent to
-
-
-[% highlight("
- (substring among(/*...*/))
-") %]
-
-
-[% highlight_inline("substring") %] may also be detached from its [% highlight_inline("among") %], although it must
-precede it textually in the same routine in which the [% highlight_inline("among") %] appears.
-The more general form of [% highlight_inline("substring /* ... */ among") %] is,
-
-
-
- substring
- C
- among( 'S11' 'S12' ... (C1)
- 'S21' 'S22' ... (C2)
- ...
-
- 'Sn1' 'Sn2' ... (Cn)
- )
-
-
-
-Obeying substring
searches for a longest match among the 'Sij'
. The
-signal from substring
is t if a match is found, otherwise f.
-Any commands C
between the substring
and among
will be run after this
-search and only if the search finds a match (it would be equivalent to remove C
and replace each
-Ci
with C Ci
). When the
-among
comes to be obeyed, the Ci
corresponding to the matched 'Sij'
is
-obeyed, and its signal becomes the signal of the among
command.
-
-
-
-substring/among
pairs must match up textually inside each routine
-definition. But there is no problem with an among
containing other
-substring/among
pairs, and substring
is optional before among
anyway.
-The essential constraint is that two substring
s must be separated by an
-among
, and each substring
must be followed by an among
.
-
-
-
-The effect of obeying among
when the preceding substring
is not obeyed
-is undefined. This would happen for example here,
-
-
-[% highlight('
- try($x != 617 substring)' _ "
- among(...) // 'substring' is bypassed in the exceptional case where x == 617
-") %]
-
-
-The significance of separating the substring
from the among
is to allow
-them to work in different contexts. For example,
-
-
-
- setlimit tomark L for substring
-
- among( 'S11' 'S12' ... (C1)
- ...
-
- 'Sn1' 'Sn2' ... (Cn)
- )
-
-
-
-Here the test for the longest 'Sij'
is constrained to the region between c
-and the mark point given by integer L
. But the commands Ci
operate outside
-this limit. Another example is
-
-
-
- reverse substring
-
- among( 'S11' 'S12' ... (C1)
- ...
-
- 'Sn1' 'Sn2' ... (Cn)
- )
-
-
-
-The substring test is in the opposite direction in the string to the
-direction of the commands Ci
.
-
-
-
-The last (Cn)
may be omitted, in which case (true)
is assumed.
-
-
-
-Each string 'Sij'
may be optionally followed by a
-routine name,
-
-
-
- among(
- 'S11' R11 'S12' R12 ... (C1)
- 'S21' R21 'S22' R22 ... (C2)
- ...
- 'Sn1' Rn1 'Sn2' Rn2 ... (Cn)
- )
-
-
-
-If a routine name is not specified, it is equivalent
-to a routine which simply returns signal t,
-
-
-[% highlight("
- define null as true
-") %]
-
-
-— so we can imagine each 'Sij'
having its associated routine
-Rij
. Then obeying the among
causes a search for the longest
-'Sij'
whose corresponding routine
-Rij
gives t.
-
-
-
-The routines Rij
should be written without any
-side-effects, other than the inevitable cursor movement. (c is in
-any case set back to its old value following a call of
-Rij
.)
-
-
-8 Booleans
-
-
-[% highlight_inline("set B") %] and [% highlight_inline("unset B") %] set B
to true and false respectively, where B
is a
-boolean name. [% highlight_inline("B") %] as a command gives a signal t if it is set true, f
-otherwise. For example,
-
-
-[% highlight("
- booleans ( Y_found ) // declare the boolean
-
- /* ... */
-
- unset Y_found // unset it
- do ( ['y'] <-'Y' set Y_found )
- /* if c:l begins 'y' replace it by 'Y' and set Y_found */
-
- do repeat(goto (v ['y']) <-'Y' set Y_found)
- /* repeatedly move down the string looking for v 'y' and
- replacing 'y' with 'Y'. Whenever the replacement takes
- place set Y_found. v is a test for a vowel, defined as
- a grouping (see below). */
-
-
- /* Y_found means there are some letters Y in the string.
- Later we can use this to trigger a conversion back to
- lower case y. */
-
- /* ... */
-
- do (Y_found repeat(goto (['Y']) <- 'y'))
-") %]
-
-9 Groupings
-
-
-A grouping brings characters together and enables them to be looked for
-with a single test.
-
-
-
-If G
is declared as a grouping, it can be defined by
-
-
-
- define G G1 op G2 op G3 ...
-
-
-
-where op is +
or -
, and G1
, G2
, G3
are literal strings, or groupings that
-have already been defined. (There can be zero or more of these additional
-op components). For example,
-
-
-[% highlight("
- define capital_letter 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
- define small_letter 'abcdefghijklmnopqrstuvwxyz'
- define letter capital_letter + small_letter
- define vowel 'aeiou' + 'AEIOU'
- define consonant letter - vowel
- define digit '0123456789'
- define alphanumeric letter + digit
-") %]
-
-
-Once G
is defined, it can be used as a command, and is equivalent to a test
-
-
-
- 'ch1' or 'ch2' or ...
-
-
-
-where ch1
, ch2
... list all the characters in the grouping.
-
-
-
-[% highlight_inline("non G") %] is the converse test, and matches any character except the
-characters of G
. Note that [% highlight_inline("non G") %] is not the same as [% highlight_inline("not G") %], in fact
-
-
-
-[% highlight_inline("non G") %] is equivalent to [% highlight_inline("(not G next)") %]
-
-
-
-[% highlight_inline("non") %] may be optionally followed by hyphen, for example:
-
-
-[% highlight("
- non-vowel
- non-digit
-") %]
-
-
-Bear in mind that [% highlight_inline("non-vowel") %] doesn't only match a
-consonant - it'll match any character which isn't in the vowel
-grouping. Failing to consider this has led to bugs in stemming algorithms -
-for example, here we intended to undouble a consonant:
-
-
-[% highlight("
- [non-vowel] -> ch
- ch
- delete
-") %]
-
-
-The problem with this code is it will also mangle numbers with repeated digits,
-for example 1900
would become 190
. A good rule of
-thumb here seems to be to use an inclusive grouping check if the code goes on
-to delete the character matched:
-
-
-[% highlight("
- [consonant] -> ch
- ch
- delete
-") %]
-
-10 A Snowball program
-
-
-A complete program consists of a sequence of declarations followed by a
-sequence of definitions of groupings and routines. Routines which are
-implicitly defined as operating on c:l from right to left must be included
-in a backwardmode(...)
declaration.
-
-
-
-A Snowball program is called up via a simple
-API
-through its defined externals. For example,
-
-
-[% highlight("
- externals ( stem1 stem2 )
- /* ... */
- define stem1 as ( /* stem1 commands */ )
- define stem2 as ( /* stem2 commands */ )
-") %]
-
-
-The API also allows a current string to be defined, and this becomes the
-c:l string for the external routine to work on. Its final value is the
-result handed back through the API.
-
-
-
-The strings, integers and booleans are accessible from any point in the
-program, and exist throughout the running of the Snowball program. They are
-therefore like static declarations in C.
-
-
-11 Comments, and other whitespace fillers
-
-
-At a deeper level, a program is a sequence of tokens, interspersed with
-whitespace. Names, reserved words, literal numbers and strings are all
-tokens. Various symbols, made up of non-alphanumerics, are also tokens.
-
-
-
-A name, reserved word or number is terminated by the first character that
-cannot form part of it. A symbol is recognised as the longest sequence of
-characters that forms a valid symbol. So +=-
is two symbols, +=
and
--
, because +=
is a valid symbol in the language while +=-
is not.
-Whitespace separates tokens but is otherwise ignored. This of course is
-like C.
-
-
-
-Occasionally a newer version of Snowball may add a new token. So as not to
-break existing programs, any such tokens declared as a name (via
-[% highlight_inline('integers') %], [% highlight_inline('routines') %], etc)
-will lose their token status for the rest of the program. This applies
-to the tokens
-[% highlight_inline('len') %]
-and
-[% highlight_inline('lenof') %].
-
-
-
-Anywhere that whitespace can occur, there may also occur:
-
-
-
-(a) Comments, in the usual multi-line [% highlight_inline('/* .... */') %] or single line
-[% highlight_inline('// ...') %] format.
-
-
-
-(b) Get directives. These are like #include
commands in C, and have the form
-[% highlight_inline("get 'S'") %], where 'S'
is a literal string. For example,
-
-
-[% highlight("
- get '/home/martin/snowball/main-hdr' // include the file contents
-") %]
-
-
-(c) [% highlight_inline("stringescapes XY") %] where X
and Y
are any two printing characters.
-
-
-
-(d) [% highlight_inline("stringdef m 'S'") %] where m
is a sequence of characters not including
-whitespace and terminated with whitespace, and 'S'
is a literal string.
-
-
-12 Character representation
-
-
-In this description of Snowball, it is assumed that strings are composed of
-characters, and that characters can be defined numerically, but the numeric range
-of these characters is not defined. As implemented, three different schemes
-are supported. Characters can either be (a) bytes in the range 0 to 255,
-as in traditional C strings, or (b) byte pairs in the range 0 to 65535,
-as in Java strings, or (c) UTF-8 encoded byte sequences in the range 0
-to 65535, so that a character may occupy 1, 2 or 3 bytes.
-
-
-
-For case (c), we need to make a slight separation of the concept of
-characters into symbols, the units of text being represented, and
-slots, the units of space into which they map. (So in case (a), all
-slots are one byte; in case (b) all slots are two bytes.)
-c and l have numeric values that can be used in AEs (arithmetic
-expressions). These values count the number of slots. Similarly
-setmark
, tomark
and atmark
are remembering and then using slot
-counts. size
and sizeof
measure string size
-in slots, not symbols. However, hop N
moves c over N
symbols,
-not N
slots, and next
is equivalent to hop 1
.
-
-
-
-Snowball 2.0 adds len
and lenof
, which measure string length in symbols
-(so they're the same as size
and sizeof
in cases (a) and (b), but
-different in case (c)).
-
-
-
-So long as these simple distinctions are recognised, the same Snowball
-script can be compiled to work with any of the three encoding schemes.
-
-
-13 Legacy Features
-
-13.1 hex and decimal
-
-
-This section documents features of Snowball for which there's a strongly
-preferred alternative. They're still supported for compatibility with
-existing code which uses them, but you shouldn't use them in new code.
-We document them here so that their meaning in existing code can be
-understood, and especially to aid updating to the preferred alternatives.
-
-
-
-In a stringdef
, string may be preceded by the word hex
,
-or the word decimal
. This was how non-ASCII characters
-were specified before support for specifying Unicode codepoints using the
-U+
notation was added.
-
-
-
-hex
and decimal
mean that the contents of the string
-are interpreted as characters values written out in hexadecimal, or decimal,
-notation. The characters should be separated by spaces. For example,
-
-
-[% highlight("
- hex 'DA' /* is character hex DA */
- hex 'D A' /* is the two characters, hex D and A (carriage
- return, and line feed) */
- decimal '10' /* character 10 (line feed) */
- decimal '13 10' /* characters 13 and 10 (carriage return, and
- line feed) */
-") %]
-
-
-The following forms are equivalent,
-
-
-[% highlight("
- hex 'd a' /* lower case also allowed */
- hex '0D 000A' /* leading zeroes ignored */
- hex ' D A ' /* extra spacing is harmless */
-") %]
-
-
-The interpretation of the values is as Unicode codepoints if command
-line option -utf8
or -widechars
is specified, and as
-character values in an unspecified single byte character set otherwise. For
-ASCII and ISO-8859-1 the character values match Unicode codepoints, but to
-handle other single byte character sets (e.g. ISO-8859-2 or KOI8-R) you needed
-a special version of a Snowball source with different character values
-specified via stringdef
. The U+
notation allows
-you to use a single Snowball source in this situation.
-
-
-13.2 among starter command
-
-
-The among
command supports a "starter" command, C
-in this example:
-
-
-
- among( (C)
- 'S11' 'S12' ... (C1)
- 'S21' 'S22' ... (C2)
- ...
- 'Sn1' 'Sn2' ... (Cn)
- )
-
-
-
-This is equivalent to adding C
at the start of each
-Ci
:
-
-
-
- among( 'S11' 'S12' ... (C C1)
- 'S21' 'S22' ... (C C2)
- ...
- 'Sn1' 'Sn2' ... (C Cn)
- )
-
-
-
-However, both are equivalent to:
-
-
-
- substring C
- among( 'S11' 'S12' ... (C1)
- 'S21' 'S22' ... (C2)
- ...
- 'Sn1' 'Sn2' ... (Cn)
- )
-
-
-
-This requires an explicit substring
but seems clearer so
-we recommend using this in new code and have designated the use of a starter as
-a legacy feature.
-
-
-
-A starter is also allowed with an explicit substring
, for example:
-
-
-
- substring
- Cs
- among( (Ca)
- 'S11' 'S12' ... (C1)
- 'S21' 'S22' ... (C2)
- ...
- 'Sn1' 'Sn2' ... (Cn)
- )
-
-
-
-is equivalent to:
-
-
-
- substring
- Cs
- Ca
- among( 'S11' 'S12' ... (C1)
- 'S21' 'S22' ... (C2)
- ...
- 'Sn1' 'Sn2' ... (Cn)
- )
-
-
-Snowball syntax
-
--
-
-In the grammar which follows, ||
is used for alternatives,
- [X]
means that X is
-optional, and [X]*
means that X is repeated zero or more
-times. meta-symbols are defined on the left. <char>
means any
-character.
-
-
-
-The definition of literal string
does not allow for the escaping
-conventions established by the stringescapes
directive. The command
-?
is a debugging aid.
-
-
-
-<letter> ::= a || b || ... || z || A || B || ... || Z
-<digit> ::= 0 || 1 || ... || 9
-<name> ::= <letter> [ <letter> || <digit> || _ ]*
-<s_name> ::= <name>
-<i_name> ::= <name>
-<b_name> ::= <name>
-<r_name> ::= <name>
-<g_name> ::= <name>
-<literal string>::= '[<char>]*'
-<number> ::= <digit> [ <digit> ]*
-
-S ::= <s_name> || <literal string>
-G ::= <g_name> || <literal string>
-
-<declaration> ::= strings ( [<s_name>]* ) ||
- integers ( [<i_name>]* ) ||
- booleans ( [<b_name>]* ) ||
- routines ( [<r_name>]* ) ||
- externals ( [<r_name>]* ) ||
- groupings ( [<g_name>]* )
-
-<r_definition> ::= define <r_name> as C
-<plus_or_minus> ::= + || -
-<g_definition> ::= define <g_name> G [ <plus_or_minus> G ]*
-
-AE ::= (AE) ||
- AE + AE || AE - AE || AE * AE || AE / AE || - AE ||
- maxint || minint || cursor || limit ||
- size || sizeof S ||
- len || lenof S ||
- <i_name> || <number>
-
-<i_assign> ::= $ <i_name> = AE ||
- $ <i_name> += AE || $ <i_name> -= AE ||
- $ <i_name> *= AE || $ <i_name> /= AE
-
-<i_test_op> ::= == || != || > || >= || < || <=
-
-<i_test> ::= $ ( AE <i_test_op> AE ) ||
- $ <i_name> <i_test_op> AE
-
-<s_command> ::= $ <s_name> C
-
-C ::= ( [C]* ) ||
- <i_assign> || <i_test> || <s_command> || C or C || C and C ||
- not C || test C || try C || do C || fail C ||
- goto C || gopast C || repeat C || loop AE C ||
- atleast AE C || S || = S || insert S || attach S ||
- <- S || delete || hop AE || next ||
- => <s_name> || [ || ] || -> <s_name> ||
- setmark <i_name> || tomark AE || atmark AE ||
- tolimit || atlimit || setlimit C for C ||
- backwards C || reverse C || substring ||
- among ( [<literal string> [<r_name>] || (C)]* ) ||
- set <b_name> || unset <b_name> || <b_name> ||
- <r_name> || <g_name> || non [-] <g_name> ||
- true || false || ?
-
-P ::= [P]* || <declaration> ||
- <r_definition> || <g_definition> ||
- backwardmode ( P )
-
-<program> ::= P
-
-
-
-synonyms: <+ for insert
-
-
-[% footer %]
diff --git a/js/arabic-stemmer.js b/js/arabic-stemmer.js
deleted file mode 100644
index 7de24c0..0000000
--- a/js/arabic-stemmer.js
+++ /dev/null
@@ -1,1613 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var ArabicStemmer = function() {
- var base = new BaseStemmer();
- // Among table searched by r_Normalize_pre through base.find_among(a_0).
- // Each entry is [string, integer, result]. The third element lines up with
- // the `case` labels of the switch in r_Normalize_pre:
- //   1      -> the matched character is deleted (U+0640, U+064B..U+0652)
- //   2..11  -> U+0660..U+0669 are rewritten to the ASCII digits "0".."9"
- //   12..47 -> U+FE80..U+FEF4 are rewritten to one character in U+0621..U+064A
- //   48..51 -> U+FEF5..U+FEFC are rewritten to two characters starting U+0644
- // The second element is -1 for every entry of this table; its meaning is
- // defined by find_among in BaseStemmer, which is not part of this file.
- /** @const */ var a_0 = [
- ["\u0640", -1, 1],
- ["\u064B", -1, 1],
- ["\u064C", -1, 1],
- ["\u064D", -1, 1],
- ["\u064E", -1, 1],
- ["\u064F", -1, 1],
- ["\u0650", -1, 1],
- ["\u0651", -1, 1],
- ["\u0652", -1, 1],
- ["\u0660", -1, 2],
- ["\u0661", -1, 3],
- ["\u0662", -1, 4],
- ["\u0663", -1, 5],
- ["\u0664", -1, 6],
- ["\u0665", -1, 7],
- ["\u0666", -1, 8],
- ["\u0667", -1, 9],
- ["\u0668", -1, 10],
- ["\u0669", -1, 11],
- ["\uFE80", -1, 12],
- ["\uFE81", -1, 16],
- ["\uFE82", -1, 16],
- ["\uFE83", -1, 13],
- ["\uFE84", -1, 13],
- ["\uFE85", -1, 17],
- ["\uFE86", -1, 17],
- ["\uFE87", -1, 14],
- ["\uFE88", -1, 14],
- ["\uFE89", -1, 15],
- ["\uFE8A", -1, 15],
- ["\uFE8B", -1, 15],
- ["\uFE8C", -1, 15],
- ["\uFE8D", -1, 18],
- ["\uFE8E", -1, 18],
- ["\uFE8F", -1, 19],
- ["\uFE90", -1, 19],
- ["\uFE91", -1, 19],
- ["\uFE92", -1, 19],
- ["\uFE93", -1, 20],
- ["\uFE94", -1, 20],
- ["\uFE95", -1, 21],
- ["\uFE96", -1, 21],
- ["\uFE97", -1, 21],
- ["\uFE98", -1, 21],
- ["\uFE99", -1, 22],
- ["\uFE9A", -1, 22],
- ["\uFE9B", -1, 22],
- ["\uFE9C", -1, 22],
- ["\uFE9D", -1, 23],
- ["\uFE9E", -1, 23],
- ["\uFE9F", -1, 23],
- ["\uFEA0", -1, 23],
- ["\uFEA1", -1, 24],
- ["\uFEA2", -1, 24],
- ["\uFEA3", -1, 24],
- ["\uFEA4", -1, 24],
- ["\uFEA5", -1, 25],
- ["\uFEA6", -1, 25],
- ["\uFEA7", -1, 25],
- ["\uFEA8", -1, 25],
- ["\uFEA9", -1, 26],
- ["\uFEAA", -1, 26],
- ["\uFEAB", -1, 27],
- ["\uFEAC", -1, 27],
- ["\uFEAD", -1, 28],
- ["\uFEAE", -1, 28],
- ["\uFEAF", -1, 29],
- ["\uFEB0", -1, 29],
- ["\uFEB1", -1, 30],
- ["\uFEB2", -1, 30],
- ["\uFEB3", -1, 30],
- ["\uFEB4", -1, 30],
- ["\uFEB5", -1, 31],
- ["\uFEB6", -1, 31],
- ["\uFEB7", -1, 31],
- ["\uFEB8", -1, 31],
- ["\uFEB9", -1, 32],
- ["\uFEBA", -1, 32],
- ["\uFEBB", -1, 32],
- ["\uFEBC", -1, 32],
- ["\uFEBD", -1, 33],
- ["\uFEBE", -1, 33],
- ["\uFEBF", -1, 33],
- ["\uFEC0", -1, 33],
- ["\uFEC1", -1, 34],
- ["\uFEC2", -1, 34],
- ["\uFEC3", -1, 34],
- ["\uFEC4", -1, 34],
- ["\uFEC5", -1, 35],
- ["\uFEC6", -1, 35],
- ["\uFEC7", -1, 35],
- ["\uFEC8", -1, 35],
- ["\uFEC9", -1, 36],
- ["\uFECA", -1, 36],
- ["\uFECB", -1, 36],
- ["\uFECC", -1, 36],
- ["\uFECD", -1, 37],
- ["\uFECE", -1, 37],
- ["\uFECF", -1, 37],
- ["\uFED0", -1, 37],
- ["\uFED1", -1, 38],
- ["\uFED2", -1, 38],
- ["\uFED3", -1, 38],
- ["\uFED4", -1, 38],
- ["\uFED5", -1, 39],
- ["\uFED6", -1, 39],
- ["\uFED7", -1, 39],
- ["\uFED8", -1, 39],
- ["\uFED9", -1, 40],
- ["\uFEDA", -1, 40],
- ["\uFEDB", -1, 40],
- ["\uFEDC", -1, 40],
- ["\uFEDD", -1, 41],
- ["\uFEDE", -1, 41],
- ["\uFEDF", -1, 41],
- ["\uFEE0", -1, 41],
- ["\uFEE1", -1, 42],
- ["\uFEE2", -1, 42],
- ["\uFEE3", -1, 42],
- ["\uFEE4", -1, 42],
- ["\uFEE5", -1, 43],
- ["\uFEE6", -1, 43],
- ["\uFEE7", -1, 43],
- ["\uFEE8", -1, 43],
- ["\uFEE9", -1, 44],
- ["\uFEEA", -1, 44],
- ["\uFEEB", -1, 44],
- ["\uFEEC", -1, 44],
- ["\uFEED", -1, 45],
- ["\uFEEE", -1, 45],
- ["\uFEEF", -1, 46],
- ["\uFEF0", -1, 46],
- ["\uFEF1", -1, 47],
- ["\uFEF2", -1, 47],
- ["\uFEF3", -1, 47],
- ["\uFEF4", -1, 47],
- ["\uFEF5", -1, 51],
- ["\uFEF6", -1, 51],
- ["\uFEF7", -1, 49],
- ["\uFEF8", -1, 49],
- ["\uFEF9", -1, 50],
- ["\uFEFA", -1, 50],
- ["\uFEFB", -1, 48],
- ["\uFEFC", -1, 48]
- ];
-
- // Among tables a_1..a_21. Every entry is [string, integer, result]; the
- // result is the value the routines below compare against after calling
- // base.find_among / base.find_among_b (0 is treated as "no match").
- // NOTE(review): the second element is -1 or an index into the same table
- // (e.g. 0 in a_7, 5 in a_18); presumably it links an entry to a shorter
- // entry it extends - confirm against find_among in BaseStemmer.
-
- // Searched backwards by r_Normalize_post (first step).
- /** @const */ var a_1 = [
- ["\u0622", -1, 1],
- ["\u0623", -1, 1],
- ["\u0624", -1, 1],
- ["\u0625", -1, 1],
- ["\u0626", -1, 1]
- ];
-
- // Searched forwards by r_Normalize_post (loop step).
- /** @const */ var a_2 = [
- ["\u0622", -1, 1],
- ["\u0623", -1, 1],
- ["\u0624", -1, 2],
- ["\u0625", -1, 1],
- ["\u0626", -1, 3]
- ];
-
- // Searched by r_Checks1.
- /** @const */ var a_3 = [
- ["\u0627\u0644", -1, 2],
- ["\u0628\u0627\u0644", -1, 1],
- ["\u0643\u0627\u0644", -1, 1],
- ["\u0644\u0644", -1, 2]
- ];
-
- // Searched by r_Prefix_Step1.
- /** @const */ var a_4 = [
- ["\u0623\u0622", -1, 2],
- ["\u0623\u0623", -1, 1],
- ["\u0623\u0624", -1, 1],
- ["\u0623\u0625", -1, 4],
- ["\u0623\u0627", -1, 3]
- ];
-
- // Searched by r_Prefix_Step2.
- /** @const */ var a_5 = [
- ["\u0641", -1, 1],
- ["\u0648", -1, 1]
- ];
-
- // Searched by r_Prefix_Step3a_Noun (same entries as a_3).
- /** @const */ var a_6 = [
- ["\u0627\u0644", -1, 2],
- ["\u0628\u0627\u0644", -1, 1],
- ["\u0643\u0627\u0644", -1, 1],
- ["\u0644\u0644", -1, 2]
- ];
-
- // Searched by r_Prefix_Step3b_Noun.
- /** @const */ var a_7 = [
- ["\u0628", -1, 1],
- ["\u0628\u0627", 0, -1],
- ["\u0628\u0628", 0, 2],
- ["\u0643\u0643", -1, 3]
- ];
-
- // Searched by r_Prefix_Step3_Verb.
- /** @const */ var a_8 = [
- ["\u0633\u0623", -1, 4],
- ["\u0633\u062A", -1, 2],
- ["\u0633\u0646", -1, 3],
- ["\u0633\u064A", -1, 1]
- ];
-
- // Searched by r_Prefix_Step4_Verb.
- /** @const */ var a_9 = [
- ["\u062A\u0633\u062A", -1, 1],
- ["\u0646\u0633\u062A", -1, 1],
- ["\u064A\u0633\u062A", -1, 1]
- ];
-
- // Searched backwards by r_Suffix_Noun_Step1a.
- /** @const */ var a_10 = [
- ["\u0643\u0645\u0627", -1, 3],
- ["\u0647\u0645\u0627", -1, 3],
- ["\u0646\u0627", -1, 2],
- ["\u0647\u0627", -1, 2],
- ["\u0643", -1, 1],
- ["\u0643\u0645", -1, 2],
- ["\u0647\u0645", -1, 2],
- ["\u0647\u0646", -1, 2],
- ["\u0647", -1, 1],
- ["\u064A", -1, 1]
- ];
-
- // Searched backwards by r_Suffix_Noun_Step1b.
- /** @const */ var a_11 = [
- ["\u0646", -1, 1]
- ];
-
- // Searched backwards by r_Suffix_Noun_Step2a.
- /** @const */ var a_12 = [
- ["\u0627", -1, 1],
- ["\u0648", -1, 1],
- ["\u064A", -1, 1]
- ];
-
- // Searched backwards by r_Suffix_Noun_Step2b.
- /** @const */ var a_13 = [
- ["\u0627\u062A", -1, 1]
- ];
-
- // Searched backwards by r_Suffix_Noun_Step2c1.
- /** @const */ var a_14 = [
- ["\u062A", -1, 1]
- ];
-
- // Searched backwards by r_Suffix_Noun_Step2c2.
- /** @const */ var a_15 = [
- ["\u0629", -1, 1]
- ];
-
- // Searched backwards by r_Suffix_Noun_Step3.
- /** @const */ var a_16 = [
- ["\u064A", -1, 1]
- ];
-
- // Searched backwards by r_Suffix_Verb_Step1.
- /** @const */ var a_17 = [
- ["\u0643\u0645\u0627", -1, 3],
- ["\u0647\u0645\u0627", -1, 3],
- ["\u0646\u0627", -1, 2],
- ["\u0647\u0627", -1, 2],
- ["\u0643", -1, 1],
- ["\u0643\u0645", -1, 2],
- ["\u0647\u0645", -1, 2],
- ["\u0643\u0646", -1, 2],
- ["\u0647\u0646", -1, 2],
- ["\u0647", -1, 1],
- ["\u0643\u0645\u0648", -1, 3],
- ["\u0646\u064A", -1, 2]
- ];
-
- // Searched backwards by r_Suffix_Verb_Step2a.
- /** @const */ var a_18 = [
- ["\u0627", -1, 1],
- ["\u062A\u0627", 0, 2],
- ["\u062A\u0645\u0627", 0, 4],
- ["\u0646\u0627", 0, 2],
- ["\u062A", -1, 1],
- ["\u0646", -1, 1],
- ["\u0627\u0646", 5, 3],
- ["\u062A\u0646", 5, 2],
- ["\u0648\u0646", 5, 3],
- ["\u064A\u0646", 5, 3],
- ["\u064A", -1, 1]
- ];
-
- // Searched backwards by r_Suffix_Verb_Step2b.
- /** @const */ var a_19 = [
- ["\u0648\u0627", -1, 1],
- ["\u062A\u0645", -1, 1]
- ];
-
- /** @const */ var a_20 = [
- ["\u0648", -1, 1],
- ["\u062A\u0645\u0648", 0, 2]
- ];
-
- /** @const */ var a_21 = [
- ["\u0649", -1, 1]
- ];
-
- // Flags shared by the routines of this stemmer instance.
- // r_Checks1 sets B_is_defined and B_is_noun to true and B_is_verb to false;
- // r_Prefix_Step4_Verb sets B_is_verb to true and B_is_noun to false.
- var /** boolean */ B_is_defined = false;
- var /** boolean */ B_is_verb = false;
- var /** boolean */ B_is_noun = false;
-
-
-    /**
-     * Walks from the current cursor position towards base.limit and rewrites
-     * every substring matched by the a_0 table: result 1 deletes the match,
-     * results 2..51 replace it with the string listed below. Positions where
-     * nothing in a_0 matches are stepped over one at a time. The cursor is put
-     * back to its entry value before returning.
-     *
-     * @return {boolean} false as soon as a slice operation reports failure,
-     *     true otherwise
-     */
-    function r_Normalize_pre() {
-        // Replacement text keyed by the a_0 result code (1 is a deletion and
-        // is handled separately; 0 means "no match").
-        var replacement_for = {
-            2: "0", 3: "1", 4: "2", 5: "3", 6: "4",
-            7: "5", 8: "6", 9: "7", 10: "8", 11: "9",
-            12: "\u0621", 13: "\u0623", 14: "\u0625", 15: "\u0626",
-            16: "\u0622", 17: "\u0624", 18: "\u0627", 19: "\u0628",
-            20: "\u0629", 21: "\u062A", 22: "\u062B", 23: "\u062C",
-            24: "\u062D", 25: "\u062E", 26: "\u062F", 27: "\u0630",
-            28: "\u0631", 29: "\u0632", 30: "\u0633", 31: "\u0634",
-            32: "\u0635", 33: "\u0636", 34: "\u0637", 35: "\u0638",
-            36: "\u0639", 37: "\u063A", 38: "\u0641", 39: "\u0642",
-            40: "\u0643", 41: "\u0644", 42: "\u0645", 43: "\u0646",
-            44: "\u0647", 45: "\u0648", 46: "\u0649", 47: "\u064A",
-            48: "\u0644\u0627", 49: "\u0644\u0623",
-            50: "\u0644\u0625", 51: "\u0644\u0622"
-        };
-        var entry = base.cursor;
-        while (true)
-        {
-            var here = base.cursor;
-            base.bra = base.cursor;
-            var code = base.find_among(a_0);
-            if (code == 0)
-            {
-                // Nothing matched here: advance one position, or stop once
-                // the limit has been reached.
-                base.cursor = here;
-                if (base.cursor >= base.limit)
-                {
-                    break;
-                }
-                base.cursor++;
-                continue;
-            }
-            base.ket = base.cursor;
-            if (code === 1)
-            {
-                if (!base.slice_del())
-                {
-                    return false;
-                }
-            }
-            else if (typeof code === "number" && typeof replacement_for[code] === "string")
-            {
-                if (!base.slice_from(replacement_for[code]))
-                {
-                    return false;
-                }
-            }
-        }
-        base.cursor = entry;
-        return true;
-    };
-
-    /**
-     * Two passes over the text, both leaving the cursor where it started.
-     * Pass 1 searches backwards from base.limit for an a_1 entry and, when one
-     * is found, replaces it with "\u0621". Pass 2 walks forwards from the
-     * cursor and replaces every a_2 match: result 1 -> "\u0627",
-     * result 2 -> "\u0648", result 3 -> "\u064A".
-     *
-     * @return {boolean} false as soon as a slice operation reports failure,
-     *     true otherwise
-     */
-    function r_Normalize_post() {
-        var entry = base.cursor;
-
-        // Pass 1: backward search starting at the limit.
-        base.limit_backward = base.cursor;
-        base.cursor = base.limit;
-        base.ket = base.cursor;
-        if (base.find_among_b(a_1) != 0)
-        {
-            base.bra = base.cursor;
-            if (!base.slice_from("\u0621"))
-            {
-                return false;
-            }
-            base.cursor = base.limit_backward;
-        }
-        base.cursor = entry;
-
-        // Pass 2: forward scan, one position at a time where nothing matches.
-        while (true)
-        {
-            var here = base.cursor;
-            base.bra = base.cursor;
-            var code = base.find_among(a_2);
-            if (code == 0)
-            {
-                base.cursor = here;
-                if (base.cursor >= base.limit)
-                {
-                    break;
-                }
-                base.cursor++;
-                continue;
-            }
-            base.ket = base.cursor;
-            var replacement;
-            switch (code) {
-                case 1: replacement = "\u0627"; break;
-                case 2: replacement = "\u0648"; break;
-                case 3: replacement = "\u064A"; break;
-                default: continue;
-            }
-            if (!base.slice_from(replacement))
-            {
-                return false;
-            }
-        }
-        base.cursor = entry;
-        return true;
-    };
-
-    /**
-     * Looks for an a_3 entry at the cursor. For result 1 the length of
-     * base.current must exceed 4, for result 2 it must exceed 3; when that
-     * holds, B_is_noun and B_is_defined are set to true and B_is_verb to false.
-     * Nothing is deleted or replaced here.
-     *
-     * @return {boolean} false when no entry matches or the length test fails,
-     *     true otherwise
-     */
-    function r_Checks1() {
-        base.bra = base.cursor;
-        var code = base.find_among(a_3);
-        if (code == 0)
-        {
-            return false;
-        }
-        base.ket = base.cursor;
-        var too_short_at;
-        switch (code) {
-            case 1: too_short_at = 4; break;
-            case 2: too_short_at = 3; break;
-            default: return true;
-        }
-        if (base.current.length <= too_short_at)
-        {
-            return false;
-        }
-        B_is_noun = true;
-        B_is_verb = false;
-        B_is_defined = true;
-        return true;
-    };
-
-    /**
-     * Looks for an a_4 entry at the cursor and, provided the length of
-     * base.current exceeds 3, replaces the match with a single character:
-     * result 1 -> "\u0623", 2 -> "\u0622", 3 -> "\u0627", 4 -> "\u0625".
-     *
-     * @return {boolean} false when nothing matches, the length test fails or
-     *     the replacement fails; true otherwise
-     */
-    function r_Prefix_Step1() {
-        base.bra = base.cursor;
-        var code = base.find_among(a_4);
-        if (code == 0)
-        {
-            return false;
-        }
-        base.ket = base.cursor;
-        var replacement;
-        switch (code) {
-            case 1: replacement = "\u0623"; break;
-            case 2: replacement = "\u0622"; break;
-            case 3: replacement = "\u0627"; break;
-            case 4: replacement = "\u0625"; break;
-            default: return true;
-        }
-        if (base.current.length <= 3)
-        {
-            return false;
-        }
-        return base.slice_from(replacement) ? true : false;
-    };
-
-    /**
-     * Deletes an a_5 entry ("\u0641" or "\u0648") found at the cursor, unless
-     * the length of base.current is 3 or less or the match is immediately
-     * followed by "\u0627".
-     *
-     * @return {boolean} true when the match was deleted, false otherwise
-     */
-    function r_Prefix_Step2() {
-        base.bra = base.cursor;
-        var found = base.find_among(a_5) != 0;
-        if (!found)
-        {
-            return false;
-        }
-        base.ket = base.cursor;
-        if (base.current.length <= 3)
-        {
-            return false;
-        }
-        // Negative look-ahead: give up if "\u0627" comes next, otherwise
-        // rewind the cursor to where the look-ahead started.
-        var after_match = base.cursor;
-        if (base.eq_s("\u0627"))
-        {
-            return false;
-        }
-        base.cursor = after_match;
-        return base.slice_del() ? true : false;
-    };
-
-    /**
-     * Deletes an a_6 entry found at the cursor. Result 1 requires the length
-     * of base.current to exceed 5, result 2 requires it to exceed 4.
-     *
-     * @return {boolean} false when nothing matches, the length test fails or
-     *     the deletion fails; true otherwise
-     */
-    function r_Prefix_Step3a_Noun() {
-        base.bra = base.cursor;
-        var code = base.find_among(a_6);
-        if (code == 0)
-        {
-            return false;
-        }
-        base.ket = base.cursor;
-        var too_short_at;
-        switch (code) {
-            case 1: too_short_at = 5; break;
-            case 2: too_short_at = 4; break;
-            default: return true;
-        }
-        if (base.current.length <= too_short_at)
-        {
-            return false;
-        }
-        return base.slice_del() ? true : false;
-    };
-
-    /**
-     * Handles an a_7 entry found at the cursor, provided the length of
-     * base.current exceeds 3: result 1 deletes the match, result 2 replaces
-     * it with "\u0628", result 3 replaces it with "\u0643".
-     *
-     * @return {boolean} false when nothing matches, the length test fails or
-     *     the slice operation fails; true otherwise
-     */
-    function r_Prefix_Step3b_Noun() {
-        base.bra = base.cursor;
-        var code = base.find_among(a_7);
-        if (code == 0)
-        {
-            return false;
-        }
-        base.ket = base.cursor;
-        var replacement = null; // null means "delete the match"
-        switch (code) {
-            case 1: break;
-            case 2: replacement = "\u0628"; break;
-            case 3: replacement = "\u0643"; break;
-            default: return true;
-        }
-        if (base.current.length <= 3)
-        {
-            return false;
-        }
-        var done = (replacement === null)
-            ? base.slice_del()
-            : base.slice_from(replacement);
-        return done ? true : false;
-    };
-
-    /**
-     * Looks for an a_8 entry at the cursor and, provided the length of
-     * base.current exceeds 4, replaces the match with a single character:
-     * result 1 -> "\u064A", 2 -> "\u062A", 3 -> "\u0646", 4 -> "\u0623".
-     *
-     * @return {boolean} false when nothing matches, the length test fails or
-     *     the replacement fails; true otherwise
-     */
-    function r_Prefix_Step3_Verb() {
-        base.bra = base.cursor;
-        var code = base.find_among(a_8);
-        if (code == 0)
-        {
-            return false;
-        }
-        base.ket = base.cursor;
-        var replacement;
-        switch (code) {
-            case 1: replacement = "\u064A"; break;
-            case 2: replacement = "\u062A"; break;
-            case 3: replacement = "\u0646"; break;
-            case 4: replacement = "\u0623"; break;
-            default: return true;
-        }
-        if (base.current.length <= 4)
-        {
-            return false;
-        }
-        return base.slice_from(replacement) ? true : false;
-    };
-
-    /**
-     * Looks for an a_9 entry at the cursor. When one is found and the length
-     * of base.current exceeds 4, sets B_is_verb to true and B_is_noun to
-     * false, then replaces the match with "\u0627\u0633\u062A".
-     *
-     * @return {boolean} false when nothing matches, the length test fails or
-     *     the replacement fails; true otherwise
-     */
-    function r_Prefix_Step4_Verb() {
-        base.bra = base.cursor;
-        var found = base.find_among(a_9) != 0;
-        if (!found)
-        {
-            return false;
-        }
-        base.ket = base.cursor;
-        if (base.current.length <= 4)
-        {
-            return false;
-        }
-        // The flags are updated before the replacement is attempted.
-        B_is_verb = true;
-        B_is_noun = false;
-        return base.slice_from("\u0627\u0633\u062A") ? true : false;
-    };
-
-    /**
-     * Deletes an a_10 entry found by a backward search from the cursor.
-     * The length of base.current must be at least 4 for result 1, at least 5
-     * for result 2 and at least 6 for result 3.
-     *
-     * @return {boolean} false when nothing matches, the length test fails or
-     *     the deletion fails; true otherwise
-     */
-    function r_Suffix_Noun_Step1a() {
-        base.ket = base.cursor;
-        var code = base.find_among_b(a_10);
-        if (code == 0)
-        {
-            return false;
-        }
-        base.bra = base.cursor;
-        var min_length;
-        switch (code) {
-            case 1: min_length = 4; break;
-            case 2: min_length = 5; break;
-            case 3: min_length = 6; break;
-            default: return true;
-        }
-        if (base.current.length < min_length)
-        {
-            return false;
-        }
-        return base.slice_del() ? true : false;
-    };
-
-    /**
-     * Deletes the a_11 entry ("\u0646") found by a backward search from the
-     * cursor, provided the length of base.current exceeds 5.
-     *
-     * @return {boolean} true when the match was deleted, false otherwise
-     */
-    function r_Suffix_Noun_Step1b() {
-        base.ket = base.cursor;
-        var found = base.find_among_b(a_11) != 0;
-        if (!found)
-        {
-            return false;
-        }
-        base.bra = base.cursor;
-        return (base.current.length > 5 && base.slice_del()) ? true : false;
-    };
-
-    /**
-     * Deletes an a_12 entry ("\u0627", "\u0648" or "\u064A") found by a
-     * backward search from the cursor, provided the length of base.current
-     * exceeds 4.
-     *
-     * @return {boolean} true when the match was deleted, false otherwise
-     */
-    function r_Suffix_Noun_Step2a() {
-        base.ket = base.cursor;
-        var found = base.find_among_b(a_12) != 0;
-        if (!found)
-        {
-            return false;
-        }
-        base.bra = base.cursor;
-        return (base.current.length > 4 && base.slice_del()) ? true : false;
-    };
-
-    /**
-     * Deletes the a_13 entry ("\u0627\u062A") found by a backward search from
-     * the cursor, provided the length of base.current is at least 5.
-     *
-     * @return {boolean} true when the match was deleted, false otherwise
-     */
-    function r_Suffix_Noun_Step2b() {
-        base.ket = base.cursor;
-        var found = base.find_among_b(a_13) != 0;
-        if (!found)
-        {
-            return false;
-        }
-        base.bra = base.cursor;
-        return (base.current.length >= 5 && base.slice_del()) ? true : false;
-    };
-
-    /**
-     * Deletes the a_14 entry ("\u062A") found by a backward search from the
-     * cursor, provided the length of base.current is at least 4.
-     *
-     * @return {boolean} true when the match was deleted, false otherwise
-     */
-    function r_Suffix_Noun_Step2c1() {
-        base.ket = base.cursor;
-        var found = base.find_among_b(a_14) != 0;
-        if (!found)
-        {
-            return false;
-        }
-        base.bra = base.cursor;
-        return (base.current.length >= 4 && base.slice_del()) ? true : false;
-    };
-
-    /**
-     * Deletes the a_15 entry ("\u0629") found by a backward search from the
-     * cursor, provided the length of base.current is at least 4.
-     *
-     * @return {boolean} true when the match was deleted, false otherwise
-     */
-    function r_Suffix_Noun_Step2c2() {
-        base.ket = base.cursor;
-        var found = base.find_among_b(a_15) != 0;
-        if (!found)
-        {
-            return false;
-        }
-        base.bra = base.cursor;
-        return (base.current.length >= 4 && base.slice_del()) ? true : false;
-    };
-
- /** @return {boolean} */
- function r_Suffix_Noun_Step3() {
- base.ket = base.cursor;
- if (base.find_among_b(a_16) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (base.current.length < 3)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Suffix_Verb_Step1() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_17);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (base.current.length < 4)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (base.current.length < 5)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- if (base.current.length < 6)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Suffix_Verb_Step2a() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_18);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (base.current.length < 4)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (base.current.length < 5)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- if (base.current.length <= 5)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 4:
- if (base.current.length < 6)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Suffix_Verb_Step2b() {
- base.ket = base.cursor;
- if (base.find_among_b(a_19) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (base.current.length < 5)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Suffix_Verb_Step2c() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_20);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (base.current.length < 4)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (base.current.length < 6)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Suffix_All_alef_maqsura() {
- base.ket = base.cursor;
- if (base.find_among_b(a_21) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_from("\u064A"))
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- B_is_noun = true;
- B_is_verb = true;
- B_is_defined = false;
- var /** number */ v_1 = base.cursor;
- r_Checks1();
- base.cursor = v_1;
- r_Normalize_pre();
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_3 = base.limit - base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_4 = base.limit - base.cursor;
- lab2: {
- if (!B_is_verb)
- {
- break lab2;
- }
- lab3: {
- var /** number */ v_5 = base.limit - base.cursor;
- lab4: {
- {
- var v_6 = 1;
- while(true)
- {
- var /** number */ v_7 = base.limit - base.cursor;
- lab5: {
- if (!r_Suffix_Verb_Step1())
- {
- break lab5;
- }
- v_6--;
- continue;
- }
- base.cursor = base.limit - v_7;
- break;
- }
- if (v_6 > 0)
- {
- break lab4;
- }
- }
- lab6: {
- var /** number */ v_8 = base.limit - base.cursor;
- lab7: {
- if (!r_Suffix_Verb_Step2a())
- {
- break lab7;
- }
- break lab6;
- }
- base.cursor = base.limit - v_8;
- lab8: {
- if (!r_Suffix_Verb_Step2c())
- {
- break lab8;
- }
- break lab6;
- }
- base.cursor = base.limit - v_8;
- if (base.cursor <= base.limit_backward)
- {
- break lab4;
- }
- base.cursor--;
- }
- break lab3;
- }
- base.cursor = base.limit - v_5;
- lab9: {
- if (!r_Suffix_Verb_Step2b())
- {
- break lab9;
- }
- break lab3;
- }
- base.cursor = base.limit - v_5;
- if (!r_Suffix_Verb_Step2a())
- {
- break lab2;
- }
- }
- break lab1;
- }
- base.cursor = base.limit - v_4;
- lab10: {
- if (!B_is_noun)
- {
- break lab10;
- }
- var /** number */ v_9 = base.limit - base.cursor;
- lab11: {
- lab12: {
- var /** number */ v_10 = base.limit - base.cursor;
- lab13: {
- if (!r_Suffix_Noun_Step2c2())
- {
- break lab13;
- }
- break lab12;
- }
- base.cursor = base.limit - v_10;
- lab14: {
- lab15: {
- if (!B_is_defined)
- {
- break lab15;
- }
- break lab14;
- }
- if (!r_Suffix_Noun_Step1a())
- {
- break lab14;
- }
- lab16: {
- var /** number */ v_12 = base.limit - base.cursor;
- lab17: {
- if (!r_Suffix_Noun_Step2a())
- {
- break lab17;
- }
- break lab16;
- }
- base.cursor = base.limit - v_12;
- lab18: {
- if (!r_Suffix_Noun_Step2b())
- {
- break lab18;
- }
- break lab16;
- }
- base.cursor = base.limit - v_12;
- lab19: {
- if (!r_Suffix_Noun_Step2c1())
- {
- break lab19;
- }
- break lab16;
- }
- base.cursor = base.limit - v_12;
- if (base.cursor <= base.limit_backward)
- {
- break lab14;
- }
- base.cursor--;
- }
- break lab12;
- }
- base.cursor = base.limit - v_10;
- lab20: {
- if (!r_Suffix_Noun_Step1b())
- {
- break lab20;
- }
- lab21: {
- var /** number */ v_13 = base.limit - base.cursor;
- lab22: {
- if (!r_Suffix_Noun_Step2a())
- {
- break lab22;
- }
- break lab21;
- }
- base.cursor = base.limit - v_13;
- lab23: {
- if (!r_Suffix_Noun_Step2b())
- {
- break lab23;
- }
- break lab21;
- }
- base.cursor = base.limit - v_13;
- if (!r_Suffix_Noun_Step2c1())
- {
- break lab20;
- }
- }
- break lab12;
- }
- base.cursor = base.limit - v_10;
- lab24: {
- lab25: {
- if (!B_is_defined)
- {
- break lab25;
- }
- break lab24;
- }
- if (!r_Suffix_Noun_Step2a())
- {
- break lab24;
- }
- break lab12;
- }
- base.cursor = base.limit - v_10;
- if (!r_Suffix_Noun_Step2b())
- {
- base.cursor = base.limit - v_9;
- break lab11;
- }
- }
- }
- if (!r_Suffix_Noun_Step3())
- {
- break lab10;
- }
- break lab1;
- }
- base.cursor = base.limit - v_4;
- if (!r_Suffix_All_alef_maqsura())
- {
- break lab0;
- }
- }
- }
- base.cursor = base.limit - v_3;
- base.cursor = base.limit_backward;
- var /** number */ v_15 = base.cursor;
- lab26: {
- var /** number */ v_16 = base.cursor;
- lab27: {
- if (!r_Prefix_Step1())
- {
- base.cursor = v_16;
- break lab27;
- }
- }
- var /** number */ v_17 = base.cursor;
- lab28: {
- if (!r_Prefix_Step2())
- {
- base.cursor = v_17;
- break lab28;
- }
- }
- lab29: {
- var /** number */ v_18 = base.cursor;
- lab30: {
- if (!r_Prefix_Step3a_Noun())
- {
- break lab30;
- }
- break lab29;
- }
- base.cursor = v_18;
- lab31: {
- if (!B_is_noun)
- {
- break lab31;
- }
- if (!r_Prefix_Step3b_Noun())
- {
- break lab31;
- }
- break lab29;
- }
- base.cursor = v_18;
- if (!B_is_verb)
- {
- break lab26;
- }
- var /** number */ v_19 = base.cursor;
- lab32: {
- if (!r_Prefix_Step3_Verb())
- {
- base.cursor = v_19;
- break lab32;
- }
- }
- if (!r_Prefix_Step4_Verb())
- {
- break lab26;
- }
- }
- }
- base.cursor = v_15;
- r_Normalize_post();
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['ArabicStemmer'] = ArabicStemmer;
diff --git a/js/armenian-stemmer.js b/js/armenian-stemmer.js
deleted file mode 100644
index 669e63c..0000000
--- a/js/armenian-stemmer.js
+++ /dev/null
@@ -1,391 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var ArmenianStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["\u0580\u0578\u0580\u0564", -1, 1],
- ["\u0565\u0580\u0578\u0580\u0564", 0, 1],
- ["\u0561\u056C\u056B", -1, 1],
- ["\u0561\u056F\u056B", -1, 1],
- ["\u0578\u0580\u0561\u056F", -1, 1],
- ["\u0565\u0572", -1, 1],
- ["\u0561\u056F\u0561\u0576", -1, 1],
- ["\u0561\u0580\u0561\u0576", -1, 1],
- ["\u0565\u0576", -1, 1],
- ["\u0565\u056F\u0565\u0576", 8, 1],
- ["\u0565\u0580\u0565\u0576", 8, 1],
- ["\u0578\u0580\u0567\u0576", -1, 1],
- ["\u056B\u0576", -1, 1],
- ["\u0563\u056B\u0576", 12, 1],
- ["\u0578\u057E\u056B\u0576", 12, 1],
- ["\u056C\u0561\u0575\u0576", -1, 1],
- ["\u057E\u0578\u0582\u0576", -1, 1],
- ["\u057A\u0565\u057D", -1, 1],
- ["\u056B\u057E", -1, 1],
- ["\u0561\u057F", -1, 1],
- ["\u0561\u057E\u0565\u057F", -1, 1],
- ["\u056F\u0578\u057F", -1, 1],
- ["\u0562\u0561\u0580", -1, 1]
- ];
-
- /** @const */ var a_1 = [
- ["\u0561", -1, 1],
- ["\u0561\u0581\u0561", 0, 1],
- ["\u0565\u0581\u0561", 0, 1],
- ["\u057E\u0565", -1, 1],
- ["\u0561\u0581\u0580\u056B", -1, 1],
- ["\u0561\u0581\u056B", -1, 1],
- ["\u0565\u0581\u056B", -1, 1],
- ["\u057E\u0565\u0581\u056B", 6, 1],
- ["\u0561\u056C", -1, 1],
- ["\u0568\u0561\u056C", 8, 1],
- ["\u0561\u0576\u0561\u056C", 8, 1],
- ["\u0565\u0576\u0561\u056C", 8, 1],
- ["\u0561\u0581\u0576\u0561\u056C", 8, 1],
- ["\u0565\u056C", -1, 1],
- ["\u0568\u0565\u056C", 13, 1],
- ["\u0576\u0565\u056C", 13, 1],
- ["\u0581\u0576\u0565\u056C", 15, 1],
- ["\u0565\u0581\u0576\u0565\u056C", 16, 1],
- ["\u0579\u0565\u056C", 13, 1],
- ["\u057E\u0565\u056C", 13, 1],
- ["\u0561\u0581\u057E\u0565\u056C", 19, 1],
- ["\u0565\u0581\u057E\u0565\u056C", 19, 1],
- ["\u057F\u0565\u056C", 13, 1],
- ["\u0561\u057F\u0565\u056C", 22, 1],
- ["\u0578\u057F\u0565\u056C", 22, 1],
- ["\u056F\u0578\u057F\u0565\u056C", 24, 1],
- ["\u057E\u0561\u056E", -1, 1],
- ["\u0578\u0582\u0574", -1, 1],
- ["\u057E\u0578\u0582\u0574", 27, 1],
- ["\u0561\u0576", -1, 1],
- ["\u0581\u0561\u0576", 29, 1],
- ["\u0561\u0581\u0561\u0576", 30, 1],
- ["\u0561\u0581\u0580\u056B\u0576", -1, 1],
- ["\u0561\u0581\u056B\u0576", -1, 1],
- ["\u0565\u0581\u056B\u0576", -1, 1],
- ["\u057E\u0565\u0581\u056B\u0576", 34, 1],
- ["\u0561\u056C\u056B\u057D", -1, 1],
- ["\u0565\u056C\u056B\u057D", -1, 1],
- ["\u0561\u057E", -1, 1],
- ["\u0561\u0581\u0561\u057E", 38, 1],
- ["\u0565\u0581\u0561\u057E", 38, 1],
- ["\u0561\u056C\u0578\u057E", -1, 1],
- ["\u0565\u056C\u0578\u057E", -1, 1],
- ["\u0561\u0580", -1, 1],
- ["\u0561\u0581\u0561\u0580", 43, 1],
- ["\u0565\u0581\u0561\u0580", 43, 1],
- ["\u0561\u0581\u0580\u056B\u0580", -1, 1],
- ["\u0561\u0581\u056B\u0580", -1, 1],
- ["\u0565\u0581\u056B\u0580", -1, 1],
- ["\u057E\u0565\u0581\u056B\u0580", 48, 1],
- ["\u0561\u0581", -1, 1],
- ["\u0565\u0581", -1, 1],
- ["\u0561\u0581\u0580\u0565\u0581", 51, 1],
- ["\u0561\u056C\u0578\u0582\u0581", -1, 1],
- ["\u0565\u056C\u0578\u0582\u0581", -1, 1],
- ["\u0561\u056C\u0578\u0582", -1, 1],
- ["\u0565\u056C\u0578\u0582", -1, 1],
- ["\u0561\u0584", -1, 1],
- ["\u0581\u0561\u0584", 57, 1],
- ["\u0561\u0581\u0561\u0584", 58, 1],
- ["\u0561\u0581\u0580\u056B\u0584", -1, 1],
- ["\u0561\u0581\u056B\u0584", -1, 1],
- ["\u0565\u0581\u056B\u0584", -1, 1],
- ["\u057E\u0565\u0581\u056B\u0584", 62, 1],
- ["\u0561\u0576\u0584", -1, 1],
- ["\u0581\u0561\u0576\u0584", 64, 1],
- ["\u0561\u0581\u0561\u0576\u0584", 65, 1],
- ["\u0561\u0581\u0580\u056B\u0576\u0584", -1, 1],
- ["\u0561\u0581\u056B\u0576\u0584", -1, 1],
- ["\u0565\u0581\u056B\u0576\u0584", -1, 1],
- ["\u057E\u0565\u0581\u056B\u0576\u0584", 69, 1]
- ];
-
- /** @const */ var a_2 = [
- ["\u0578\u0580\u0564", -1, 1],
- ["\u0578\u0582\u0575\u0569", -1, 1],
- ["\u0578\u0582\u0570\u056B", -1, 1],
- ["\u0581\u056B", -1, 1],
- ["\u056B\u056C", -1, 1],
- ["\u0561\u056F", -1, 1],
- ["\u0575\u0561\u056F", 5, 1],
- ["\u0561\u0576\u0561\u056F", 5, 1],
- ["\u056B\u056F", -1, 1],
- ["\u0578\u0582\u056F", -1, 1],
- ["\u0561\u0576", -1, 1],
- ["\u057A\u0561\u0576", 10, 1],
- ["\u057D\u057F\u0561\u0576", 10, 1],
- ["\u0561\u0580\u0561\u0576", 10, 1],
- ["\u0565\u0572\u0567\u0576", -1, 1],
- ["\u0575\u0578\u0582\u0576", -1, 1],
- ["\u0578\u0582\u0569\u0575\u0578\u0582\u0576", 15, 1],
- ["\u0561\u056E\u0578", -1, 1],
- ["\u056B\u0579", -1, 1],
- ["\u0578\u0582\u057D", -1, 1],
- ["\u0578\u0582\u057D\u057F", -1, 1],
- ["\u0563\u0561\u0580", -1, 1],
- ["\u057E\u0578\u0580", -1, 1],
- ["\u0561\u057E\u0578\u0580", 22, 1],
- ["\u0578\u0581", -1, 1],
- ["\u0561\u0576\u0585\u0581", -1, 1],
- ["\u0578\u0582", -1, 1],
- ["\u0584", -1, 1],
- ["\u0579\u0565\u0584", 27, 1],
- ["\u056B\u0584", 27, 1],
- ["\u0561\u056C\u056B\u0584", 29, 1],
- ["\u0561\u0576\u056B\u0584", 29, 1],
- ["\u057E\u0561\u056E\u0584", 27, 1],
- ["\u0578\u0582\u0575\u0584", 27, 1],
- ["\u0565\u0576\u0584", 27, 1],
- ["\u0578\u0576\u0584", 27, 1],
- ["\u0578\u0582\u0576\u0584", 27, 1],
- ["\u0574\u0578\u0582\u0576\u0584", 36, 1],
- ["\u056B\u0579\u0584", 27, 1],
- ["\u0561\u0580\u0584", 27, 1]
- ];
-
- /** @const */ var a_3 = [
- ["\u057D\u0561", -1, 1],
- ["\u057E\u0561", -1, 1],
- ["\u0561\u0574\u0562", -1, 1],
- ["\u0564", -1, 1],
- ["\u0561\u0576\u0564", 3, 1],
- ["\u0578\u0582\u0569\u0575\u0561\u0576\u0564", 4, 1],
- ["\u057E\u0561\u0576\u0564", 4, 1],
- ["\u0578\u057B\u0564", 3, 1],
- ["\u0565\u0580\u0564", 3, 1],
- ["\u0576\u0565\u0580\u0564", 8, 1],
- ["\u0578\u0582\u0564", 3, 1],
- ["\u0568", -1, 1],
- ["\u0561\u0576\u0568", 11, 1],
- ["\u0578\u0582\u0569\u0575\u0561\u0576\u0568", 12, 1],
- ["\u057E\u0561\u0576\u0568", 12, 1],
- ["\u0578\u057B\u0568", 11, 1],
- ["\u0565\u0580\u0568", 11, 1],
- ["\u0576\u0565\u0580\u0568", 16, 1],
- ["\u056B", -1, 1],
- ["\u057E\u056B", 18, 1],
- ["\u0565\u0580\u056B", 18, 1],
- ["\u0576\u0565\u0580\u056B", 20, 1],
- ["\u0561\u0576\u0578\u0582\u0574", -1, 1],
- ["\u0565\u0580\u0578\u0582\u0574", -1, 1],
- ["\u0576\u0565\u0580\u0578\u0582\u0574", 23, 1],
- ["\u0576", -1, 1],
- ["\u0561\u0576", 25, 1],
- ["\u0578\u0582\u0569\u0575\u0561\u0576", 26, 1],
- ["\u057E\u0561\u0576", 26, 1],
- ["\u056B\u0576", 25, 1],
- ["\u0565\u0580\u056B\u0576", 29, 1],
- ["\u0576\u0565\u0580\u056B\u0576", 30, 1],
- ["\u0578\u0582\u0569\u0575\u0561\u0576\u0576", 25, 1],
- ["\u0565\u0580\u0576", 25, 1],
- ["\u0576\u0565\u0580\u0576", 33, 1],
- ["\u0578\u0582\u0576", 25, 1],
- ["\u0578\u057B", -1, 1],
- ["\u0578\u0582\u0569\u0575\u0561\u0576\u057D", -1, 1],
- ["\u057E\u0561\u0576\u057D", -1, 1],
- ["\u0578\u057B\u057D", -1, 1],
- ["\u0578\u057E", -1, 1],
- ["\u0561\u0576\u0578\u057E", 40, 1],
- ["\u057E\u0578\u057E", 40, 1],
- ["\u0565\u0580\u0578\u057E", 40, 1],
- ["\u0576\u0565\u0580\u0578\u057E", 43, 1],
- ["\u0565\u0580", -1, 1],
- ["\u0576\u0565\u0580", 45, 1],
- ["\u0581", -1, 1],
- ["\u056B\u0581", 47, 1],
- ["\u057E\u0561\u0576\u056B\u0581", 48, 1],
- ["\u0578\u057B\u056B\u0581", 48, 1],
- ["\u057E\u056B\u0581", 48, 1],
- ["\u0565\u0580\u056B\u0581", 48, 1],
- ["\u0576\u0565\u0580\u056B\u0581", 52, 1],
- ["\u0581\u056B\u0581", 48, 1],
- ["\u0578\u0581", 47, 1],
- ["\u0578\u0582\u0581", 47, 1]
- ];
-
- /** @const */ var /** Array */ g_v = [209, 4, 128, 0, 18];
-
- var /** number */ I_p2 = 0;
- var /** number */ I_pV = 0;
-
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_pV = base.limit;
- I_p2 = base.limit;
- var /** number */ v_1 = base.cursor;
- lab0: {
- golab1: while(true)
- {
- lab2: {
- if (!(base.in_grouping(g_v, 1377, 1413)))
- {
- break lab2;
- }
- break golab1;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- I_pV = base.cursor;
- golab3: while(true)
- {
- lab4: {
- if (!(base.out_grouping(g_v, 1377, 1413)))
- {
- break lab4;
- }
- break golab3;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- golab5: while(true)
- {
- lab6: {
- if (!(base.in_grouping(g_v, 1377, 1413)))
- {
- break lab6;
- }
- break golab5;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- golab7: while(true)
- {
- lab8: {
- if (!(base.out_grouping(g_v, 1377, 1413)))
- {
- break lab8;
- }
- break golab7;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- }
- base.cursor = v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_adjective() {
- base.ket = base.cursor;
- if (base.find_among_b(a_0) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_verb() {
- base.ket = base.cursor;
- if (base.find_among_b(a_1) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_noun() {
- base.ket = base.cursor;
- if (base.find_among_b(a_2) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_ending() {
- base.ket = base.cursor;
- if (base.find_among_b(a_3) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- r_mark_regions();
- base.limit_backward = base.cursor; base.cursor = base.limit;
- if (base.cursor < I_pV)
- {
- return false;
- }
- var /** number */ v_3 = base.limit_backward;
- base.limit_backward = I_pV;
- var /** number */ v_4 = base.limit - base.cursor;
- r_ending();
- base.cursor = base.limit - v_4;
- var /** number */ v_5 = base.limit - base.cursor;
- r_verb();
- base.cursor = base.limit - v_5;
- var /** number */ v_6 = base.limit - base.cursor;
- r_adjective();
- base.cursor = base.limit - v_6;
- var /** number */ v_7 = base.limit - base.cursor;
- r_noun();
- base.cursor = base.limit - v_7;
- base.limit_backward = v_3;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['ArmenianStemmer'] = ArmenianStemmer;
diff --git a/js/base-stemmer.js b/js/base-stemmer.js
deleted file mode 100644
index 930078b..0000000
--- a/js/base-stemmer.js
+++ /dev/null
@@ -1,296 +0,0 @@
-/**@constructor*/
-const BaseStemmer = function() {
- this.setCurrent = function(value) {
- this.current = value;
- this.cursor = 0;
- this.limit = this.current.length;
- this.limit_backward = 0;
- this.bra = this.cursor;
- this.ket = this.limit;
- };
-
- this.getCurrent = function() {
- return this.current;
- };
-
- this.copy_from = function(other) {
- this.current = other.current;
- this.cursor = other.cursor;
- this.limit = other.limit;
- this.limit_backward = other.limit_backward;
- this.bra = other.bra;
- this.ket = other.ket;
- };
-
- this.in_grouping = function(s, min, max) {
- if (this.cursor >= this.limit) return false;
- var ch = this.current.charCodeAt(this.cursor);
- if (ch > max || ch < min) return false;
- ch -= min;
- if ((s[ch >>> 3] & (0x1 << (ch & 0x7))) == 0) return false;
- this.cursor++;
- return true;
- };
-
- this.in_grouping_b = function(s, min, max) {
- if (this.cursor <= this.limit_backward) return false;
- var ch = this.current.charCodeAt(this.cursor - 1);
- if (ch > max || ch < min) return false;
- ch -= min;
- if ((s[ch >>> 3] & (0x1 << (ch & 0x7))) == 0) return false;
- this.cursor--;
- return true;
- };
-
- this.out_grouping = function(s, min, max) {
- if (this.cursor >= this.limit) return false;
- var ch = this.current.charCodeAt(this.cursor);
- if (ch > max || ch < min) {
- this.cursor++;
- return true;
- }
- ch -= min;
- if ((s[ch >>> 3] & (0X1 << (ch & 0x7))) == 0) {
- this.cursor++;
- return true;
- }
- return false;
- };
-
- this.out_grouping_b = function(s, min, max) {
- if (this.cursor <= this.limit_backward) return false;
- var ch = this.current.charCodeAt(this.cursor - 1);
- if (ch > max || ch < min) {
- this.cursor--;
- return true;
- }
- ch -= min;
- if ((s[ch >>> 3] & (0x1 << (ch & 0x7))) == 0) {
- this.cursor--;
- return true;
- }
- return false;
- };
-
- this.eq_s = function(s)
- {
- if (this.limit - this.cursor < s.length) return false;
- if (this.current.slice(this.cursor, this.cursor + s.length) != s)
- {
- return false;
- }
- this.cursor += s.length;
- return true;
- };
-
- this.eq_s_b = function(s)
- {
- if (this.cursor - this.limit_backward < s.length) return false;
- if (this.current.slice(this.cursor - s.length, this.cursor) != s)
- {
- return false;
- }
- this.cursor -= s.length;
- return true;
- };
-
- /** @return {number} */ this.find_among = function(v)
- {
- var i = 0;
- var j = v.length;
-
- var c = this.cursor;
- var l = this.limit;
-
- var common_i = 0;
- var common_j = 0;
-
- var first_key_inspected = false;
-
- while (true)
- {
- var k = i + ((j - i) >>> 1);
- var diff = 0;
- var common = common_i < common_j ? common_i : common_j; // smaller
- // w[0]: string, w[1]: substring_i, w[2]: result, w[3]: function (optional)
- var w = v[k];
- var i2;
- for (i2 = common; i2 < w[0].length; i2++)
- {
- if (c + common == l)
- {
- diff = -1;
- break;
- }
- diff = this.current.charCodeAt(c + common) - w[0].charCodeAt(i2);
- if (diff != 0) break;
- common++;
- }
- if (diff < 0)
- {
- j = k;
- common_j = common;
- }
- else
- {
- i = k;
- common_i = common;
- }
- if (j - i <= 1)
- {
- if (i > 0) break; // v->s has been inspected
- if (j == i) break; // only one item in v
-
- // - but now we need to go round once more to get
- // v->s inspected. This looks messy, but is actually
- // the optimal approach.
-
- if (first_key_inspected) break;
- first_key_inspected = true;
- }
- }
- do {
- var w = v[i];
- if (common_i >= w[0].length)
- {
- this.cursor = c + w[0].length;
- if (w.length < 4) return w[2];
- var res = w[3](this);
- this.cursor = c + w[0].length;
- if (res) return w[2];
- }
- i = w[1];
- } while (i >= 0);
- return 0;
- };
-
- // find_among_b is for backwards processing. Same comments apply
- this.find_among_b = function(v)
- {
- var i = 0;
- var j = v.length
-
- var c = this.cursor;
- var lb = this.limit_backward;
-
- var common_i = 0;
- var common_j = 0;
-
- var first_key_inspected = false;
-
- while (true)
- {
- var k = i + ((j - i) >> 1);
- var diff = 0;
- var common = common_i < common_j ? common_i : common_j;
- var w = v[k];
- var i2;
- for (i2 = w[0].length - 1 - common; i2 >= 0; i2--)
- {
- if (c - common == lb)
- {
- diff = -1;
- break;
- }
- diff = this.current.charCodeAt(c - 1 - common) - w[0].charCodeAt(i2);
- if (diff != 0) break;
- common++;
- }
- if (diff < 0)
- {
- j = k;
- common_j = common;
- }
- else
- {
- i = k;
- common_i = common;
- }
- if (j - i <= 1)
- {
- if (i > 0) break;
- if (j == i) break;
- if (first_key_inspected) break;
- first_key_inspected = true;
- }
- }
- do {
- var w = v[i];
- if (common_i >= w[0].length)
- {
- this.cursor = c - w[0].length;
- if (w.length < 4) return w[2];
- var res = w[3](this);
- this.cursor = c - w[0].length;
- if (res) return w[2];
- }
- i = w[1];
- } while (i >= 0);
- return 0;
- };
-
- /* to replace chars between c_bra and c_ket in this.current by the
- * chars in s.
- */
- this.replace_s = function(c_bra, c_ket, s)
- {
- var adjustment = s.length - (c_ket - c_bra);
- this.current = this.current.slice(0, c_bra) + s + this.current.slice(c_ket);
- this.limit += adjustment;
- if (this.cursor >= c_ket) this.cursor += adjustment;
- else if (this.cursor > c_bra) this.cursor = c_bra;
- return adjustment;
- };
-
- this.slice_check = function()
- {
- if (this.bra < 0 ||
- this.bra > this.ket ||
- this.ket > this.limit ||
- this.limit > this.current.length)
- {
- return false;
- }
- return true;
- };
-
- this.slice_from = function(s)
- {
- var result = false;
- if (this.slice_check())
- {
- this.replace_s(this.bra, this.ket, s);
- result = true;
- }
- return result;
- };
-
- this.slice_del = function()
- {
- return this.slice_from("");
- };
-
- this.insert = function(c_bra, c_ket, s)
- {
- var adjustment = this.replace_s(c_bra, c_ket, s);
- if (c_bra <= this.bra) this.bra += adjustment;
- if (c_bra <= this.ket) this.ket += adjustment;
- };
-
- this.slice_to = function()
- {
- var result = '';
- if (this.slice_check())
- {
- result = this.current.slice(this.bra, this.ket);
- }
- return result;
- };
-
- this.assign_to = function()
- {
- return this.current.slice(0, this.limit);
- };
-};
-
-window['BaseStemmer'] = BaseStemmer;
diff --git a/js/basque-stemmer.js b/js/basque-stemmer.js
deleted file mode 100644
index dbd9e76..0000000
--- a/js/basque-stemmer.js
+++ /dev/null
@@ -1,849 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var BasqueStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["idea", -1, 1],
- ["bidea", 0, 1],
- ["kidea", 0, 1],
- ["pidea", 0, 1],
- ["kundea", -1, 1],
- ["galea", -1, 1],
- ["tailea", -1, 1],
- ["tzailea", -1, 1],
- ["gunea", -1, 1],
- ["kunea", -1, 1],
- ["tzaga", -1, 1],
- ["gaia", -1, 1],
- ["aldia", -1, 1],
- ["taldia", 12, 1],
- ["karia", -1, 1],
- ["garria", -1, 2],
- ["karria", -1, 1],
- ["ka", -1, 1],
- ["tzaka", 17, 1],
- ["la", -1, 1],
- ["mena", -1, 1],
- ["pena", -1, 1],
- ["kina", -1, 1],
- ["ezina", -1, 1],
- ["tezina", 23, 1],
- ["kuna", -1, 1],
- ["tuna", -1, 1],
- ["kizuna", -1, 1],
- ["era", -1, 1],
- ["bera", 28, 1],
- ["arabera", 29, 4],
- ["kera", 28, 1],
- ["pera", 28, 1],
- ["orra", -1, 1],
- ["korra", 33, 1],
- ["dura", -1, 1],
- ["gura", -1, 1],
- ["kura", -1, 1],
- ["tura", -1, 1],
- ["eta", -1, 1],
- ["keta", 39, 1],
- ["gailua", -1, 1],
- ["eza", -1, 1],
- ["erreza", 42, 1],
- ["tza", -1, 2],
- ["gaitza", 44, 1],
- ["kaitza", 44, 1],
- ["kuntza", 44, 1],
- ["ide", -1, 1],
- ["bide", 48, 1],
- ["kide", 48, 1],
- ["pide", 48, 1],
- ["kunde", -1, 1],
- ["tzake", -1, 1],
- ["tzeke", -1, 1],
- ["le", -1, 1],
- ["gale", 55, 1],
- ["taile", 55, 1],
- ["tzaile", 55, 1],
- ["gune", -1, 1],
- ["kune", -1, 1],
- ["tze", -1, 1],
- ["atze", 61, 1],
- ["gai", -1, 1],
- ["aldi", -1, 1],
- ["taldi", 64, 1],
- ["ki", -1, 1],
- ["ari", -1, 1],
- ["kari", 67, 1],
- ["lari", 67, 1],
- ["tari", 67, 1],
- ["etari", 70, 1],
- ["garri", -1, 2],
- ["karri", -1, 1],
- ["arazi", -1, 1],
- ["tarazi", 74, 1],
- ["an", -1, 1],
- ["ean", 76, 1],
- ["rean", 77, 1],
- ["kan", 76, 1],
- ["etan", 76, 1],
- ["atseden", -1, 3],
- ["men", -1, 1],
- ["pen", -1, 1],
- ["kin", -1, 1],
- ["rekin", 84, 1],
- ["ezin", -1, 1],
- ["tezin", 86, 1],
- ["tun", -1, 1],
- ["kizun", -1, 1],
- ["go", -1, 1],
- ["ago", 90, 1],
- ["tio", -1, 1],
- ["dako", -1, 1],
- ["or", -1, 1],
- ["kor", 94, 1],
- ["tzat", -1, 1],
- ["du", -1, 1],
- ["gailu", -1, 1],
- ["tu", -1, 1],
- ["atu", 99, 1],
- ["aldatu", 100, 1],
- ["tatu", 100, 1],
- ["baditu", 99, 5],
- ["ez", -1, 1],
- ["errez", 104, 1],
- ["tzez", 104, 1],
- ["gaitz", -1, 1],
- ["kaitz", -1, 1]
- ];
-
- /** @const */ var a_1 = [
- ["ada", -1, 1],
- ["kada", 0, 1],
- ["anda", -1, 1],
- ["denda", -1, 1],
- ["gabea", -1, 1],
- ["kabea", -1, 1],
- ["aldea", -1, 1],
- ["kaldea", 6, 1],
- ["taldea", 6, 1],
- ["ordea", -1, 1],
- ["zalea", -1, 1],
- ["tzalea", 10, 1],
- ["gilea", -1, 1],
- ["emea", -1, 1],
- ["kumea", -1, 1],
- ["nea", -1, 1],
- ["enea", 15, 1],
- ["zionea", 15, 1],
- ["unea", 15, 1],
- ["gunea", 18, 1],
- ["pea", -1, 1],
- ["aurrea", -1, 1],
- ["tea", -1, 1],
- ["kotea", 22, 1],
- ["artea", 22, 1],
- ["ostea", 22, 1],
- ["etxea", -1, 1],
- ["ga", -1, 1],
- ["anga", 27, 1],
- ["gaia", -1, 1],
- ["aldia", -1, 1],
- ["taldia", 30, 1],
- ["handia", -1, 1],
- ["mendia", -1, 1],
- ["geia", -1, 1],
- ["egia", -1, 1],
- ["degia", 35, 1],
- ["tegia", 35, 1],
- ["nahia", -1, 1],
- ["ohia", -1, 1],
- ["kia", -1, 1],
- ["tokia", 40, 1],
- ["oia", -1, 1],
- ["koia", 42, 1],
- ["aria", -1, 1],
- ["karia", 44, 1],
- ["laria", 44, 1],
- ["taria", 44, 1],
- ["eria", -1, 1],
- ["keria", 48, 1],
- ["teria", 48, 1],
- ["garria", -1, 2],
- ["larria", -1, 1],
- ["kirria", -1, 1],
- ["duria", -1, 1],
- ["asia", -1, 1],
- ["tia", -1, 1],
- ["ezia", -1, 1],
- ["bizia", -1, 1],
- ["ontzia", -1, 1],
- ["ka", -1, 1],
- ["joka", 60, 3],
- ["aurka", 60, 10],
- ["ska", 60, 1],
- ["xka", 60, 1],
- ["zka", 60, 1],
- ["gibela", -1, 1],
- ["gela", -1, 1],
- ["kaila", -1, 1],
- ["skila", -1, 1],
- ["tila", -1, 1],
- ["ola", -1, 1],
- ["na", -1, 1],
- ["kana", 72, 1],
- ["ena", 72, 1],
- ["garrena", 74, 1],
- ["gerrena", 74, 1],
- ["urrena", 74, 1],
- ["zaina", 72, 1],
- ["tzaina", 78, 1],
- ["kina", 72, 1],
- ["mina", 72, 1],
- ["garna", 72, 1],
- ["una", 72, 1],
- ["duna", 83, 1],
- ["asuna", 83, 1],
- ["tasuna", 85, 1],
- ["ondoa", -1, 1],
- ["kondoa", 87, 1],
- ["ngoa", -1, 1],
- ["zioa", -1, 1],
- ["koa", -1, 1],
- ["takoa", 91, 1],
- ["zkoa", 91, 1],
- ["noa", -1, 1],
- ["zinoa", 94, 1],
- ["aroa", -1, 1],
- ["taroa", 96, 1],
- ["zaroa", 96, 1],
- ["eroa", -1, 1],
- ["oroa", -1, 1],
- ["osoa", -1, 1],
- ["toa", -1, 1],
- ["ttoa", 102, 1],
- ["ztoa", 102, 1],
- ["txoa", -1, 1],
- ["tzoa", -1, 1],
- ["\u00F1oa", -1, 1],
- ["ra", -1, 1],
- ["ara", 108, 1],
- ["dara", 109, 1],
- ["liara", 109, 1],
- ["tiara", 109, 1],
- ["tara", 109, 1],
- ["etara", 113, 1],
- ["tzara", 109, 1],
- ["bera", 108, 1],
- ["kera", 108, 1],
- ["pera", 108, 1],
- ["ora", 108, 2],
- ["tzarra", 108, 1],
- ["korra", 108, 1],
- ["tra", 108, 1],
- ["sa", -1, 1],
- ["osa", 123, 1],
- ["ta", -1, 1],
- ["eta", 125, 1],
- ["keta", 126, 1],
- ["sta", 125, 1],
- ["dua", -1, 1],
- ["mendua", 129, 1],
- ["ordua", 129, 1],
- ["lekua", -1, 1],
- ["burua", -1, 1],
- ["durua", -1, 1],
- ["tsua", -1, 1],
- ["tua", -1, 1],
- ["mentua", 136, 1],
- ["estua", 136, 1],
- ["txua", -1, 1],
- ["zua", -1, 1],
- ["tzua", 140, 1],
- ["za", -1, 1],
- ["eza", 142, 1],
- ["eroza", 142, 1],
- ["tza", 142, 2],
- ["koitza", 145, 1],
- ["antza", 145, 1],
- ["gintza", 145, 1],
- ["kintza", 145, 1],
- ["kuntza", 145, 1],
- ["gabe", -1, 1],
- ["kabe", -1, 1],
- ["kide", -1, 1],
- ["alde", -1, 1],
- ["kalde", 154, 1],
- ["talde", 154, 1],
- ["orde", -1, 1],
- ["ge", -1, 1],
- ["zale", -1, 1],
- ["tzale", 159, 1],
- ["gile", -1, 1],
- ["eme", -1, 1],
- ["kume", -1, 1],
- ["ne", -1, 1],
- ["zione", 164, 1],
- ["une", 164, 1],
- ["gune", 166, 1],
- ["pe", -1, 1],
- ["aurre", -1, 1],
- ["te", -1, 1],
- ["kote", 170, 1],
- ["arte", 170, 1],
- ["oste", 170, 1],
- ["etxe", -1, 1],
- ["gai", -1, 1],
- ["di", -1, 1],
- ["aldi", 176, 1],
- ["taldi", 177, 1],
- ["geldi", 176, 8],
- ["handi", 176, 1],
- ["mendi", 176, 1],
- ["gei", -1, 1],
- ["egi", -1, 1],
- ["degi", 183, 1],
- ["tegi", 183, 1],
- ["nahi", -1, 1],
- ["ohi", -1, 1],
- ["ki", -1, 1],
- ["toki", 188, 1],
- ["oi", -1, 1],
- ["goi", 190, 1],
- ["koi", 190, 1],
- ["ari", -1, 1],
- ["kari", 193, 1],
- ["lari", 193, 1],
- ["tari", 193, 1],
- ["garri", -1, 2],
- ["larri", -1, 1],
- ["kirri", -1, 1],
- ["duri", -1, 1],
- ["asi", -1, 1],
- ["ti", -1, 1],
- ["ontzi", -1, 1],
- ["\u00F1i", -1, 1],
- ["ak", -1, 1],
- ["ek", -1, 1],
- ["tarik", -1, 1],
- ["gibel", -1, 1],
- ["ail", -1, 1],
- ["kail", 209, 1],
- ["kan", -1, 1],
- ["tan", -1, 1],
- ["etan", 212, 1],
- ["en", -1, 4],
- ["ren", 214, 2],
- ["garren", 215, 1],
- ["gerren", 215, 1],
- ["urren", 215, 1],
- ["ten", 214, 4],
- ["tzen", 214, 4],
- ["zain", -1, 1],
- ["tzain", 221, 1],
- ["kin", -1, 1],
- ["min", -1, 1],
- ["dun", -1, 1],
- ["asun", -1, 1],
- ["tasun", 226, 1],
- ["aizun", -1, 1],
- ["ondo", -1, 1],
- ["kondo", 229, 1],
- ["go", -1, 1],
- ["ngo", 231, 1],
- ["zio", -1, 1],
- ["ko", -1, 1],
- ["trako", 234, 5],
- ["tako", 234, 1],
- ["etako", 236, 1],
- ["eko", 234, 1],
- ["tariko", 234, 1],
- ["sko", 234, 1],
- ["tuko", 234, 1],
- ["minutuko", 241, 6],
- ["zko", 234, 1],
- ["no", -1, 1],
- ["zino", 244, 1],
- ["ro", -1, 1],
- ["aro", 246, 1],
- ["igaro", 247, 9],
- ["taro", 247, 1],
- ["zaro", 247, 1],
- ["ero", 246, 1],
- ["giro", 246, 1],
- ["oro", 246, 1],
- ["oso", -1, 1],
- ["to", -1, 1],
- ["tto", 255, 1],
- ["zto", 255, 1],
- ["txo", -1, 1],
- ["tzo", -1, 1],
- ["gintzo", 259, 1],
- ["\u00F1o", -1, 1],
- ["zp", -1, 1],
- ["ar", -1, 1],
- ["dar", 263, 1],
- ["behar", 263, 1],
- ["zehar", 263, 7],
- ["liar", 263, 1],
- ["tiar", 263, 1],
- ["tar", 263, 1],
- ["tzar", 263, 1],
- ["or", -1, 2],
- ["kor", 271, 1],
- ["os", -1, 1],
- ["ket", -1, 1],
- ["du", -1, 1],
- ["mendu", 275, 1],
- ["ordu", 275, 1],
- ["leku", -1, 1],
- ["buru", -1, 2],
- ["duru", -1, 1],
- ["tsu", -1, 1],
- ["tu", -1, 1],
- ["tatu", 282, 4],
- ["mentu", 282, 1],
- ["estu", 282, 1],
- ["txu", -1, 1],
- ["zu", -1, 1],
- ["tzu", 287, 1],
- ["gintzu", 288, 1],
- ["z", -1, 1],
- ["ez", 290, 1],
- ["eroz", 290, 1],
- ["tz", 290, 1],
- ["koitz", 293, 1]
- ];
-
- /** @const */ var a_2 = [
- ["zlea", -1, 2],
- ["keria", -1, 1],
- ["la", -1, 1],
- ["era", -1, 1],
- ["dade", -1, 1],
- ["tade", -1, 1],
- ["date", -1, 1],
- ["tate", -1, 1],
- ["gi", -1, 1],
- ["ki", -1, 1],
- ["ik", -1, 1],
- ["lanik", 10, 1],
- ["rik", 10, 1],
- ["larik", 12, 1],
- ["ztik", 10, 1],
- ["go", -1, 1],
- ["ro", -1, 1],
- ["ero", 16, 1],
- ["to", -1, 1]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16];
-
- var /** number */ I_p2 = 0;
- var /** number */ I_p1 = 0;
- var /** number */ I_pV = 0;
-
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_pV = base.limit;
- I_p1 = base.limit;
- I_p2 = base.limit;
- var /** number */ v_1 = base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_2 = base.cursor;
- lab2: {
- if (!(base.in_grouping(g_v, 97, 117)))
- {
- break lab2;
- }
- lab3: {
- var /** number */ v_3 = base.cursor;
- lab4: {
- if (!(base.out_grouping(g_v, 97, 117)))
- {
- break lab4;
- }
- golab5: while(true)
- {
- lab6: {
- if (!(base.in_grouping(g_v, 97, 117)))
- {
- break lab6;
- }
- break golab5;
- }
- if (base.cursor >= base.limit)
- {
- break lab4;
- }
- base.cursor++;
- }
- break lab3;
- }
- base.cursor = v_3;
- if (!(base.in_grouping(g_v, 97, 117)))
- {
- break lab2;
- }
- golab7: while(true)
- {
- lab8: {
- if (!(base.out_grouping(g_v, 97, 117)))
- {
- break lab8;
- }
- break golab7;
- }
- if (base.cursor >= base.limit)
- {
- break lab2;
- }
- base.cursor++;
- }
- }
- break lab1;
- }
- base.cursor = v_2;
- if (!(base.out_grouping(g_v, 97, 117)))
- {
- break lab0;
- }
- lab9: {
- var /** number */ v_6 = base.cursor;
- lab10: {
- if (!(base.out_grouping(g_v, 97, 117)))
- {
- break lab10;
- }
- golab11: while(true)
- {
- lab12: {
- if (!(base.in_grouping(g_v, 97, 117)))
- {
- break lab12;
- }
- break golab11;
- }
- if (base.cursor >= base.limit)
- {
- break lab10;
- }
- base.cursor++;
- }
- break lab9;
- }
- base.cursor = v_6;
- if (!(base.in_grouping(g_v, 97, 117)))
- {
- break lab0;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- }
- I_pV = base.cursor;
- }
- base.cursor = v_1;
- var /** number */ v_8 = base.cursor;
- lab13: {
- golab14: while(true)
- {
- lab15: {
- if (!(base.in_grouping(g_v, 97, 117)))
- {
- break lab15;
- }
- break golab14;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- golab16: while(true)
- {
- lab17: {
- if (!(base.out_grouping(g_v, 97, 117)))
- {
- break lab17;
- }
- break golab16;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- golab18: while(true)
- {
- lab19: {
- if (!(base.in_grouping(g_v, 97, 117)))
- {
- break lab19;
- }
- break golab18;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- golab20: while(true)
- {
- lab21: {
- if (!(base.out_grouping(g_v, 97, 117)))
- {
- break lab21;
- }
- break golab20;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- }
- base.cursor = v_8;
- return true;
- };
-
- /** @return {boolean} */
- function r_RV() {
- return I_pV <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_aditzak() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_0);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("atseden"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("arabera"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("baditu"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_izenak() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_1);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("jok"))
- {
- return false;
- }
- break;
- case 4:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("tra"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("minutu"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("zehar"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("geldi"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("igaro"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("aurka"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_adjetiboak() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("z"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- r_mark_regions();
- base.limit_backward = base.cursor; base.cursor = base.limit;
- while(true)
- {
- var /** number */ v_2 = base.limit - base.cursor;
- lab0: {
- if (!r_aditzak())
- {
- break lab0;
- }
- continue;
- }
- base.cursor = base.limit - v_2;
- break;
- }
- while(true)
- {
- var /** number */ v_3 = base.limit - base.cursor;
- lab1: {
- if (!r_izenak())
- {
- break lab1;
- }
- continue;
- }
- base.cursor = base.limit - v_3;
- break;
- }
- var /** number */ v_4 = base.limit - base.cursor;
- r_adjetiboak();
- base.cursor = base.limit - v_4;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['BasqueStemmer'] = BasqueStemmer;
diff --git a/js/catalan-stemmer.js b/js/catalan-stemmer.js
deleted file mode 100644
index 34f535d..0000000
--- a/js/catalan-stemmer.js
+++ /dev/null
@@ -1,927 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var CatalanStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["", -1, 7],
- ["\u00B7", 0, 6],
- ["\u00E0", 0, 1],
- ["\u00E1", 0, 1],
- ["\u00E8", 0, 2],
- ["\u00E9", 0, 2],
- ["\u00EC", 0, 3],
- ["\u00ED", 0, 3],
- ["\u00EF", 0, 3],
- ["\u00F2", 0, 4],
- ["\u00F3", 0, 4],
- ["\u00FA", 0, 5],
- ["\u00FC", 0, 5]
- ];
-
- /** @const */ var a_1 = [
- ["la", -1, 1],
- ["-la", 0, 1],
- ["sela", 0, 1],
- ["le", -1, 1],
- ["me", -1, 1],
- ["-me", 4, 1],
- ["se", -1, 1],
- ["-te", -1, 1],
- ["hi", -1, 1],
- ["'hi", 8, 1],
- ["li", -1, 1],
- ["-li", 10, 1],
- ["'l", -1, 1],
- ["'m", -1, 1],
- ["-m", -1, 1],
- ["'n", -1, 1],
- ["-n", -1, 1],
- ["ho", -1, 1],
- ["'ho", 17, 1],
- ["lo", -1, 1],
- ["selo", 19, 1],
- ["'s", -1, 1],
- ["las", -1, 1],
- ["selas", 22, 1],
- ["les", -1, 1],
- ["-les", 24, 1],
- ["'ls", -1, 1],
- ["-ls", -1, 1],
- ["'ns", -1, 1],
- ["-ns", -1, 1],
- ["ens", -1, 1],
- ["los", -1, 1],
- ["selos", 31, 1],
- ["nos", -1, 1],
- ["-nos", 33, 1],
- ["vos", -1, 1],
- ["us", -1, 1],
- ["-us", 36, 1],
- ["'t", -1, 1]
- ];
-
- /** @const */ var a_2 = [
- ["ica", -1, 4],
- ["l\u00F3gica", 0, 3],
- ["enca", -1, 1],
- ["ada", -1, 2],
- ["ancia", -1, 1],
- ["encia", -1, 1],
- ["\u00E8ncia", -1, 1],
- ["\u00EDcia", -1, 1],
- ["logia", -1, 3],
- ["inia", -1, 1],
- ["\u00EDinia", 9, 1],
- ["eria", -1, 1],
- ["\u00E0ria", -1, 1],
- ["at\u00F2ria", -1, 1],
- ["alla", -1, 1],
- ["ella", -1, 1],
- ["\u00EDvola", -1, 1],
- ["ima", -1, 1],
- ["\u00EDssima", 17, 1],
- ["qu\u00EDssima", 18, 5],
- ["ana", -1, 1],
- ["ina", -1, 1],
- ["era", -1, 1],
- ["sfera", 22, 1],
- ["ora", -1, 1],
- ["dora", 24, 1],
- ["adora", 25, 1],
- ["adura", -1, 1],
- ["esa", -1, 1],
- ["osa", -1, 1],
- ["assa", -1, 1],
- ["essa", -1, 1],
- ["issa", -1, 1],
- ["eta", -1, 1],
- ["ita", -1, 1],
- ["ota", -1, 1],
- ["ista", -1, 1],
- ["ialista", 36, 1],
- ["ionista", 36, 1],
- ["iva", -1, 1],
- ["ativa", 39, 1],
- ["n\u00E7a", -1, 1],
- ["log\u00EDa", -1, 3],
- ["ic", -1, 4],
- ["\u00EDstic", 43, 1],
- ["enc", -1, 1],
- ["esc", -1, 1],
- ["ud", -1, 1],
- ["atge", -1, 1],
- ["ble", -1, 1],
- ["able", 49, 1],
- ["ible", 49, 1],
- ["isme", -1, 1],
- ["ialisme", 52, 1],
- ["ionisme", 52, 1],
- ["ivisme", 52, 1],
- ["aire", -1, 1],
- ["icte", -1, 1],
- ["iste", -1, 1],
- ["ici", -1, 1],
- ["\u00EDci", -1, 1],
- ["logi", -1, 3],
- ["ari", -1, 1],
- ["tori", -1, 1],
- ["al", -1, 1],
- ["il", -1, 1],
- ["all", -1, 1],
- ["ell", -1, 1],
- ["\u00EDvol", -1, 1],
- ["isam", -1, 1],
- ["issem", -1, 1],
- ["\u00ECssem", -1, 1],
- ["\u00EDssem", -1, 1],
- ["\u00EDssim", -1, 1],
- ["qu\u00EDssim", 73, 5],
- ["amen", -1, 1],
- ["\u00ECssin", -1, 1],
- ["ar", -1, 1],
- ["ificar", 77, 1],
- ["egar", 77, 1],
- ["ejar", 77, 1],
- ["itar", 77, 1],
- ["itzar", 77, 1],
- ["fer", -1, 1],
- ["or", -1, 1],
- ["dor", 84, 1],
- ["dur", -1, 1],
- ["doras", -1, 1],
- ["ics", -1, 4],
- ["l\u00F3gics", 88, 3],
- ["uds", -1, 1],
- ["nces", -1, 1],
- ["ades", -1, 2],
- ["ancies", -1, 1],
- ["encies", -1, 1],
- ["\u00E8ncies", -1, 1],
- ["\u00EDcies", -1, 1],
- ["logies", -1, 3],
- ["inies", -1, 1],
- ["\u00EDnies", -1, 1],
- ["eries", -1, 1],
- ["\u00E0ries", -1, 1],
- ["at\u00F2ries", -1, 1],
- ["bles", -1, 1],
- ["ables", 103, 1],
- ["ibles", 103, 1],
- ["imes", -1, 1],
- ["\u00EDssimes", 106, 1],
- ["qu\u00EDssimes", 107, 5],
- ["formes", -1, 1],
- ["ismes", -1, 1],
- ["ialismes", 110, 1],
- ["ines", -1, 1],
- ["eres", -1, 1],
- ["ores", -1, 1],
- ["dores", 114, 1],
- ["idores", 115, 1],
- ["dures", -1, 1],
- ["eses", -1, 1],
- ["oses", -1, 1],
- ["asses", -1, 1],
- ["ictes", -1, 1],
- ["ites", -1, 1],
- ["otes", -1, 1],
- ["istes", -1, 1],
- ["ialistes", 124, 1],
- ["ionistes", 124, 1],
- ["iques", -1, 4],
- ["l\u00F3giques", 127, 3],
- ["ives", -1, 1],
- ["atives", 129, 1],
- ["log\u00EDes", -1, 3],
- ["alleng\u00FCes", -1, 1],
- ["icis", -1, 1],
- ["\u00EDcis", -1, 1],
- ["logis", -1, 3],
- ["aris", -1, 1],
- ["toris", -1, 1],
- ["ls", -1, 1],
- ["als", 138, 1],
- ["ells", 138, 1],
- ["ims", -1, 1],
- ["\u00EDssims", 141, 1],
- ["qu\u00EDssims", 142, 5],
- ["ions", -1, 1],
- ["cions", 144, 1],
- ["acions", 145, 2],
- ["esos", -1, 1],
- ["osos", -1, 1],
- ["assos", -1, 1],
- ["issos", -1, 1],
- ["ers", -1, 1],
- ["ors", -1, 1],
- ["dors", 152, 1],
- ["adors", 153, 1],
- ["idors", 153, 1],
- ["ats", -1, 1],
- ["itats", 156, 1],
- ["bilitats", 157, 1],
- ["ivitats", 157, 1],
- ["ativitats", 159, 1],
- ["\u00EFtats", 156, 1],
- ["ets", -1, 1],
- ["ants", -1, 1],
- ["ents", -1, 1],
- ["ments", 164, 1],
- ["aments", 165, 1],
- ["ots", -1, 1],
- ["uts", -1, 1],
- ["ius", -1, 1],
- ["trius", 169, 1],
- ["atius", 169, 1],
- ["\u00E8s", -1, 1],
- ["\u00E9s", -1, 1],
- ["\u00EDs", -1, 1],
- ["d\u00EDs", 174, 1],
- ["\u00F3s", -1, 1],
- ["itat", -1, 1],
- ["bilitat", 177, 1],
- ["ivitat", 177, 1],
- ["ativitat", 179, 1],
- ["\u00EFtat", -1, 1],
- ["et", -1, 1],
- ["ant", -1, 1],
- ["ent", -1, 1],
- ["ient", 184, 1],
- ["ment", 184, 1],
- ["ament", 186, 1],
- ["isament", 187, 1],
- ["ot", -1, 1],
- ["isseu", -1, 1],
- ["\u00ECsseu", -1, 1],
- ["\u00EDsseu", -1, 1],
- ["triu", -1, 1],
- ["\u00EDssiu", -1, 1],
- ["atiu", -1, 1],
- ["\u00F3", -1, 1],
- ["i\u00F3", 196, 1],
- ["ci\u00F3", 197, 1],
- ["aci\u00F3", 198, 1]
- ];
-
- /** @const */ var a_3 = [
- ["aba", -1, 1],
- ["esca", -1, 1],
- ["isca", -1, 1],
- ["\u00EFsca", -1, 1],
- ["ada", -1, 1],
- ["ida", -1, 1],
- ["uda", -1, 1],
- ["\u00EFda", -1, 1],
- ["ia", -1, 1],
- ["aria", 8, 1],
- ["iria", 8, 1],
- ["ara", -1, 1],
- ["iera", -1, 1],
- ["ira", -1, 1],
- ["adora", -1, 1],
- ["\u00EFra", -1, 1],
- ["ava", -1, 1],
- ["ixa", -1, 1],
- ["itza", -1, 1],
- ["\u00EDa", -1, 1],
- ["ar\u00EDa", 19, 1],
- ["er\u00EDa", 19, 1],
- ["ir\u00EDa", 19, 1],
- ["\u00EFa", -1, 1],
- ["isc", -1, 1],
- ["\u00EFsc", -1, 1],
- ["ad", -1, 1],
- ["ed", -1, 1],
- ["id", -1, 1],
- ["ie", -1, 1],
- ["re", -1, 1],
- ["dre", 30, 1],
- ["ase", -1, 1],
- ["iese", -1, 1],
- ["aste", -1, 1],
- ["iste", -1, 1],
- ["ii", -1, 1],
- ["ini", -1, 1],
- ["esqui", -1, 1],
- ["eixi", -1, 1],
- ["itzi", -1, 1],
- ["am", -1, 1],
- ["em", -1, 1],
- ["arem", 42, 1],
- ["irem", 42, 1],
- ["\u00E0rem", 42, 1],
- ["\u00EDrem", 42, 1],
- ["\u00E0ssem", 42, 1],
- ["\u00E9ssem", 42, 1],
- ["iguem", 42, 1],
- ["\u00EFguem", 42, 1],
- ["avem", 42, 1],
- ["\u00E0vem", 42, 1],
- ["\u00E1vem", 42, 1],
- ["ir\u00ECem", 42, 1],
- ["\u00EDem", 42, 1],
- ["ar\u00EDem", 55, 1],
- ["ir\u00EDem", 55, 1],
- ["assim", -1, 1],
- ["essim", -1, 1],
- ["issim", -1, 1],
- ["\u00E0ssim", -1, 1],
- ["\u00E8ssim", -1, 1],
- ["\u00E9ssim", -1, 1],
- ["\u00EDssim", -1, 1],
- ["\u00EFm", -1, 1],
- ["an", -1, 1],
- ["aban", 66, 1],
- ["arian", 66, 1],
- ["aran", 66, 1],
- ["ieran", 66, 1],
- ["iran", 66, 1],
- ["\u00EDan", 66, 1],
- ["ar\u00EDan", 72, 1],
- ["er\u00EDan", 72, 1],
- ["ir\u00EDan", 72, 1],
- ["en", -1, 1],
- ["ien", 76, 1],
- ["arien", 77, 1],
- ["irien", 77, 1],
- ["aren", 76, 1],
- ["eren", 76, 1],
- ["iren", 76, 1],
- ["\u00E0ren", 76, 1],
- ["\u00EFren", 76, 1],
- ["asen", 76, 1],
- ["iesen", 76, 1],
- ["assen", 76, 1],
- ["essen", 76, 1],
- ["issen", 76, 1],
- ["\u00E9ssen", 76, 1],
- ["\u00EFssen", 76, 1],
- ["esquen", 76, 1],
- ["isquen", 76, 1],
- ["\u00EFsquen", 76, 1],
- ["aven", 76, 1],
- ["ixen", 76, 1],
- ["eixen", 96, 1],
- ["\u00EFxen", 76, 1],
- ["\u00EFen", 76, 1],
- ["in", -1, 1],
- ["inin", 100, 1],
- ["sin", 100, 1],
- ["isin", 102, 1],
- ["assin", 102, 1],
- ["essin", 102, 1],
- ["issin", 102, 1],
- ["\u00EFssin", 102, 1],
- ["esquin", 100, 1],
- ["eixin", 100, 1],
- ["aron", -1, 1],
- ["ieron", -1, 1],
- ["ar\u00E1n", -1, 1],
- ["er\u00E1n", -1, 1],
- ["ir\u00E1n", -1, 1],
- ["i\u00EFn", -1, 1],
- ["ado", -1, 1],
- ["ido", -1, 1],
- ["ando", -1, 2],
- ["iendo", -1, 1],
- ["io", -1, 1],
- ["ixo", -1, 1],
- ["eixo", 121, 1],
- ["\u00EFxo", -1, 1],
- ["itzo", -1, 1],
- ["ar", -1, 1],
- ["tzar", 125, 1],
- ["er", -1, 1],
- ["eixer", 127, 1],
- ["ir", -1, 1],
- ["ador", -1, 1],
- ["as", -1, 1],
- ["abas", 131, 1],
- ["adas", 131, 1],
- ["idas", 131, 1],
- ["aras", 131, 1],
- ["ieras", 131, 1],
- ["\u00EDas", 131, 1],
- ["ar\u00EDas", 137, 1],
- ["er\u00EDas", 137, 1],
- ["ir\u00EDas", 137, 1],
- ["ids", -1, 1],
- ["es", -1, 1],
- ["ades", 142, 1],
- ["ides", 142, 1],
- ["udes", 142, 1],
- ["\u00EFdes", 142, 1],
- ["atges", 142, 1],
- ["ies", 142, 1],
- ["aries", 148, 1],
- ["iries", 148, 1],
- ["ares", 142, 1],
- ["ires", 142, 1],
- ["adores", 142, 1],
- ["\u00EFres", 142, 1],
- ["ases", 142, 1],
- ["ieses", 142, 1],
- ["asses", 142, 1],
- ["esses", 142, 1],
- ["isses", 142, 1],
- ["\u00EFsses", 142, 1],
- ["ques", 142, 1],
- ["esques", 161, 1],
- ["\u00EFsques", 161, 1],
- ["aves", 142, 1],
- ["ixes", 142, 1],
- ["eixes", 165, 1],
- ["\u00EFxes", 142, 1],
- ["\u00EFes", 142, 1],
- ["abais", -1, 1],
- ["arais", -1, 1],
- ["ierais", -1, 1],
- ["\u00EDais", -1, 1],
- ["ar\u00EDais", 172, 1],
- ["er\u00EDais", 172, 1],
- ["ir\u00EDais", 172, 1],
- ["aseis", -1, 1],
- ["ieseis", -1, 1],
- ["asteis", -1, 1],
- ["isteis", -1, 1],
- ["inis", -1, 1],
- ["sis", -1, 1],
- ["isis", 181, 1],
- ["assis", 181, 1],
- ["essis", 181, 1],
- ["issis", 181, 1],
- ["\u00EFssis", 181, 1],
- ["esquis", -1, 1],
- ["eixis", -1, 1],
- ["itzis", -1, 1],
- ["\u00E1is", -1, 1],
- ["ar\u00E9is", -1, 1],
- ["er\u00E9is", -1, 1],
- ["ir\u00E9is", -1, 1],
- ["ams", -1, 1],
- ["ados", -1, 1],
- ["idos", -1, 1],
- ["amos", -1, 1],
- ["\u00E1bamos", 197, 1],
- ["\u00E1ramos", 197, 1],
- ["i\u00E9ramos", 197, 1],
- ["\u00EDamos", 197, 1],
- ["ar\u00EDamos", 201, 1],
- ["er\u00EDamos", 201, 1],
- ["ir\u00EDamos", 201, 1],
- ["aremos", -1, 1],
- ["eremos", -1, 1],
- ["iremos", -1, 1],
- ["\u00E1semos", -1, 1],
- ["i\u00E9semos", -1, 1],
- ["imos", -1, 1],
- ["adors", -1, 1],
- ["ass", -1, 1],
- ["erass", 212, 1],
- ["ess", -1, 1],
- ["ats", -1, 1],
- ["its", -1, 1],
- ["ents", -1, 1],
- ["\u00E0s", -1, 1],
- ["ar\u00E0s", 218, 1],
- ["ir\u00E0s", 218, 1],
- ["ar\u00E1s", -1, 1],
- ["er\u00E1s", -1, 1],
- ["ir\u00E1s", -1, 1],
- ["\u00E9s", -1, 1],
- ["ar\u00E9s", 224, 1],
- ["\u00EDs", -1, 1],
- ["i\u00EFs", -1, 1],
- ["at", -1, 1],
- ["it", -1, 1],
- ["ant", -1, 1],
- ["ent", -1, 1],
- ["int", -1, 1],
- ["ut", -1, 1],
- ["\u00EFt", -1, 1],
- ["au", -1, 1],
- ["erau", 235, 1],
- ["ieu", -1, 1],
- ["ineu", -1, 1],
- ["areu", -1, 1],
- ["ireu", -1, 1],
- ["\u00E0reu", -1, 1],
- ["\u00EDreu", -1, 1],
- ["asseu", -1, 1],
- ["esseu", -1, 1],
- ["eresseu", 244, 1],
- ["\u00E0sseu", -1, 1],
- ["\u00E9sseu", -1, 1],
- ["igueu", -1, 1],
- ["\u00EFgueu", -1, 1],
- ["\u00E0veu", -1, 1],
- ["\u00E1veu", -1, 1],
- ["itzeu", -1, 1],
- ["\u00ECeu", -1, 1],
- ["ir\u00ECeu", 253, 1],
- ["\u00EDeu", -1, 1],
- ["ar\u00EDeu", 255, 1],
- ["ir\u00EDeu", 255, 1],
- ["assiu", -1, 1],
- ["issiu", -1, 1],
- ["\u00E0ssiu", -1, 1],
- ["\u00E8ssiu", -1, 1],
- ["\u00E9ssiu", -1, 1],
- ["\u00EDssiu", -1, 1],
- ["\u00EFu", -1, 1],
- ["ix", -1, 1],
- ["eix", 265, 1],
- ["\u00EFx", -1, 1],
- ["itz", -1, 1],
- ["i\u00E0", -1, 1],
- ["ar\u00E0", -1, 1],
- ["ir\u00E0", -1, 1],
- ["itz\u00E0", -1, 1],
- ["ar\u00E1", -1, 1],
- ["er\u00E1", -1, 1],
- ["ir\u00E1", -1, 1],
- ["ir\u00E8", -1, 1],
- ["ar\u00E9", -1, 1],
- ["er\u00E9", -1, 1],
- ["ir\u00E9", -1, 1],
- ["\u00ED", -1, 1],
- ["i\u00EF", -1, 1],
- ["i\u00F3", -1, 1]
- ];
-
- /** @const */ var a_4 = [
- ["a", -1, 1],
- ["e", -1, 1],
- ["i", -1, 1],
- ["\u00EFn", -1, 1],
- ["o", -1, 1],
- ["ir", -1, 1],
- ["s", -1, 1],
- ["is", 6, 1],
- ["os", 6, 1],
- ["\u00EFs", 6, 1],
- ["it", -1, 1],
- ["eu", -1, 1],
- ["iu", -1, 1],
- ["iqu", -1, 2],
- ["itz", -1, 1],
- ["\u00E0", -1, 1],
- ["\u00E1", -1, 1],
- ["\u00E9", -1, 1],
- ["\u00EC", -1, 1],
- ["\u00ED", -1, 1],
- ["\u00EF", -1, 1],
- ["\u00F3", -1, 1]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 129, 81, 6, 10];
-
- var /** number */ I_p2 = 0;
- var /** number */ I_p1 = 0;
-
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_p1 = base.limit;
- I_p2 = base.limit;
- var /** number */ v_1 = base.cursor;
- lab0: {
- golab1: while(true)
- {
- lab2: {
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab2;
- }
- break golab1;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- golab3: while(true)
- {
- lab4: {
- if (!(base.out_grouping(g_v, 97, 252)))
- {
- break lab4;
- }
- break golab3;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- golab5: while(true)
- {
- lab6: {
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab6;
- }
- break golab5;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- golab7: while(true)
- {
- lab8: {
- if (!(base.out_grouping(g_v, 97, 252)))
- {
- break lab8;
- }
- break golab7;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- }
- base.cursor = v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_cleaning() {
- var /** number */ among_var;
- while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- among_var = base.find_among(a_0);
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("o"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("u"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("."))
- {
- return false;
- }
- break;
- case 7:
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- break;
- }
- continue;
- }
- base.cursor = v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_attached_pronoun() {
- base.ket = base.cursor;
- if (base.find_among_b(a_1) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_standard_suffix() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("log"))
- {
- return false;
- }
- break;
- case 4:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("ic"))
- {
- return false;
- }
- break;
- case 5:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_from("c"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_verb_suffix() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_3);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_residual_suffix() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_4);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_from("ic"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- r_mark_regions();
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_2 = base.limit - base.cursor;
- r_attached_pronoun();
- base.cursor = base.limit - v_2;
- var /** number */ v_3 = base.limit - base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_4 = base.limit - base.cursor;
- lab2: {
- if (!r_standard_suffix())
- {
- break lab2;
- }
- break lab1;
- }
- base.cursor = base.limit - v_4;
- if (!r_verb_suffix())
- {
- break lab0;
- }
- }
- }
- base.cursor = base.limit - v_3;
- var /** number */ v_5 = base.limit - base.cursor;
- r_residual_suffix();
- base.cursor = base.limit - v_5;
- base.cursor = base.limit_backward;
- var /** number */ v_6 = base.cursor;
- r_cleaning();
- base.cursor = v_6;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['CatalanStemmer'] = CatalanStemmer;
diff --git a/js/danish-stemmer.js b/js/danish-stemmer.js
deleted file mode 100644
index 4c8941b..0000000
--- a/js/danish-stemmer.js
+++ /dev/null
@@ -1,312 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var DanishStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["hed", -1, 1],
- ["ethed", 0, 1],
- ["ered", -1, 1],
- ["e", -1, 1],
- ["erede", 3, 1],
- ["ende", 3, 1],
- ["erende", 5, 1],
- ["ene", 3, 1],
- ["erne", 3, 1],
- ["ere", 3, 1],
- ["en", -1, 1],
- ["heden", 10, 1],
- ["eren", 10, 1],
- ["er", -1, 1],
- ["heder", 13, 1],
- ["erer", 13, 1],
- ["s", -1, 2],
- ["heds", 16, 1],
- ["es", 16, 1],
- ["endes", 18, 1],
- ["erendes", 19, 1],
- ["enes", 18, 1],
- ["ernes", 18, 1],
- ["eres", 18, 1],
- ["ens", 16, 1],
- ["hedens", 24, 1],
- ["erens", 24, 1],
- ["ers", 16, 1],
- ["ets", 16, 1],
- ["erets", 28, 1],
- ["et", -1, 1],
- ["eret", 30, 1]
- ];
-
- /** @const */ var a_1 = [
- ["gd", -1, -1],
- ["dt", -1, -1],
- ["gt", -1, -1],
- ["kt", -1, -1]
- ];
-
- /** @const */ var a_2 = [
- ["ig", -1, 1],
- ["lig", 0, 1],
- ["elig", 1, 1],
- ["els", -1, 1],
- ["l\u00F8st", -1, 2]
- ];
-
- /** @const */ var /** Array */ g_c = [119, 223, 119, 1];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128];
-
- /** @const */ var /** Array */ g_s_ending = [239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16];
-
- var /** number */ I_x = 0;
- var /** number */ I_p1 = 0;
- var /** string */ S_ch = '';
-
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_p1 = base.limit;
- var /** number */ v_1 = base.cursor;
- {
- var /** number */ c1 = base.cursor + 3;
- if (c1 > base.limit)
- {
- return false;
- }
- base.cursor = c1;
- }
- I_x = base.cursor;
- base.cursor = v_1;
- golab0: while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab1: {
- if (!(base.in_grouping(g_v, 97, 248)))
- {
- break lab1;
- }
- base.cursor = v_2;
- break golab0;
- }
- base.cursor = v_2;
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- golab2: while(true)
- {
- lab3: {
- if (!(base.out_grouping(g_v, 97, 248)))
- {
- break lab3;
- }
- break golab2;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- lab4: {
- if (I_p1 >= I_x)
- {
- break lab4;
- }
- I_p1 = I_x;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_main_suffix() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_0);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!(base.in_grouping_b(g_s_ending, 97, 229)))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_consonant_pair() {
- var /** number */ v_1 = base.limit - base.cursor;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_3 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- if (base.find_among_b(a_1) == 0)
- {
- base.limit_backward = v_3;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_3;
- base.cursor = base.limit - v_1;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_other_suffix() {
- var /** number */ among_var;
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("st")))
- {
- break lab0;
- }
- base.bra = base.cursor;
- if (!(base.eq_s_b("ig")))
- {
- break lab0;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- base.cursor = base.limit - v_1;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_3 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- base.limit_backward = v_3;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_3;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_4 = base.limit - base.cursor;
- r_consonant_pair();
- base.cursor = base.limit - v_4;
- break;
- case 2:
- if (!base.slice_from("l\u00F8s"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_undouble() {
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- if (!(base.in_grouping_b(g_c, 98, 122)))
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- S_ch = base.slice_to();
- if (S_ch == '')
- {
- return false;
- }
- base.limit_backward = v_2;
- if (!(base.eq_s_b(S_ch)))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- var /** number */ v_1 = base.cursor;
- r_mark_regions();
- base.cursor = v_1;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_2 = base.limit - base.cursor;
- r_main_suffix();
- base.cursor = base.limit - v_2;
- var /** number */ v_3 = base.limit - base.cursor;
- r_consonant_pair();
- base.cursor = base.limit - v_3;
- var /** number */ v_4 = base.limit - base.cursor;
- r_other_suffix();
- base.cursor = base.limit - v_4;
- var /** number */ v_5 = base.limit - base.cursor;
- r_undouble();
- base.cursor = base.limit - v_5;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['DanishStemmer'] = DanishStemmer;
diff --git a/js/dutch-stemmer.js b/js/dutch-stemmer.js
deleted file mode 100644
index 422d710..0000000
--- a/js/dutch-stemmer.js
+++ /dev/null
@@ -1,671 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var DutchStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["", -1, 6],
- ["\u00E1", 0, 1],
- ["\u00E4", 0, 1],
- ["\u00E9", 0, 2],
- ["\u00EB", 0, 2],
- ["\u00ED", 0, 3],
- ["\u00EF", 0, 3],
- ["\u00F3", 0, 4],
- ["\u00F6", 0, 4],
- ["\u00FA", 0, 5],
- ["\u00FC", 0, 5]
- ];
-
- /** @const */ var a_1 = [
- ["", -1, 3],
- ["I", 0, 2],
- ["Y", 0, 1]
- ];
-
- /** @const */ var a_2 = [
- ["dd", -1, -1],
- ["kk", -1, -1],
- ["tt", -1, -1]
- ];
-
- /** @const */ var a_3 = [
- ["ene", -1, 2],
- ["se", -1, 3],
- ["en", -1, 2],
- ["heden", 2, 1],
- ["s", -1, 3]
- ];
-
- /** @const */ var a_4 = [
- ["end", -1, 1],
- ["ig", -1, 2],
- ["ing", -1, 1],
- ["lijk", -1, 3],
- ["baar", -1, 4],
- ["bar", -1, 5]
- ];
-
- /** @const */ var a_5 = [
- ["aa", -1, -1],
- ["ee", -1, -1],
- ["oo", -1, -1],
- ["uu", -1, -1]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128];
-
- /** @const */ var /** Array */ g_v_I = [1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128];
-
- /** @const */ var /** Array */ g_v_j = [17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128];
-
- var /** number */ I_p2 = 0;
- var /** number */ I_p1 = 0;
- var /** boolean */ B_e_found = false;
-
-
- /** @return {boolean} */
- function r_prelude() {
- var /** number */ among_var;
- var /** number */ v_1 = base.cursor;
- while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- among_var = base.find_among(a_0);
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("o"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("u"))
- {
- return false;
- }
- break;
- case 6:
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- break;
- }
- continue;
- }
- base.cursor = v_2;
- break;
- }
- base.cursor = v_1;
- var /** number */ v_3 = base.cursor;
- lab1: {
- base.bra = base.cursor;
- if (!(base.eq_s("y")))
- {
- base.cursor = v_3;
- break lab1;
- }
- base.ket = base.cursor;
- if (!base.slice_from("Y"))
- {
- return false;
- }
- }
- while(true)
- {
- var /** number */ v_4 = base.cursor;
- lab2: {
- golab3: while(true)
- {
- var /** number */ v_5 = base.cursor;
- lab4: {
- if (!(base.in_grouping(g_v, 97, 232)))
- {
- break lab4;
- }
- base.bra = base.cursor;
- lab5: {
- var /** number */ v_6 = base.cursor;
- lab6: {
- if (!(base.eq_s("i")))
- {
- break lab6;
- }
- base.ket = base.cursor;
- if (!(base.in_grouping(g_v, 97, 232)))
- {
- break lab6;
- }
- if (!base.slice_from("I"))
- {
- return false;
- }
- break lab5;
- }
- base.cursor = v_6;
- if (!(base.eq_s("y")))
- {
- break lab4;
- }
- base.ket = base.cursor;
- if (!base.slice_from("Y"))
- {
- return false;
- }
- }
- base.cursor = v_5;
- break golab3;
- }
- base.cursor = v_5;
- if (base.cursor >= base.limit)
- {
- break lab2;
- }
- base.cursor++;
- }
- continue;
- }
- base.cursor = v_4;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_p1 = base.limit;
- I_p2 = base.limit;
- golab0: while(true)
- {
- lab1: {
- if (!(base.in_grouping(g_v, 97, 232)))
- {
- break lab1;
- }
- break golab0;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- golab2: while(true)
- {
- lab3: {
- if (!(base.out_grouping(g_v, 97, 232)))
- {
- break lab3;
- }
- break golab2;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- lab4: {
- if (I_p1 >= 3)
- {
- break lab4;
- }
- I_p1 = 3;
- }
- golab5: while(true)
- {
- lab6: {
- if (!(base.in_grouping(g_v, 97, 232)))
- {
- break lab6;
- }
- break golab5;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- golab7: while(true)
- {
- lab8: {
- if (!(base.out_grouping(g_v, 97, 232)))
- {
- break lab8;
- }
- break golab7;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- return true;
- };
-
- /** @return {boolean} */
- function r_postlude() {
- var /** number */ among_var;
- while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- among_var = base.find_among(a_1);
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("y"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 3:
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- break;
- }
- continue;
- }
- base.cursor = v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_undouble() {
- var /** number */ v_1 = base.limit - base.cursor;
- if (base.find_among_b(a_2) == 0)
- {
- return false;
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_e_ending() {
- B_e_found = false;
- base.ket = base.cursor;
- if (!(base.eq_s_b("e")))
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- var /** number */ v_1 = base.limit - base.cursor;
- if (!(base.out_grouping_b(g_v, 97, 232)))
- {
- return false;
- }
- base.cursor = base.limit - v_1;
- if (!base.slice_del())
- {
- return false;
- }
- B_e_found = true;
- if (!r_undouble())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_en_ending() {
- if (!r_R1())
- {
- return false;
- }
- var /** number */ v_1 = base.limit - base.cursor;
- if (!(base.out_grouping_b(g_v, 97, 232)))
- {
- return false;
- }
- base.cursor = base.limit - v_1;
- {
- var /** number */ v_2 = base.limit - base.cursor;
- lab0: {
- if (!(base.eq_s_b("gem")))
- {
- break lab0;
- }
- return false;
- }
- base.cursor = base.limit - v_2;
- }
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_undouble())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_standard_suffix() {
- var /** number */ among_var;
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_3);
- if (among_var == 0)
- {
- break lab0;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R1())
- {
- break lab0;
- }
- if (!base.slice_from("heid"))
- {
- return false;
- }
- break;
- case 2:
- if (!r_en_ending())
- {
- break lab0;
- }
- break;
- case 3:
- if (!r_R1())
- {
- break lab0;
- }
- if (!(base.out_grouping_b(g_v_j, 97, 232)))
- {
- break lab0;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- }
- base.cursor = base.limit - v_1;
- var /** number */ v_2 = base.limit - base.cursor;
- r_e_ending();
- base.cursor = base.limit - v_2;
- var /** number */ v_3 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("heid")))
- {
- break lab1;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- break lab1;
- }
- {
- var /** number */ v_4 = base.limit - base.cursor;
- lab2: {
- if (!(base.eq_s_b("c")))
- {
- break lab2;
- }
- break lab1;
- }
- base.cursor = base.limit - v_4;
- }
- if (!base.slice_del())
- {
- return false;
- }
- base.ket = base.cursor;
- if (!(base.eq_s_b("en")))
- {
- break lab1;
- }
- base.bra = base.cursor;
- if (!r_en_ending())
- {
- break lab1;
- }
- }
- base.cursor = base.limit - v_3;
- var /** number */ v_5 = base.limit - base.cursor;
- lab3: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_4);
- if (among_var == 0)
- {
- break lab3;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R2())
- {
- break lab3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- lab4: {
- var /** number */ v_6 = base.limit - base.cursor;
- lab5: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("ig")))
- {
- break lab5;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- break lab5;
- }
- {
- var /** number */ v_7 = base.limit - base.cursor;
- lab6: {
- if (!(base.eq_s_b("e")))
- {
- break lab6;
- }
- break lab5;
- }
- base.cursor = base.limit - v_7;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break lab4;
- }
- base.cursor = base.limit - v_6;
- if (!r_undouble())
- {
- break lab3;
- }
- }
- break;
- case 2:
- if (!r_R2())
- {
- break lab3;
- }
- {
- var /** number */ v_8 = base.limit - base.cursor;
- lab7: {
- if (!(base.eq_s_b("e")))
- {
- break lab7;
- }
- break lab3;
- }
- base.cursor = base.limit - v_8;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- if (!r_R2())
- {
- break lab3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_e_ending())
- {
- break lab3;
- }
- break;
- case 4:
- if (!r_R2())
- {
- break lab3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 5:
- if (!r_R2())
- {
- break lab3;
- }
- if (!B_e_found)
- {
- break lab3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- }
- base.cursor = base.limit - v_5;
- var /** number */ v_9 = base.limit - base.cursor;
- lab8: {
- if (!(base.out_grouping_b(g_v_I, 73, 232)))
- {
- break lab8;
- }
- var /** number */ v_10 = base.limit - base.cursor;
- if (base.find_among_b(a_5) == 0)
- {
- break lab8;
- }
- if (!(base.out_grouping_b(g_v, 97, 232)))
- {
- break lab8;
- }
- base.cursor = base.limit - v_10;
- base.ket = base.cursor;
- if (base.cursor <= base.limit_backward)
- {
- break lab8;
- }
- base.cursor--;
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- }
- base.cursor = base.limit - v_9;
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- var /** number */ v_1 = base.cursor;
- r_prelude();
- base.cursor = v_1;
- var /** number */ v_2 = base.cursor;
- r_mark_regions();
- base.cursor = v_2;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- r_standard_suffix();
- base.cursor = base.limit_backward;
- var /** number */ v_4 = base.cursor;
- r_postlude();
- base.cursor = v_4;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['DutchStemmer'] = DutchStemmer;
diff --git a/js/english-stemmer.js b/js/english-stemmer.js
deleted file mode 100644
index 0f9cd7d..0000000
--- a/js/english-stemmer.js
+++ /dev/null
@@ -1,1086 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var EnglishStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["arsen", -1, -1],
- ["commun", -1, -1],
- ["gener", -1, -1]
- ];
-
- /** @const */ var a_1 = [
- ["'", -1, 1],
- ["'s'", 0, 1],
- ["'s", -1, 1]
- ];
-
- /** @const */ var a_2 = [
- ["ied", -1, 2],
- ["s", -1, 3],
- ["ies", 1, 2],
- ["sses", 1, 1],
- ["ss", 1, -1],
- ["us", 1, -1]
- ];
-
- /** @const */ var a_3 = [
- ["", -1, 3],
- ["bb", 0, 2],
- ["dd", 0, 2],
- ["ff", 0, 2],
- ["gg", 0, 2],
- ["bl", 0, 1],
- ["mm", 0, 2],
- ["nn", 0, 2],
- ["pp", 0, 2],
- ["rr", 0, 2],
- ["at", 0, 1],
- ["tt", 0, 2],
- ["iz", 0, 1]
- ];
-
- /** @const */ var a_4 = [
- ["ed", -1, 2],
- ["eed", 0, 1],
- ["ing", -1, 2],
- ["edly", -1, 2],
- ["eedly", 3, 1],
- ["ingly", -1, 2]
- ];
-
- /** @const */ var a_5 = [
- ["anci", -1, 3],
- ["enci", -1, 2],
- ["ogi", -1, 13],
- ["li", -1, 15],
- ["bli", 3, 12],
- ["abli", 4, 4],
- ["alli", 3, 8],
- ["fulli", 3, 9],
- ["lessli", 3, 14],
- ["ousli", 3, 10],
- ["entli", 3, 5],
- ["aliti", -1, 8],
- ["biliti", -1, 12],
- ["iviti", -1, 11],
- ["tional", -1, 1],
- ["ational", 14, 7],
- ["alism", -1, 8],
- ["ation", -1, 7],
- ["ization", 17, 6],
- ["izer", -1, 6],
- ["ator", -1, 7],
- ["iveness", -1, 11],
- ["fulness", -1, 9],
- ["ousness", -1, 10]
- ];
-
- /** @const */ var a_6 = [
- ["icate", -1, 4],
- ["ative", -1, 6],
- ["alize", -1, 3],
- ["iciti", -1, 4],
- ["ical", -1, 4],
- ["tional", -1, 1],
- ["ational", 5, 2],
- ["ful", -1, 5],
- ["ness", -1, 5]
- ];
-
- /** @const */ var a_7 = [
- ["ic", -1, 1],
- ["ance", -1, 1],
- ["ence", -1, 1],
- ["able", -1, 1],
- ["ible", -1, 1],
- ["ate", -1, 1],
- ["ive", -1, 1],
- ["ize", -1, 1],
- ["iti", -1, 1],
- ["al", -1, 1],
- ["ism", -1, 1],
- ["ion", -1, 2],
- ["er", -1, 1],
- ["ous", -1, 1],
- ["ant", -1, 1],
- ["ent", -1, 1],
- ["ment", 15, 1],
- ["ement", 16, 1]
- ];
-
- /** @const */ var a_8 = [
- ["e", -1, 1],
- ["l", -1, 2]
- ];
-
- /** @const */ var a_9 = [
- ["succeed", -1, -1],
- ["proceed", -1, -1],
- ["exceed", -1, -1],
- ["canning", -1, -1],
- ["inning", -1, -1],
- ["earring", -1, -1],
- ["herring", -1, -1],
- ["outing", -1, -1]
- ];
-
- /** @const */ var a_10 = [
- ["andes", -1, -1],
- ["atlas", -1, -1],
- ["bias", -1, -1],
- ["cosmos", -1, -1],
- ["dying", -1, 3],
- ["early", -1, 9],
- ["gently", -1, 7],
- ["howe", -1, -1],
- ["idly", -1, 6],
- ["lying", -1, 4],
- ["news", -1, -1],
- ["only", -1, 10],
- ["singly", -1, 11],
- ["skies", -1, 2],
- ["skis", -1, 1],
- ["sky", -1, -1],
- ["tying", -1, 5],
- ["ugly", -1, 8]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 1];
-
- /** @const */ var /** Array */ g_v_WXY = [1, 17, 65, 208, 1];
-
- /** @const */ var /** Array */ g_valid_LI = [55, 141, 2];
-
- var /** boolean */ B_Y_found = false;
- var /** number */ I_p2 = 0;
- var /** number */ I_p1 = 0;
-
-
- /** @return {boolean} */
- function r_prelude() {
- B_Y_found = false;
- var /** number */ v_1 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- if (!(base.eq_s("'")))
- {
- break lab0;
- }
- base.ket = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- }
- base.cursor = v_1;
- var /** number */ v_2 = base.cursor;
- lab1: {
- base.bra = base.cursor;
- if (!(base.eq_s("y")))
- {
- break lab1;
- }
- base.ket = base.cursor;
- if (!base.slice_from("Y"))
- {
- return false;
- }
- B_Y_found = true;
- }
- base.cursor = v_2;
- var /** number */ v_3 = base.cursor;
- lab2: {
- while(true)
- {
- var /** number */ v_4 = base.cursor;
- lab3: {
- golab4: while(true)
- {
- var /** number */ v_5 = base.cursor;
- lab5: {
- if (!(base.in_grouping(g_v, 97, 121)))
- {
- break lab5;
- }
- base.bra = base.cursor;
- if (!(base.eq_s("y")))
- {
- break lab5;
- }
- base.ket = base.cursor;
- base.cursor = v_5;
- break golab4;
- }
- base.cursor = v_5;
- if (base.cursor >= base.limit)
- {
- break lab3;
- }
- base.cursor++;
- }
- if (!base.slice_from("Y"))
- {
- return false;
- }
- B_Y_found = true;
- continue;
- }
- base.cursor = v_4;
- break;
- }
- }
- base.cursor = v_3;
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_p1 = base.limit;
- I_p2 = base.limit;
- var /** number */ v_1 = base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_2 = base.cursor;
- lab2: {
- if (base.find_among(a_0) == 0)
- {
- break lab2;
- }
- break lab1;
- }
- base.cursor = v_2;
- golab3: while(true)
- {
- lab4: {
- if (!(base.in_grouping(g_v, 97, 121)))
- {
- break lab4;
- }
- break golab3;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- golab5: while(true)
- {
- lab6: {
- if (!(base.out_grouping(g_v, 97, 121)))
- {
- break lab6;
- }
- break golab5;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- }
- I_p1 = base.cursor;
- golab7: while(true)
- {
- lab8: {
- if (!(base.in_grouping(g_v, 97, 121)))
- {
- break lab8;
- }
- break golab7;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- golab9: while(true)
- {
- lab10: {
- if (!(base.out_grouping(g_v, 97, 121)))
- {
- break lab10;
- }
- break golab9;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- }
- base.cursor = v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_shortv() {
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.out_grouping_b(g_v_WXY, 89, 121)))
- {
- break lab1;
- }
- if (!(base.in_grouping_b(g_v, 97, 121)))
- {
- break lab1;
- }
- if (!(base.out_grouping_b(g_v, 97, 121)))
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!(base.out_grouping_b(g_v, 97, 121)))
- {
- return false;
- }
- if (!(base.in_grouping_b(g_v, 97, 121)))
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_Step_1a() {
- var /** number */ among_var;
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (base.find_among_b(a_1) == 0)
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- }
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("ss"))
- {
- return false;
- }
- break;
- case 2:
- lab1: {
- var /** number */ v_2 = base.limit - base.cursor;
- lab2: {
- {
- var /** number */ c1 = base.cursor - 2;
- if (c1 < base.limit_backward)
- {
- break lab2;
- }
- base.cursor = c1;
- }
- if (!base.slice_from("i"))
- {
- return false;
- }
- break lab1;
- }
- base.cursor = base.limit - v_2;
- if (!base.slice_from("ie"))
- {
- return false;
- }
- }
- break;
- case 3:
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- golab3: while(true)
- {
- lab4: {
- if (!(base.in_grouping_b(g_v, 97, 121)))
- {
- break lab4;
- }
- break golab3;
- }
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_1b() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_4);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_from("ee"))
- {
- return false;
- }
- break;
- case 2:
- var /** number */ v_1 = base.limit - base.cursor;
- golab0: while(true)
- {
- lab1: {
- if (!(base.in_grouping_b(g_v, 97, 121)))
- {
- break lab1;
- }
- break golab0;
- }
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- }
- base.cursor = base.limit - v_1;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_3 = base.limit - base.cursor;
- among_var = base.find_among_b(a_3);
- base.cursor = base.limit - v_3;
- switch (among_var) {
- case 1:
- {
- var /** number */ c1 = base.cursor;
- base.insert(base.cursor, base.cursor, "e");
- base.cursor = c1;
- }
- break;
- case 2:
- base.ket = base.cursor;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- if (base.cursor != I_p1)
- {
- return false;
- }
- var /** number */ v_4 = base.limit - base.cursor;
- if (!r_shortv())
- {
- return false;
- }
- base.cursor = base.limit - v_4;
- {
- var /** number */ c2 = base.cursor;
- base.insert(base.cursor, base.cursor, "e");
- base.cursor = c2;
- }
- break;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_1c() {
- base.ket = base.cursor;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.eq_s_b("y")))
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!(base.eq_s_b("Y")))
- {
- return false;
- }
- }
- base.bra = base.cursor;
- if (!(base.out_grouping_b(g_v, 97, 121)))
- {
- return false;
- }
- lab2: {
- if (base.cursor > base.limit_backward)
- {
- break lab2;
- }
- return false;
- }
- if (!base.slice_from("i"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_2() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_5);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("tion"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("ence"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("ance"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("able"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("ent"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("ize"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("ate"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("al"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("ful"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("ous"))
- {
- return false;
- }
- break;
- case 11:
- if (!base.slice_from("ive"))
- {
- return false;
- }
- break;
- case 12:
- if (!base.slice_from("ble"))
- {
- return false;
- }
- break;
- case 13:
- if (!(base.eq_s_b("l")))
- {
- return false;
- }
- if (!base.slice_from("og"))
- {
- return false;
- }
- break;
- case 14:
- if (!base.slice_from("less"))
- {
- return false;
- }
- break;
- case 15:
- if (!(base.in_grouping_b(g_valid_LI, 99, 116)))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_3() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_6);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("tion"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("ate"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("al"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("ic"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 6:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_4() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_7);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.eq_s_b("s")))
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!(base.eq_s_b("t")))
- {
- return false;
- }
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_5() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_8);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!r_R2())
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!r_R1())
- {
- return false;
- }
- {
- var /** number */ v_2 = base.limit - base.cursor;
- lab2: {
- if (!r_shortv())
- {
- break lab2;
- }
- return false;
- }
- base.cursor = base.limit - v_2;
- }
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R2())
- {
- return false;
- }
- if (!(base.eq_s_b("l")))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_exception2() {
- base.ket = base.cursor;
- if (base.find_among_b(a_9) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_exception1() {
- var /** number */ among_var;
- base.bra = base.cursor;
- among_var = base.find_among(a_10);
- if (among_var == 0)
- {
- return false;
- }
- base.ket = base.cursor;
- if (base.cursor < base.limit)
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("ski"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("sky"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("die"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("lie"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("tie"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("idl"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("gentl"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("ugli"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("earli"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("onli"))
- {
- return false;
- }
- break;
- case 11:
- if (!base.slice_from("singl"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_postlude() {
- if (!B_Y_found)
- {
- return false;
- }
- while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- golab1: while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab2: {
- base.bra = base.cursor;
- if (!(base.eq_s("Y")))
- {
- break lab2;
- }
- base.ket = base.cursor;
- base.cursor = v_2;
- break golab1;
- }
- base.cursor = v_2;
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- if (!base.slice_from("y"))
- {
- return false;
- }
- continue;
- }
- base.cursor = v_1;
- break;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- lab0: {
- var /** number */ v_1 = base.cursor;
- lab1: {
- if (!r_exception1())
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = v_1;
- lab2: {
- {
- var /** number */ v_2 = base.cursor;
- lab3: {
- {
- var /** number */ c1 = base.cursor + 3;
- if (c1 > base.limit)
- {
- break lab3;
- }
- base.cursor = c1;
- }
- break lab2;
- }
- base.cursor = v_2;
- }
- break lab0;
- }
- base.cursor = v_1;
- r_prelude();
- r_mark_regions();
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_5 = base.limit - base.cursor;
- r_Step_1a();
- base.cursor = base.limit - v_5;
- lab4: {
- var /** number */ v_6 = base.limit - base.cursor;
- lab5: {
- if (!r_exception2())
- {
- break lab5;
- }
- break lab4;
- }
- base.cursor = base.limit - v_6;
- var /** number */ v_7 = base.limit - base.cursor;
- r_Step_1b();
- base.cursor = base.limit - v_7;
- var /** number */ v_8 = base.limit - base.cursor;
- r_Step_1c();
- base.cursor = base.limit - v_8;
- var /** number */ v_9 = base.limit - base.cursor;
- r_Step_2();
- base.cursor = base.limit - v_9;
- var /** number */ v_10 = base.limit - base.cursor;
- r_Step_3();
- base.cursor = base.limit - v_10;
- var /** number */ v_11 = base.limit - base.cursor;
- r_Step_4();
- base.cursor = base.limit - v_11;
- var /** number */ v_12 = base.limit - base.cursor;
- r_Step_5();
- base.cursor = base.limit - v_12;
- }
- base.cursor = base.limit_backward;
- var /** number */ v_13 = base.cursor;
- r_postlude();
- base.cursor = v_13;
- }
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['EnglishStemmer'] = EnglishStemmer;
diff --git a/js/estonian-stemmer.js b/js/estonian-stemmer.js
deleted file mode 100644
index 4b9ef4b..0000000
--- a/js/estonian-stemmer.js
+++ /dev/null
@@ -1,1112 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var EstonianStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["gi", -1, 1],
- ["ki", -1, 2]
- ];
-
- /** @const */ var a_1 = [
- ["da", -1, 3],
- ["mata", -1, 1],
- ["b", -1, 3],
- ["ksid", -1, 1],
- ["nuksid", 3, 1],
- ["me", -1, 3],
- ["sime", 5, 1],
- ["ksime", 6, 1],
- ["nuksime", 7, 1],
- ["akse", -1, 2],
- ["dakse", 9, 1],
- ["takse", 9, 1],
- ["site", -1, 1],
- ["ksite", 12, 1],
- ["nuksite", 13, 1],
- ["n", -1, 3],
- ["sin", 15, 1],
- ["ksin", 16, 1],
- ["nuksin", 17, 1],
- ["daks", -1, 1],
- ["taks", -1, 1]
- ];
-
- /** @const */ var a_2 = [
- ["aa", -1, -1],
- ["ee", -1, -1],
- ["ii", -1, -1],
- ["oo", -1, -1],
- ["uu", -1, -1],
- ["\u00E4\u00E4", -1, -1],
- ["\u00F5\u00F5", -1, -1],
- ["\u00F6\u00F6", -1, -1],
- ["\u00FC\u00FC", -1, -1]
- ];
-
- /** @const */ var a_3 = [
- ["i", -1, 1]
- ];
-
- /** @const */ var a_4 = [
- ["lane", -1, 1],
- ["line", -1, 3],
- ["mine", -1, 2],
- ["lasse", -1, 1],
- ["lisse", -1, 3],
- ["misse", -1, 2],
- ["lasi", -1, 1],
- ["lisi", -1, 3],
- ["misi", -1, 2],
- ["last", -1, 1],
- ["list", -1, 3],
- ["mist", -1, 2]
- ];
-
- /** @const */ var a_5 = [
- ["ga", -1, 1],
- ["ta", -1, 1],
- ["le", -1, 1],
- ["sse", -1, 1],
- ["l", -1, 1],
- ["s", -1, 1],
- ["ks", 5, 1],
- ["t", -1, 2],
- ["lt", 7, 1],
- ["st", 7, 1]
- ];
-
- /** @const */ var a_6 = [
- ["", -1, 2],
- ["las", 0, 1],
- ["lis", 0, 1],
- ["mis", 0, 1],
- ["t", 0, -1]
- ];
-
- /** @const */ var a_7 = [
- ["d", -1, 4],
- ["sid", 0, 2],
- ["de", -1, 4],
- ["ikkude", 2, 1],
- ["ike", -1, 1],
- ["ikke", -1, 1],
- ["te", -1, 3]
- ];
-
- /** @const */ var a_8 = [
- ["va", -1, -1],
- ["du", -1, -1],
- ["nu", -1, -1],
- ["tu", -1, -1]
- ];
-
- /** @const */ var a_9 = [
- ["kk", -1, 1],
- ["pp", -1, 2],
- ["tt", -1, 3]
- ];
-
- /** @const */ var a_10 = [
- ["ma", -1, 2],
- ["mai", -1, 1],
- ["m", -1, 1]
- ];
-
- /** @const */ var a_11 = [
- ["joob", -1, 1],
- ["jood", -1, 1],
- ["joodakse", 1, 1],
- ["jooma", -1, 1],
- ["joomata", 3, 1],
- ["joome", -1, 1],
- ["joon", -1, 1],
- ["joote", -1, 1],
- ["joovad", -1, 1],
- ["juua", -1, 1],
- ["juuakse", 9, 1],
- ["j\u00E4i", -1, 12],
- ["j\u00E4id", 11, 12],
- ["j\u00E4ime", 11, 12],
- ["j\u00E4in", 11, 12],
- ["j\u00E4ite", 11, 12],
- ["j\u00E4\u00E4b", -1, 12],
- ["j\u00E4\u00E4d", -1, 12],
- ["j\u00E4\u00E4da", 17, 12],
- ["j\u00E4\u00E4dakse", 18, 12],
- ["j\u00E4\u00E4di", 17, 12],
- ["j\u00E4\u00E4ks", -1, 12],
- ["j\u00E4\u00E4ksid", 21, 12],
- ["j\u00E4\u00E4ksime", 21, 12],
- ["j\u00E4\u00E4ksin", 21, 12],
- ["j\u00E4\u00E4ksite", 21, 12],
- ["j\u00E4\u00E4ma", -1, 12],
- ["j\u00E4\u00E4mata", 26, 12],
- ["j\u00E4\u00E4me", -1, 12],
- ["j\u00E4\u00E4n", -1, 12],
- ["j\u00E4\u00E4te", -1, 12],
- ["j\u00E4\u00E4vad", -1, 12],
- ["j\u00F5i", -1, 1],
- ["j\u00F5id", 32, 1],
- ["j\u00F5ime", 32, 1],
- ["j\u00F5in", 32, 1],
- ["j\u00F5ite", 32, 1],
- ["keeb", -1, 4],
- ["keed", -1, 4],
- ["keedakse", 38, 4],
- ["keeks", -1, 4],
- ["keeksid", 40, 4],
- ["keeksime", 40, 4],
- ["keeksin", 40, 4],
- ["keeksite", 40, 4],
- ["keema", -1, 4],
- ["keemata", 45, 4],
- ["keeme", -1, 4],
- ["keen", -1, 4],
- ["kees", -1, 4],
- ["keeta", -1, 4],
- ["keete", -1, 4],
- ["keevad", -1, 4],
- ["k\u00E4ia", -1, 8],
- ["k\u00E4iakse", 53, 8],
- ["k\u00E4ib", -1, 8],
- ["k\u00E4id", -1, 8],
- ["k\u00E4idi", 56, 8],
- ["k\u00E4iks", -1, 8],
- ["k\u00E4iksid", 58, 8],
- ["k\u00E4iksime", 58, 8],
- ["k\u00E4iksin", 58, 8],
- ["k\u00E4iksite", 58, 8],
- ["k\u00E4ima", -1, 8],
- ["k\u00E4imata", 63, 8],
- ["k\u00E4ime", -1, 8],
- ["k\u00E4in", -1, 8],
- ["k\u00E4is", -1, 8],
- ["k\u00E4ite", -1, 8],
- ["k\u00E4ivad", -1, 8],
- ["laob", -1, 16],
- ["laod", -1, 16],
- ["laoks", -1, 16],
- ["laoksid", 72, 16],
- ["laoksime", 72, 16],
- ["laoksin", 72, 16],
- ["laoksite", 72, 16],
- ["laome", -1, 16],
- ["laon", -1, 16],
- ["laote", -1, 16],
- ["laovad", -1, 16],
- ["loeb", -1, 14],
- ["loed", -1, 14],
- ["loeks", -1, 14],
- ["loeksid", 83, 14],
- ["loeksime", 83, 14],
- ["loeksin", 83, 14],
- ["loeksite", 83, 14],
- ["loeme", -1, 14],
- ["loen", -1, 14],
- ["loete", -1, 14],
- ["loevad", -1, 14],
- ["loob", -1, 7],
- ["lood", -1, 7],
- ["loodi", 93, 7],
- ["looks", -1, 7],
- ["looksid", 95, 7],
- ["looksime", 95, 7],
- ["looksin", 95, 7],
- ["looksite", 95, 7],
- ["looma", -1, 7],
- ["loomata", 100, 7],
- ["loome", -1, 7],
- ["loon", -1, 7],
- ["loote", -1, 7],
- ["loovad", -1, 7],
- ["luua", -1, 7],
- ["luuakse", 106, 7],
- ["l\u00F5i", -1, 6],
- ["l\u00F5id", 108, 6],
- ["l\u00F5ime", 108, 6],
- ["l\u00F5in", 108, 6],
- ["l\u00F5ite", 108, 6],
- ["l\u00F6\u00F6b", -1, 5],
- ["l\u00F6\u00F6d", -1, 5],
- ["l\u00F6\u00F6dakse", 114, 5],
- ["l\u00F6\u00F6di", 114, 5],
- ["l\u00F6\u00F6ks", -1, 5],
- ["l\u00F6\u00F6ksid", 117, 5],
- ["l\u00F6\u00F6ksime", 117, 5],
- ["l\u00F6\u00F6ksin", 117, 5],
- ["l\u00F6\u00F6ksite", 117, 5],
- ["l\u00F6\u00F6ma", -1, 5],
- ["l\u00F6\u00F6mata", 122, 5],
- ["l\u00F6\u00F6me", -1, 5],
- ["l\u00F6\u00F6n", -1, 5],
- ["l\u00F6\u00F6te", -1, 5],
- ["l\u00F6\u00F6vad", -1, 5],
- ["l\u00FC\u00FCa", -1, 5],
- ["l\u00FC\u00FCakse", 128, 5],
- ["m\u00FC\u00FCa", -1, 13],
- ["m\u00FC\u00FCakse", 130, 13],
- ["m\u00FC\u00FCb", -1, 13],
- ["m\u00FC\u00FCd", -1, 13],
- ["m\u00FC\u00FCdi", 133, 13],
- ["m\u00FC\u00FCks", -1, 13],
- ["m\u00FC\u00FCksid", 135, 13],
- ["m\u00FC\u00FCksime", 135, 13],
- ["m\u00FC\u00FCksin", 135, 13],
- ["m\u00FC\u00FCksite", 135, 13],
- ["m\u00FC\u00FCma", -1, 13],
- ["m\u00FC\u00FCmata", 140, 13],
- ["m\u00FC\u00FCme", -1, 13],
- ["m\u00FC\u00FCn", -1, 13],
- ["m\u00FC\u00FCs", -1, 13],
- ["m\u00FC\u00FCte", -1, 13],
- ["m\u00FC\u00FCvad", -1, 13],
- ["n\u00E4eb", -1, 18],
- ["n\u00E4ed", -1, 18],
- ["n\u00E4eks", -1, 18],
- ["n\u00E4eksid", 149, 18],
- ["n\u00E4eksime", 149, 18],
- ["n\u00E4eksin", 149, 18],
- ["n\u00E4eksite", 149, 18],
- ["n\u00E4eme", -1, 18],
- ["n\u00E4en", -1, 18],
- ["n\u00E4ete", -1, 18],
- ["n\u00E4evad", -1, 18],
- ["n\u00E4gema", -1, 18],
- ["n\u00E4gemata", 158, 18],
- ["n\u00E4ha", -1, 18],
- ["n\u00E4hakse", 160, 18],
- ["n\u00E4hti", -1, 18],
- ["p\u00F5eb", -1, 15],
- ["p\u00F5ed", -1, 15],
- ["p\u00F5eks", -1, 15],
- ["p\u00F5eksid", 165, 15],
- ["p\u00F5eksime", 165, 15],
- ["p\u00F5eksin", 165, 15],
- ["p\u00F5eksite", 165, 15],
- ["p\u00F5eme", -1, 15],
- ["p\u00F5en", -1, 15],
- ["p\u00F5ete", -1, 15],
- ["p\u00F5evad", -1, 15],
- ["saab", -1, 2],
- ["saad", -1, 2],
- ["saada", 175, 2],
- ["saadakse", 176, 2],
- ["saadi", 175, 2],
- ["saaks", -1, 2],
- ["saaksid", 179, 2],
- ["saaksime", 179, 2],
- ["saaksin", 179, 2],
- ["saaksite", 179, 2],
- ["saama", -1, 2],
- ["saamata", 184, 2],
- ["saame", -1, 2],
- ["saan", -1, 2],
- ["saate", -1, 2],
- ["saavad", -1, 2],
- ["sai", -1, 2],
- ["said", 190, 2],
- ["saime", 190, 2],
- ["sain", 190, 2],
- ["saite", 190, 2],
- ["s\u00F5i", -1, 9],
- ["s\u00F5id", 195, 9],
- ["s\u00F5ime", 195, 9],
- ["s\u00F5in", 195, 9],
- ["s\u00F5ite", 195, 9],
- ["s\u00F6\u00F6b", -1, 9],
- ["s\u00F6\u00F6d", -1, 9],
- ["s\u00F6\u00F6dakse", 201, 9],
- ["s\u00F6\u00F6di", 201, 9],
- ["s\u00F6\u00F6ks", -1, 9],
- ["s\u00F6\u00F6ksid", 204, 9],
- ["s\u00F6\u00F6ksime", 204, 9],
- ["s\u00F6\u00F6ksin", 204, 9],
- ["s\u00F6\u00F6ksite", 204, 9],
- ["s\u00F6\u00F6ma", -1, 9],
- ["s\u00F6\u00F6mata", 209, 9],
- ["s\u00F6\u00F6me", -1, 9],
- ["s\u00F6\u00F6n", -1, 9],
- ["s\u00F6\u00F6te", -1, 9],
- ["s\u00F6\u00F6vad", -1, 9],
- ["s\u00FC\u00FCa", -1, 9],
- ["s\u00FC\u00FCakse", 215, 9],
- ["teeb", -1, 17],
- ["teed", -1, 17],
- ["teeks", -1, 17],
- ["teeksid", 219, 17],
- ["teeksime", 219, 17],
- ["teeksin", 219, 17],
- ["teeksite", 219, 17],
- ["teeme", -1, 17],
- ["teen", -1, 17],
- ["teete", -1, 17],
- ["teevad", -1, 17],
- ["tegema", -1, 17],
- ["tegemata", 228, 17],
- ["teha", -1, 17],
- ["tehakse", 230, 17],
- ["tehti", -1, 17],
- ["toob", -1, 10],
- ["tood", -1, 10],
- ["toodi", 234, 10],
- ["tooks", -1, 10],
- ["tooksid", 236, 10],
- ["tooksime", 236, 10],
- ["tooksin", 236, 10],
- ["tooksite", 236, 10],
- ["tooma", -1, 10],
- ["toomata", 241, 10],
- ["toome", -1, 10],
- ["toon", -1, 10],
- ["toote", -1, 10],
- ["toovad", -1, 10],
- ["tuua", -1, 10],
- ["tuuakse", 247, 10],
- ["t\u00F5i", -1, 10],
- ["t\u00F5id", 249, 10],
- ["t\u00F5ime", 249, 10],
- ["t\u00F5in", 249, 10],
- ["t\u00F5ite", 249, 10],
- ["viia", -1, 3],
- ["viiakse", 254, 3],
- ["viib", -1, 3],
- ["viid", -1, 3],
- ["viidi", 257, 3],
- ["viiks", -1, 3],
- ["viiksid", 259, 3],
- ["viiksime", 259, 3],
- ["viiksin", 259, 3],
- ["viiksite", 259, 3],
- ["viima", -1, 3],
- ["viimata", 264, 3],
- ["viime", -1, 3],
- ["viin", -1, 3],
- ["viisime", -1, 3],
- ["viisin", -1, 3],
- ["viisite", -1, 3],
- ["viite", -1, 3],
- ["viivad", -1, 3],
- ["v\u00F5ib", -1, 11],
- ["v\u00F5id", -1, 11],
- ["v\u00F5ida", 274, 11],
- ["v\u00F5idakse", 275, 11],
- ["v\u00F5idi", 274, 11],
- ["v\u00F5iks", -1, 11],
- ["v\u00F5iksid", 278, 11],
- ["v\u00F5iksime", 278, 11],
- ["v\u00F5iksin", 278, 11],
- ["v\u00F5iksite", 278, 11],
- ["v\u00F5ima", -1, 11],
- ["v\u00F5imata", 283, 11],
- ["v\u00F5ime", -1, 11],
- ["v\u00F5in", -1, 11],
- ["v\u00F5is", -1, 11],
- ["v\u00F5ite", -1, 11],
- ["v\u00F5ivad", -1, 11]
- ];
-
- /** @const */ var /** Array */ g_V1 = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 48, 8];
-
- /** @const */ var /** Array */ g_RV = [17, 65, 16];
-
- /** @const */ var /** Array */ g_KI = [117, 66, 6, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 16];
-
- /** @const */ var /** Array */ g_GI = [21, 123, 243, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 48, 8];
-
- var /** number */ I_p1 = 0;
-
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_p1 = base.limit;
- golab0: while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab1: {
- if (!(base.in_grouping(g_V1, 97, 252)))
- {
- break lab1;
- }
- base.cursor = v_1;
- break golab0;
- }
- base.cursor = v_1;
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- golab2: while(true)
- {
- lab3: {
- if (!(base.out_grouping(g_V1, 97, 252)))
- {
- break lab3;
- }
- break golab2;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- return true;
- };
-
- /** @return {boolean} */
- function r_emphasis() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_0);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- var /** number */ v_3 = base.limit - base.cursor;
- {
- var /** number */ c1 = base.cursor - 4;
- if (c1 < base.limit_backward)
- {
- return false;
- }
- base.cursor = c1;
- }
- base.cursor = base.limit - v_3;
- switch (among_var) {
- case 1:
- var /** number */ v_4 = base.limit - base.cursor;
- if (!(base.in_grouping_b(g_GI, 97, 252)))
- {
- return false;
- }
- base.cursor = base.limit - v_4;
- {
- var /** number */ v_5 = base.limit - base.cursor;
- lab0: {
- if (!r_LONGV())
- {
- break lab0;
- }
- return false;
- }
- base.cursor = base.limit - v_5;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!(base.in_grouping_b(g_KI, 98, 382)))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_verb() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_1);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 3:
- if (!(base.in_grouping_b(g_V1, 97, 252)))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_LONGV() {
- if (base.find_among_b(a_2) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_i_plural() {
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- if (base.find_among_b(a_3) == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- if (!(base.in_grouping_b(g_RV, 97, 117)))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_special_noun_endings() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_4);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- switch (among_var) {
- case 1:
- if (!base.slice_from("lase"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("mise"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("lise"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_case_ending() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_5);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- switch (among_var) {
- case 1:
- lab0: {
- var /** number */ v_3 = base.limit - base.cursor;
- lab1: {
- if (!(base.in_grouping_b(g_RV, 97, 117)))
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_3;
- if (!r_LONGV())
- {
- return false;
- }
- }
- break;
- case 2:
- var /** number */ v_4 = base.limit - base.cursor;
- {
- var /** number */ c1 = base.cursor - 4;
- if (c1 < base.limit_backward)
- {
- return false;
- }
- base.cursor = c1;
- }
- base.cursor = base.limit - v_4;
- break;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_plural_three_first_cases() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_7);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- switch (among_var) {
- case 1:
- if (!base.slice_from("iku"))
- {
- return false;
- }
- break;
- case 2:
- {
- var /** number */ v_3 = base.limit - base.cursor;
- lab0: {
- if (!r_LONGV())
- {
- break lab0;
- }
- return false;
- }
- base.cursor = base.limit - v_3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- lab1: {
- var /** number */ v_4 = base.limit - base.cursor;
- lab2: {
- var /** number */ v_5 = base.limit - base.cursor;
- {
- var /** number */ c1 = base.cursor - 4;
- if (c1 < base.limit_backward)
- {
- break lab2;
- }
- base.cursor = c1;
- }
- base.cursor = base.limit - v_5;
- among_var = base.find_among_b(a_6);
- switch (among_var) {
- case 1:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- break lab1;
- }
- base.cursor = base.limit - v_4;
- if (!base.slice_from("t"))
- {
- return false;
- }
- }
- break;
- case 4:
- lab3: {
- var /** number */ v_6 = base.limit - base.cursor;
- lab4: {
- if (!(base.in_grouping_b(g_RV, 97, 117)))
- {
- break lab4;
- }
- break lab3;
- }
- base.cursor = base.limit - v_6;
- if (!r_LONGV())
- {
- return false;
- }
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_nu() {
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- if (base.find_among_b(a_8) == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_undouble_kpt() {
- var /** number */ among_var;
- if (!(base.in_grouping_b(g_V1, 97, 252)))
- {
- return false;
- }
- if (I_p1 > base.cursor)
- {
- return false;
- }
- base.ket = base.cursor;
- among_var = base.find_among_b(a_9);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("k"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("p"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("t"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_degrees() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_10);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- switch (among_var) {
- case 1:
- if (!(base.in_grouping_b(g_RV, 97, 117)))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_substantive() {
- var /** number */ v_1 = base.limit - base.cursor;
- r_special_noun_endings();
- base.cursor = base.limit - v_1;
- var /** number */ v_2 = base.limit - base.cursor;
- r_case_ending();
- base.cursor = base.limit - v_2;
- var /** number */ v_3 = base.limit - base.cursor;
- r_plural_three_first_cases();
- base.cursor = base.limit - v_3;
- var /** number */ v_4 = base.limit - base.cursor;
- r_degrees();
- base.cursor = base.limit - v_4;
- var /** number */ v_5 = base.limit - base.cursor;
- r_i_plural();
- base.cursor = base.limit - v_5;
- var /** number */ v_6 = base.limit - base.cursor;
- r_nu();
- base.cursor = base.limit - v_6;
- return true;
- };
-
- /** @return {boolean} */
- function r_verb_exceptions() {
- var /** number */ among_var;
- base.bra = base.cursor;
- among_var = base.find_among(a_11);
- if (among_var == 0)
- {
- return false;
- }
- base.ket = base.cursor;
- if (base.cursor < base.limit)
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("joo"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("saa"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("viima"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("keesi"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("l\u00F6\u00F6"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("l\u00F5i"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("loo"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("k\u00E4isi"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("s\u00F6\u00F6"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("too"))
- {
- return false;
- }
- break;
- case 11:
- if (!base.slice_from("v\u00F5isi"))
- {
- return false;
- }
- break;
- case 12:
- if (!base.slice_from("j\u00E4\u00E4ma"))
- {
- return false;
- }
- break;
- case 13:
- if (!base.slice_from("m\u00FC\u00FCsi"))
- {
- return false;
- }
- break;
- case 14:
- if (!base.slice_from("luge"))
- {
- return false;
- }
- break;
- case 15:
- if (!base.slice_from("p\u00F5de"))
- {
- return false;
- }
- break;
- case 16:
- if (!base.slice_from("ladu"))
- {
- return false;
- }
- break;
- case 17:
- if (!base.slice_from("tegi"))
- {
- return false;
- }
- break;
- case 18:
- if (!base.slice_from("n\u00E4gi"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- if (!r_verb_exceptions())
- {
- break lab0;
- }
- return false;
- }
- base.cursor = v_1;
- }
- var /** number */ v_2 = base.cursor;
- r_mark_regions();
- base.cursor = v_2;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_3 = base.limit - base.cursor;
- r_emphasis();
- base.cursor = base.limit - v_3;
- var /** number */ v_4 = base.limit - base.cursor;
- lab1: {
- lab2: {
- var /** number */ v_5 = base.limit - base.cursor;
- lab3: {
- if (!r_verb())
- {
- break lab3;
- }
- break lab2;
- }
- base.cursor = base.limit - v_5;
- r_substantive();
- }
- }
- base.cursor = base.limit - v_4;
- var /** number */ v_6 = base.limit - base.cursor;
- r_undouble_kpt();
- base.cursor = base.limit - v_6;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['EstonianStemmer'] = EstonianStemmer;
diff --git a/js/finnish-stemmer.js b/js/finnish-stemmer.js
deleted file mode 100644
index d78ceff..0000000
--- a/js/finnish-stemmer.js
+++ /dev/null
@@ -1,788 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var FinnishStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["pa", -1, 1],
- ["sti", -1, 2],
- ["kaan", -1, 1],
- ["han", -1, 1],
- ["kin", -1, 1],
- ["h\u00E4n", -1, 1],
- ["k\u00E4\u00E4n", -1, 1],
- ["ko", -1, 1],
- ["p\u00E4", -1, 1],
- ["k\u00F6", -1, 1]
- ];
-
- /** @const */ var a_1 = [
- ["lla", -1, -1],
- ["na", -1, -1],
- ["ssa", -1, -1],
- ["ta", -1, -1],
- ["lta", 3, -1],
- ["sta", 3, -1]
- ];
-
- /** @const */ var a_2 = [
- ["ll\u00E4", -1, -1],
- ["n\u00E4", -1, -1],
- ["ss\u00E4", -1, -1],
- ["t\u00E4", -1, -1],
- ["lt\u00E4", 3, -1],
- ["st\u00E4", 3, -1]
- ];
-
- /** @const */ var a_3 = [
- ["lle", -1, -1],
- ["ine", -1, -1]
- ];
-
- /** @const */ var a_4 = [
- ["nsa", -1, 3],
- ["mme", -1, 3],
- ["nne", -1, 3],
- ["ni", -1, 2],
- ["si", -1, 1],
- ["an", -1, 4],
- ["en", -1, 6],
- ["\u00E4n", -1, 5],
- ["ns\u00E4", -1, 3]
- ];
-
- /** @const */ var a_5 = [
- ["aa", -1, -1],
- ["ee", -1, -1],
- ["ii", -1, -1],
- ["oo", -1, -1],
- ["uu", -1, -1],
- ["\u00E4\u00E4", -1, -1],
- ["\u00F6\u00F6", -1, -1]
- ];
-
- /** @const */ var a_6 = [
- ["a", -1, 8],
- ["lla", 0, -1],
- ["na", 0, -1],
- ["ssa", 0, -1],
- ["ta", 0, -1],
- ["lta", 4, -1],
- ["sta", 4, -1],
- ["tta", 4, 2],
- ["lle", -1, -1],
- ["ine", -1, -1],
- ["ksi", -1, -1],
- ["n", -1, 7],
- ["han", 11, 1],
- ["den", 11, -1, r_VI],
- ["seen", 11, -1, r_LONG],
- ["hen", 11, 2],
- ["tten", 11, -1, r_VI],
- ["hin", 11, 3],
- ["siin", 11, -1, r_VI],
- ["hon", 11, 4],
- ["h\u00E4n", 11, 5],
- ["h\u00F6n", 11, 6],
- ["\u00E4", -1, 8],
- ["ll\u00E4", 22, -1],
- ["n\u00E4", 22, -1],
- ["ss\u00E4", 22, -1],
- ["t\u00E4", 22, -1],
- ["lt\u00E4", 26, -1],
- ["st\u00E4", 26, -1],
- ["tt\u00E4", 26, 2]
- ];
-
- /** @const */ var a_7 = [
- ["eja", -1, -1],
- ["mma", -1, 1],
- ["imma", 1, -1],
- ["mpa", -1, 1],
- ["impa", 3, -1],
- ["mmi", -1, 1],
- ["immi", 5, -1],
- ["mpi", -1, 1],
- ["impi", 7, -1],
- ["ej\u00E4", -1, -1],
- ["mm\u00E4", -1, 1],
- ["imm\u00E4", 10, -1],
- ["mp\u00E4", -1, 1],
- ["imp\u00E4", 12, -1]
- ];
-
- /** @const */ var a_8 = [
- ["i", -1, -1],
- ["j", -1, -1]
- ];
-
- /** @const */ var a_9 = [
- ["mma", -1, 1],
- ["imma", 0, -1]
- ];
-
- /** @const */ var /** Array */ g_AEI = [17, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8];
-
- /** @const */ var /** Array */ g_C = [119, 223, 119, 1];
-
- /** @const */ var /** Array */ g_V1 = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32];
-
- /** @const */ var /** Array */ g_V2 = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32];
-
- /** @const */ var /** Array */ g_particle_end = [17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32];
-
- var /** boolean */ B_ending_removed = false;
- var /** string */ S_x = '';
- var /** number */ I_p2 = 0;
- var /** number */ I_p1 = 0;
-
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_p1 = base.limit;
- I_p2 = base.limit;
- golab0: while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab1: {
- if (!(base.in_grouping(g_V1, 97, 246)))
- {
- break lab1;
- }
- base.cursor = v_1;
- break golab0;
- }
- base.cursor = v_1;
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- golab2: while(true)
- {
- lab3: {
- if (!(base.out_grouping(g_V1, 97, 246)))
- {
- break lab3;
- }
- break golab2;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- golab4: while(true)
- {
- var /** number */ v_3 = base.cursor;
- lab5: {
- if (!(base.in_grouping(g_V1, 97, 246)))
- {
- break lab5;
- }
- base.cursor = v_3;
- break golab4;
- }
- base.cursor = v_3;
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- golab6: while(true)
- {
- lab7: {
- if (!(base.out_grouping(g_V1, 97, 246)))
- {
- break lab7;
- }
- break golab6;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- return true;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_particle_etc() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_0);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- switch (among_var) {
- case 1:
- if (!(base.in_grouping_b(g_particle_end, 97, 246)))
- {
- return false;
- }
- break;
- case 2:
- if (!r_R2())
- {
- return false;
- }
- break;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_possessive() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_4);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- switch (among_var) {
- case 1:
- {
- var /** number */ v_3 = base.limit - base.cursor;
- lab0: {
- if (!(base.eq_s_b("k")))
- {
- break lab0;
- }
- return false;
- }
- base.cursor = base.limit - v_3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_del())
- {
- return false;
- }
- base.ket = base.cursor;
- if (!(base.eq_s_b("kse")))
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_from("ksi"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 4:
- if (base.find_among_b(a_1) == 0)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 5:
- if (base.find_among_b(a_2) == 0)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 6:
- if (base.find_among_b(a_3) == 0)
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_LONG() {
- if (base.find_among_b(a_5) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_VI() {
- if (!(base.eq_s_b("i")))
- {
- return false;
- }
- if (!(base.in_grouping_b(g_V2, 97, 246)))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_case_ending() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_6);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- switch (among_var) {
- case 1:
- if (!(base.eq_s_b("a")))
- {
- return false;
- }
- break;
- case 2:
- if (!(base.eq_s_b("e")))
- {
- return false;
- }
- break;
- case 3:
- if (!(base.eq_s_b("i")))
- {
- return false;
- }
- break;
- case 4:
- if (!(base.eq_s_b("o")))
- {
- return false;
- }
- break;
- case 5:
- if (!(base.eq_s_b("\u00E4")))
- {
- return false;
- }
- break;
- case 6:
- if (!(base.eq_s_b("\u00F6")))
- {
- return false;
- }
- break;
- case 7:
- var /** number */ v_3 = base.limit - base.cursor;
- lab0: {
- var /** number */ v_4 = base.limit - base.cursor;
- lab1: {
- var /** number */ v_5 = base.limit - base.cursor;
- lab2: {
- if (!r_LONG())
- {
- break lab2;
- }
- break lab1;
- }
- base.cursor = base.limit - v_5;
- if (!(base.eq_s_b("ie")))
- {
- base.cursor = base.limit - v_3;
- break lab0;
- }
- }
- base.cursor = base.limit - v_4;
- if (base.cursor <= base.limit_backward)
- {
- base.cursor = base.limit - v_3;
- break lab0;
- }
- base.cursor--;
- base.bra = base.cursor;
- }
- break;
- case 8:
- if (!(base.in_grouping_b(g_V1, 97, 246)))
- {
- return false;
- }
- if (!(base.in_grouping_b(g_C, 98, 122)))
- {
- return false;
- }
- break;
- }
- if (!base.slice_del())
- {
- return false;
- }
- B_ending_removed = true;
- return true;
- };
-
- /** @return {boolean} */
- function r_other_endings() {
- var /** number */ among_var;
- if (base.cursor < I_p2)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p2;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_7);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- switch (among_var) {
- case 1:
- {
- var /** number */ v_3 = base.limit - base.cursor;
- lab0: {
- if (!(base.eq_s_b("po")))
- {
- break lab0;
- }
- return false;
- }
- base.cursor = base.limit - v_3;
- }
- break;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_i_plural() {
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- if (base.find_among_b(a_8) == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_t_plural() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- if (!(base.eq_s_b("t")))
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- var /** number */ v_3 = base.limit - base.cursor;
- if (!(base.in_grouping_b(g_V1, 97, 246)))
- {
- base.limit_backward = v_2;
- return false;
- }
- base.cursor = base.limit - v_3;
- if (!base.slice_del())
- {
- return false;
- }
- base.limit_backward = v_2;
- if (base.cursor < I_p2)
- {
- return false;
- }
- var /** number */ v_5 = base.limit_backward;
- base.limit_backward = I_p2;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_9);
- if (among_var == 0)
- {
- base.limit_backward = v_5;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_5;
- switch (among_var) {
- case 1:
- {
- var /** number */ v_6 = base.limit - base.cursor;
- lab0: {
- if (!(base.eq_s_b("po")))
- {
- break lab0;
- }
- return false;
- }
- base.cursor = base.limit - v_6;
- }
- break;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_tidy() {
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- var /** number */ v_3 = base.limit - base.cursor;
- lab0: {
- var /** number */ v_4 = base.limit - base.cursor;
- if (!r_LONG())
- {
- break lab0;
- }
- base.cursor = base.limit - v_4;
- base.ket = base.cursor;
- if (base.cursor <= base.limit_backward)
- {
- break lab0;
- }
- base.cursor--;
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- }
- base.cursor = base.limit - v_3;
- var /** number */ v_5 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- if (!(base.in_grouping_b(g_AEI, 97, 228)))
- {
- break lab1;
- }
- base.bra = base.cursor;
- if (!(base.in_grouping_b(g_C, 98, 122)))
- {
- break lab1;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- base.cursor = base.limit - v_5;
- var /** number */ v_6 = base.limit - base.cursor;
- lab2: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("j")))
- {
- break lab2;
- }
- base.bra = base.cursor;
- lab3: {
- var /** number */ v_7 = base.limit - base.cursor;
- lab4: {
- if (!(base.eq_s_b("o")))
- {
- break lab4;
- }
- break lab3;
- }
- base.cursor = base.limit - v_7;
- if (!(base.eq_s_b("u")))
- {
- break lab2;
- }
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- base.cursor = base.limit - v_6;
- var /** number */ v_8 = base.limit - base.cursor;
- lab5: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("o")))
- {
- break lab5;
- }
- base.bra = base.cursor;
- if (!(base.eq_s_b("j")))
- {
- break lab5;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- base.cursor = base.limit - v_8;
- base.limit_backward = v_2;
- golab6: while(true)
- {
- var /** number */ v_9 = base.limit - base.cursor;
- lab7: {
- if (!(base.out_grouping_b(g_V1, 97, 246)))
- {
- break lab7;
- }
- base.cursor = base.limit - v_9;
- break golab6;
- }
- base.cursor = base.limit - v_9;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- }
- base.ket = base.cursor;
- if (!(base.in_grouping_b(g_C, 98, 122)))
- {
- return false;
- }
- base.bra = base.cursor;
- S_x = base.slice_to();
- if (S_x == '')
- {
- return false;
- }
- if (!(base.eq_s_b(S_x)))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- var /** number */ v_1 = base.cursor;
- r_mark_regions();
- base.cursor = v_1;
- B_ending_removed = false;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_2 = base.limit - base.cursor;
- r_particle_etc();
- base.cursor = base.limit - v_2;
- var /** number */ v_3 = base.limit - base.cursor;
- r_possessive();
- base.cursor = base.limit - v_3;
- var /** number */ v_4 = base.limit - base.cursor;
- r_case_ending();
- base.cursor = base.limit - v_4;
- var /** number */ v_5 = base.limit - base.cursor;
- r_other_endings();
- base.cursor = base.limit - v_5;
- lab0: {
- lab1: {
- if (!B_ending_removed)
- {
- break lab1;
- }
- var /** number */ v_7 = base.limit - base.cursor;
- r_i_plural();
- base.cursor = base.limit - v_7;
- break lab0;
- }
- var /** number */ v_8 = base.limit - base.cursor;
- r_t_plural();
- base.cursor = base.limit - v_8;
- }
- var /** number */ v_9 = base.limit - base.cursor;
- r_tidy();
- base.cursor = base.limit - v_9;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['FinnishStemmer'] = FinnishStemmer;
diff --git a/js/french-stemmer.js b/js/french-stemmer.js
deleted file mode 100644
index 36475be..0000000
--- a/js/french-stemmer.js
+++ /dev/null
@@ -1,1311 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var FrenchStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["col", -1, -1],
- ["par", -1, -1],
- ["tap", -1, -1]
- ];
-
- /** @const */ var a_1 = [
- ["", -1, 7],
- ["H", 0, 6],
- ["He", 1, 4],
- ["Hi", 1, 5],
- ["I", 0, 1],
- ["U", 0, 2],
- ["Y", 0, 3]
- ];
-
- /** @const */ var a_2 = [
- ["iqU", -1, 3],
- ["abl", -1, 3],
- ["I\u00E8r", -1, 4],
- ["i\u00E8r", -1, 4],
- ["eus", -1, 2],
- ["iv", -1, 1]
- ];
-
- /** @const */ var a_3 = [
- ["ic", -1, 2],
- ["abil", -1, 1],
- ["iv", -1, 3]
- ];
-
- /** @const */ var a_4 = [
- ["iqUe", -1, 1],
- ["atrice", -1, 2],
- ["ance", -1, 1],
- ["ence", -1, 5],
- ["logie", -1, 3],
- ["able", -1, 1],
- ["isme", -1, 1],
- ["euse", -1, 11],
- ["iste", -1, 1],
- ["ive", -1, 8],
- ["if", -1, 8],
- ["usion", -1, 4],
- ["ation", -1, 2],
- ["ution", -1, 4],
- ["ateur", -1, 2],
- ["iqUes", -1, 1],
- ["atrices", -1, 2],
- ["ances", -1, 1],
- ["ences", -1, 5],
- ["logies", -1, 3],
- ["ables", -1, 1],
- ["ismes", -1, 1],
- ["euses", -1, 11],
- ["istes", -1, 1],
- ["ives", -1, 8],
- ["ifs", -1, 8],
- ["usions", -1, 4],
- ["ations", -1, 2],
- ["utions", -1, 4],
- ["ateurs", -1, 2],
- ["ments", -1, 15],
- ["ements", 30, 6],
- ["issements", 31, 12],
- ["it\u00E9s", -1, 7],
- ["ment", -1, 15],
- ["ement", 34, 6],
- ["issement", 35, 12],
- ["amment", 34, 13],
- ["emment", 34, 14],
- ["aux", -1, 10],
- ["eaux", 39, 9],
- ["eux", -1, 1],
- ["it\u00E9", -1, 7]
- ];
-
- /** @const */ var a_5 = [
- ["ira", -1, 1],
- ["ie", -1, 1],
- ["isse", -1, 1],
- ["issante", -1, 1],
- ["i", -1, 1],
- ["irai", 4, 1],
- ["ir", -1, 1],
- ["iras", -1, 1],
- ["ies", -1, 1],
- ["\u00EEmes", -1, 1],
- ["isses", -1, 1],
- ["issantes", -1, 1],
- ["\u00EEtes", -1, 1],
- ["is", -1, 1],
- ["irais", 13, 1],
- ["issais", 13, 1],
- ["irions", -1, 1],
- ["issions", -1, 1],
- ["irons", -1, 1],
- ["issons", -1, 1],
- ["issants", -1, 1],
- ["it", -1, 1],
- ["irait", 21, 1],
- ["issait", 21, 1],
- ["issant", -1, 1],
- ["iraIent", -1, 1],
- ["issaIent", -1, 1],
- ["irent", -1, 1],
- ["issent", -1, 1],
- ["iront", -1, 1],
- ["\u00EEt", -1, 1],
- ["iriez", -1, 1],
- ["issiez", -1, 1],
- ["irez", -1, 1],
- ["issez", -1, 1]
- ];
-
- /** @const */ var a_6 = [
- ["a", -1, 3],
- ["era", 0, 2],
- ["asse", -1, 3],
- ["ante", -1, 3],
- ["\u00E9e", -1, 2],
- ["ai", -1, 3],
- ["erai", 5, 2],
- ["er", -1, 2],
- ["as", -1, 3],
- ["eras", 8, 2],
- ["\u00E2mes", -1, 3],
- ["asses", -1, 3],
- ["antes", -1, 3],
- ["\u00E2tes", -1, 3],
- ["\u00E9es", -1, 2],
- ["ais", -1, 3],
- ["erais", 15, 2],
- ["ions", -1, 1],
- ["erions", 17, 2],
- ["assions", 17, 3],
- ["erons", -1, 2],
- ["ants", -1, 3],
- ["\u00E9s", -1, 2],
- ["ait", -1, 3],
- ["erait", 23, 2],
- ["ant", -1, 3],
- ["aIent", -1, 3],
- ["eraIent", 26, 2],
- ["\u00E8rent", -1, 2],
- ["assent", -1, 3],
- ["eront", -1, 2],
- ["\u00E2t", -1, 3],
- ["ez", -1, 2],
- ["iez", 32, 2],
- ["eriez", 33, 2],
- ["assiez", 33, 3],
- ["erez", 32, 2],
- ["\u00E9", -1, 2]
- ];
-
- /** @const */ var a_7 = [
- ["e", -1, 3],
- ["I\u00E8re", 0, 2],
- ["i\u00E8re", 0, 2],
- ["ion", -1, 1],
- ["Ier", -1, 2],
- ["ier", -1, 2]
- ];
-
- /** @const */ var a_8 = [
- ["ell", -1, -1],
- ["eill", -1, -1],
- ["enn", -1, -1],
- ["onn", -1, -1],
- ["ett", -1, -1]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 130, 103, 8, 5];
-
- /** @const */ var /** Array */ g_keep_with_s = [1, 65, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128];
-
- var /** number */ I_p2 = 0;
- var /** number */ I_p1 = 0;
- var /** number */ I_pV = 0;
-
-
- /** @return {boolean} */
- function r_prelude() {
- while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- golab1: while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab2: {
- lab3: {
- var /** number */ v_3 = base.cursor;
- lab4: {
- if (!(base.in_grouping(g_v, 97, 251)))
- {
- break lab4;
- }
- base.bra = base.cursor;
- lab5: {
- var /** number */ v_4 = base.cursor;
- lab6: {
- if (!(base.eq_s("u")))
- {
- break lab6;
- }
- base.ket = base.cursor;
- if (!(base.in_grouping(g_v, 97, 251)))
- {
- break lab6;
- }
- if (!base.slice_from("U"))
- {
- return false;
- }
- break lab5;
- }
- base.cursor = v_4;
- lab7: {
- if (!(base.eq_s("i")))
- {
- break lab7;
- }
- base.ket = base.cursor;
- if (!(base.in_grouping(g_v, 97, 251)))
- {
- break lab7;
- }
- if (!base.slice_from("I"))
- {
- return false;
- }
- break lab5;
- }
- base.cursor = v_4;
- if (!(base.eq_s("y")))
- {
- break lab4;
- }
- base.ket = base.cursor;
- if (!base.slice_from("Y"))
- {
- return false;
- }
- }
- break lab3;
- }
- base.cursor = v_3;
- lab8: {
- base.bra = base.cursor;
- if (!(base.eq_s("\u00EB")))
- {
- break lab8;
- }
- base.ket = base.cursor;
- if (!base.slice_from("He"))
- {
- return false;
- }
- break lab3;
- }
- base.cursor = v_3;
- lab9: {
- base.bra = base.cursor;
- if (!(base.eq_s("\u00EF")))
- {
- break lab9;
- }
- base.ket = base.cursor;
- if (!base.slice_from("Hi"))
- {
- return false;
- }
- break lab3;
- }
- base.cursor = v_3;
- lab10: {
- base.bra = base.cursor;
- if (!(base.eq_s("y")))
- {
- break lab10;
- }
- base.ket = base.cursor;
- if (!(base.in_grouping(g_v, 97, 251)))
- {
- break lab10;
- }
- if (!base.slice_from("Y"))
- {
- return false;
- }
- break lab3;
- }
- base.cursor = v_3;
- if (!(base.eq_s("q")))
- {
- break lab2;
- }
- base.bra = base.cursor;
- if (!(base.eq_s("u")))
- {
- break lab2;
- }
- base.ket = base.cursor;
- if (!base.slice_from("U"))
- {
- return false;
- }
- }
- base.cursor = v_2;
- break golab1;
- }
- base.cursor = v_2;
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- continue;
- }
- base.cursor = v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_pV = base.limit;
- I_p1 = base.limit;
- I_p2 = base.limit;
- var /** number */ v_1 = base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_2 = base.cursor;
- lab2: {
- if (!(base.in_grouping(g_v, 97, 251)))
- {
- break lab2;
- }
- if (!(base.in_grouping(g_v, 97, 251)))
- {
- break lab2;
- }
- if (base.cursor >= base.limit)
- {
- break lab2;
- }
- base.cursor++;
- break lab1;
- }
- base.cursor = v_2;
- lab3: {
- if (base.find_among(a_0) == 0)
- {
- break lab3;
- }
- break lab1;
- }
- base.cursor = v_2;
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- golab4: while(true)
- {
- lab5: {
- if (!(base.in_grouping(g_v, 97, 251)))
- {
- break lab5;
- }
- break golab4;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- }
- I_pV = base.cursor;
- }
- base.cursor = v_1;
- var /** number */ v_4 = base.cursor;
- lab6: {
- golab7: while(true)
- {
- lab8: {
- if (!(base.in_grouping(g_v, 97, 251)))
- {
- break lab8;
- }
- break golab7;
- }
- if (base.cursor >= base.limit)
- {
- break lab6;
- }
- base.cursor++;
- }
- golab9: while(true)
- {
- lab10: {
- if (!(base.out_grouping(g_v, 97, 251)))
- {
- break lab10;
- }
- break golab9;
- }
- if (base.cursor >= base.limit)
- {
- break lab6;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- golab11: while(true)
- {
- lab12: {
- if (!(base.in_grouping(g_v, 97, 251)))
- {
- break lab12;
- }
- break golab11;
- }
- if (base.cursor >= base.limit)
- {
- break lab6;
- }
- base.cursor++;
- }
- golab13: while(true)
- {
- lab14: {
- if (!(base.out_grouping(g_v, 97, 251)))
- {
- break lab14;
- }
- break golab13;
- }
- if (base.cursor >= base.limit)
- {
- break lab6;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- }
- base.cursor = v_4;
- return true;
- };
-
- /** @return {boolean} */
- function r_postlude() {
- var /** number */ among_var;
- while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- among_var = base.find_among(a_1);
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("u"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("y"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("\u00EB"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("\u00EF"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 7:
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- break;
- }
- continue;
- }
- base.cursor = v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_RV() {
- return I_pV <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_standard_suffix() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_4);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("ic")))
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- base.bra = base.cursor;
- lab1: {
- var /** number */ v_2 = base.limit - base.cursor;
- lab2: {
- if (!r_R2())
- {
- break lab2;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break lab1;
- }
- base.cursor = base.limit - v_2;
- if (!base.slice_from("iqU"))
- {
- return false;
- }
- }
- }
- break;
- case 3:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("log"))
- {
- return false;
- }
- break;
- case 4:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("u"))
- {
- return false;
- }
- break;
- case 5:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("ent"))
- {
- return false;
- }
- break;
- case 6:
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_3 = base.limit - base.cursor;
- lab3: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- base.cursor = base.limit - v_3;
- break lab3;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R2())
- {
- base.cursor = base.limit - v_3;
- break lab3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- base.ket = base.cursor;
- if (!(base.eq_s_b("at")))
- {
- base.cursor = base.limit - v_3;
- break lab3;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_3;
- break lab3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- lab4: {
- var /** number */ v_4 = base.limit - base.cursor;
- lab5: {
- if (!r_R2())
- {
- break lab5;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break lab4;
- }
- base.cursor = base.limit - v_4;
- if (!r_R1())
- {
- base.cursor = base.limit - v_3;
- break lab3;
- }
- if (!base.slice_from("eux"))
- {
- return false;
- }
- }
- break;
- case 3:
- if (!r_R2())
- {
- base.cursor = base.limit - v_3;
- break lab3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 4:
- if (!r_RV())
- {
- base.cursor = base.limit - v_3;
- break lab3;
- }
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- }
- }
- break;
- case 7:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_5 = base.limit - base.cursor;
- lab6: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_3);
- if (among_var == 0)
- {
- base.cursor = base.limit - v_5;
- break lab6;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- lab7: {
- var /** number */ v_6 = base.limit - base.cursor;
- lab8: {
- if (!r_R2())
- {
- break lab8;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break lab7;
- }
- base.cursor = base.limit - v_6;
- if (!base.slice_from("abl"))
- {
- return false;
- }
- }
- break;
- case 2:
- lab9: {
- var /** number */ v_7 = base.limit - base.cursor;
- lab10: {
- if (!r_R2())
- {
- break lab10;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break lab9;
- }
- base.cursor = base.limit - v_7;
- if (!base.slice_from("iqU"))
- {
- return false;
- }
- }
- break;
- case 3:
- if (!r_R2())
- {
- base.cursor = base.limit - v_5;
- break lab6;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- }
- break;
- case 8:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_8 = base.limit - base.cursor;
- lab11: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("at")))
- {
- base.cursor = base.limit - v_8;
- break lab11;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_8;
- break lab11;
- }
- if (!base.slice_del())
- {
- return false;
- }
- base.ket = base.cursor;
- if (!(base.eq_s_b("ic")))
- {
- base.cursor = base.limit - v_8;
- break lab11;
- }
- base.bra = base.cursor;
- lab12: {
- var /** number */ v_9 = base.limit - base.cursor;
- lab13: {
- if (!r_R2())
- {
- break lab13;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break lab12;
- }
- base.cursor = base.limit - v_9;
- if (!base.slice_from("iqU"))
- {
- return false;
- }
- }
- }
- break;
- case 9:
- if (!base.slice_from("eau"))
- {
- return false;
- }
- break;
- case 10:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_from("al"))
- {
- return false;
- }
- break;
- case 11:
- lab14: {
- var /** number */ v_10 = base.limit - base.cursor;
- lab15: {
- if (!r_R2())
- {
- break lab15;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break lab14;
- }
- base.cursor = base.limit - v_10;
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_from("eux"))
- {
- return false;
- }
- }
- break;
- case 12:
- if (!r_R1())
- {
- return false;
- }
- if (!(base.out_grouping_b(g_v, 97, 251)))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 13:
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_from("ant"))
- {
- return false;
- }
- return false;
- case 14:
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_from("ent"))
- {
- return false;
- }
- return false;
- case 15:
- var /** number */ v_11 = base.limit - base.cursor;
- if (!(base.in_grouping_b(g_v, 97, 251)))
- {
- return false;
- }
- if (!r_RV())
- {
- return false;
- }
- base.cursor = base.limit - v_11;
- if (!base.slice_del())
- {
- return false;
- }
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_i_verb_suffix() {
- if (base.cursor < I_pV)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_pV;
- base.ket = base.cursor;
- if (base.find_among_b(a_5) == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- {
- var /** number */ v_3 = base.limit - base.cursor;
- lab0: {
- if (!(base.eq_s_b("H")))
- {
- break lab0;
- }
- base.limit_backward = v_2;
- return false;
- }
- base.cursor = base.limit - v_3;
- }
- if (!(base.out_grouping_b(g_v, 97, 251)))
- {
- base.limit_backward = v_2;
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- base.limit_backward = v_2;
- return true;
- };
-
- /** @return {boolean} */
- function r_verb_suffix() {
- var /** number */ among_var;
- if (base.cursor < I_pV)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_pV;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_6);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R2())
- {
- base.limit_backward = v_2;
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_3 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("e")))
- {
- base.cursor = base.limit - v_3;
- break lab0;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- }
- base.limit_backward = v_2;
- return true;
- };
-
- /** @return {boolean} */
- function r_residual_suffix() {
- var /** number */ among_var;
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("s")))
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- base.bra = base.cursor;
- var /** number */ v_2 = base.limit - base.cursor;
- lab1: {
- var /** number */ v_3 = base.limit - base.cursor;
- lab2: {
- if (!(base.eq_s_b("Hi")))
- {
- break lab2;
- }
- break lab1;
- }
- base.cursor = base.limit - v_3;
- if (!(base.out_grouping_b(g_keep_with_s, 97, 232)))
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- }
- base.cursor = base.limit - v_2;
- if (!base.slice_del())
- {
- return false;
- }
- }
- if (base.cursor < I_pV)
- {
- return false;
- }
- var /** number */ v_5 = base.limit_backward;
- base.limit_backward = I_pV;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_7);
- if (among_var == 0)
- {
- base.limit_backward = v_5;
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R2())
- {
- base.limit_backward = v_5;
- return false;
- }
- lab3: {
- var /** number */ v_6 = base.limit - base.cursor;
- lab4: {
- if (!(base.eq_s_b("s")))
- {
- break lab4;
- }
- break lab3;
- }
- base.cursor = base.limit - v_6;
- if (!(base.eq_s_b("t")))
- {
- base.limit_backward = v_5;
- return false;
- }
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- base.limit_backward = v_5;
- return true;
- };
-
- /** @return {boolean} */
- function r_un_double() {
- var /** number */ v_1 = base.limit - base.cursor;
- if (base.find_among_b(a_8) == 0)
- {
- return false;
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_un_accent() {
- {
- var v_1 = 1;
- while(true)
- {
- lab0: {
- if (!(base.out_grouping_b(g_v, 97, 251)))
- {
- break lab0;
- }
- v_1--;
- continue;
- }
- break;
- }
- if (v_1 > 0)
- {
- return false;
- }
- }
- base.ket = base.cursor;
- lab1: {
- var /** number */ v_3 = base.limit - base.cursor;
- lab2: {
- if (!(base.eq_s_b("\u00E9")))
- {
- break lab2;
- }
- break lab1;
- }
- base.cursor = base.limit - v_3;
- if (!(base.eq_s_b("\u00E8")))
- {
- return false;
- }
- }
- base.bra = base.cursor;
- if (!base.slice_from("e"))
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- var /** number */ v_1 = base.cursor;
- r_prelude();
- base.cursor = v_1;
- r_mark_regions();
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_3 = base.limit - base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_4 = base.limit - base.cursor;
- lab2: {
- var /** number */ v_5 = base.limit - base.cursor;
- lab3: {
- var /** number */ v_6 = base.limit - base.cursor;
- lab4: {
- if (!r_standard_suffix())
- {
- break lab4;
- }
- break lab3;
- }
- base.cursor = base.limit - v_6;
- lab5: {
- if (!r_i_verb_suffix())
- {
- break lab5;
- }
- break lab3;
- }
- base.cursor = base.limit - v_6;
- if (!r_verb_suffix())
- {
- break lab2;
- }
- }
- base.cursor = base.limit - v_5;
- var /** number */ v_7 = base.limit - base.cursor;
- lab6: {
- base.ket = base.cursor;
- lab7: {
- var /** number */ v_8 = base.limit - base.cursor;
- lab8: {
- if (!(base.eq_s_b("Y")))
- {
- break lab8;
- }
- base.bra = base.cursor;
- if (!base.slice_from("i"))
- {
- return false;
- }
- break lab7;
- }
- base.cursor = base.limit - v_8;
- if (!(base.eq_s_b("\u00E7")))
- {
- base.cursor = base.limit - v_7;
- break lab6;
- }
- base.bra = base.cursor;
- if (!base.slice_from("c"))
- {
- return false;
- }
- }
- }
- break lab1;
- }
- base.cursor = base.limit - v_4;
- if (!r_residual_suffix())
- {
- break lab0;
- }
- }
- }
- base.cursor = base.limit - v_3;
- var /** number */ v_9 = base.limit - base.cursor;
- r_un_double();
- base.cursor = base.limit - v_9;
- var /** number */ v_10 = base.limit - base.cursor;
- r_un_accent();
- base.cursor = base.limit - v_10;
- base.cursor = base.limit_backward;
- var /** number */ v_11 = base.cursor;
- r_postlude();
- base.cursor = v_11;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['FrenchStemmer'] = FrenchStemmer;
diff --git a/js/german-stemmer.js b/js/german-stemmer.js
deleted file mode 100644
index 93f92f8..0000000
--- a/js/german-stemmer.js
+++ /dev/null
@@ -1,591 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var GermanStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["", -1, 5],
- ["ae", 0, 2],
- ["oe", 0, 3],
- ["qu", 0, -1],
- ["ue", 0, 4],
- ["\u00DF", 0, 1]
- ];
-
- /** @const */ var a_1 = [
- ["", -1, 5],
- ["U", 0, 2],
- ["Y", 0, 1],
- ["\u00E4", 0, 3],
- ["\u00F6", 0, 4],
- ["\u00FC", 0, 2]
- ];
-
- /** @const */ var a_2 = [
- ["e", -1, 2],
- ["em", -1, 1],
- ["en", -1, 2],
- ["ern", -1, 1],
- ["er", -1, 1],
- ["s", -1, 3],
- ["es", 5, 2]
- ];
-
- /** @const */ var a_3 = [
- ["en", -1, 1],
- ["er", -1, 1],
- ["st", -1, 2],
- ["est", 2, 1]
- ];
-
- /** @const */ var a_4 = [
- ["ig", -1, 1],
- ["lich", -1, 1]
- ];
-
- /** @const */ var a_5 = [
- ["end", -1, 1],
- ["ig", -1, 2],
- ["ung", -1, 1],
- ["lich", -1, 3],
- ["isch", -1, 2],
- ["ik", -1, 2],
- ["heit", -1, 3],
- ["keit", -1, 4]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8];
-
- /** @const */ var /** Array */ g_s_ending = [117, 30, 5];
-
- /** @const */ var /** Array */ g_st_ending = [117, 30, 4];
-
- var /** number */ I_x = 0;
- var /** number */ I_p2 = 0;
- var /** number */ I_p1 = 0;
-
-
- /** @return {boolean} */
- function r_prelude() {
- var /** number */ among_var;
- var /** number */ v_1 = base.cursor;
- while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab0: {
- golab1: while(true)
- {
- var /** number */ v_3 = base.cursor;
- lab2: {
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab2;
- }
- base.bra = base.cursor;
- lab3: {
- var /** number */ v_4 = base.cursor;
- lab4: {
- if (!(base.eq_s("u")))
- {
- break lab4;
- }
- base.ket = base.cursor;
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab4;
- }
- if (!base.slice_from("U"))
- {
- return false;
- }
- break lab3;
- }
- base.cursor = v_4;
- if (!(base.eq_s("y")))
- {
- break lab2;
- }
- base.ket = base.cursor;
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab2;
- }
- if (!base.slice_from("Y"))
- {
- return false;
- }
- }
- base.cursor = v_3;
- break golab1;
- }
- base.cursor = v_3;
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- continue;
- }
- base.cursor = v_2;
- break;
- }
- base.cursor = v_1;
- while(true)
- {
- var /** number */ v_5 = base.cursor;
- lab5: {
- base.bra = base.cursor;
- among_var = base.find_among(a_0);
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("ss"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u00E4"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("\u00F6"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("\u00FC"))
- {
- return false;
- }
- break;
- case 5:
- if (base.cursor >= base.limit)
- {
- break lab5;
- }
- base.cursor++;
- break;
- }
- continue;
- }
- base.cursor = v_5;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_p1 = base.limit;
- I_p2 = base.limit;
- var /** number */ v_1 = base.cursor;
- {
- var /** number */ c1 = base.cursor + 3;
- if (c1 > base.limit)
- {
- return false;
- }
- base.cursor = c1;
- }
- I_x = base.cursor;
- base.cursor = v_1;
- golab0: while(true)
- {
- lab1: {
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab1;
- }
- break golab0;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- golab2: while(true)
- {
- lab3: {
- if (!(base.out_grouping(g_v, 97, 252)))
- {
- break lab3;
- }
- break golab2;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- lab4: {
- if (I_p1 >= I_x)
- {
- break lab4;
- }
- I_p1 = I_x;
- }
- golab5: while(true)
- {
- lab6: {
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab6;
- }
- break golab5;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- golab7: while(true)
- {
- lab8: {
- if (!(base.out_grouping(g_v, 97, 252)))
- {
- break lab8;
- }
- break golab7;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- return true;
- };
-
- /** @return {boolean} */
- function r_postlude() {
- var /** number */ among_var;
- while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- among_var = base.find_among(a_1);
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("y"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("u"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("o"))
- {
- return false;
- }
- break;
- case 5:
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- break;
- }
- continue;
- }
- base.cursor = v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_standard_suffix() {
- var /** number */ among_var;
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- break lab0;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- break lab0;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_2 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("s")))
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- base.bra = base.cursor;
- if (!(base.eq_s_b("nis")))
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- case 3:
- if (!(base.in_grouping_b(g_s_ending, 98, 116)))
- {
- break lab0;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- }
- base.cursor = base.limit - v_1;
- var /** number */ v_3 = base.limit - base.cursor;
- lab2: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_3);
- if (among_var == 0)
- {
- break lab2;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- break lab2;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!(base.in_grouping_b(g_st_ending, 98, 116)))
- {
- break lab2;
- }
- {
- var /** number */ c1 = base.cursor - 3;
- if (c1 < base.limit_backward)
- {
- break lab2;
- }
- base.cursor = c1;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- }
- base.cursor = base.limit - v_3;
- var /** number */ v_4 = base.limit - base.cursor;
- lab3: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_5);
- if (among_var == 0)
- {
- break lab3;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- break lab3;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_5 = base.limit - base.cursor;
- lab4: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("ig")))
- {
- base.cursor = base.limit - v_5;
- break lab4;
- }
- base.bra = base.cursor;
- {
- var /** number */ v_6 = base.limit - base.cursor;
- lab5: {
- if (!(base.eq_s_b("e")))
- {
- break lab5;
- }
- base.cursor = base.limit - v_5;
- break lab4;
- }
- base.cursor = base.limit - v_6;
- }
- if (!r_R2())
- {
- base.cursor = base.limit - v_5;
- break lab4;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- case 2:
- {
- var /** number */ v_7 = base.limit - base.cursor;
- lab6: {
- if (!(base.eq_s_b("e")))
- {
- break lab6;
- }
- break lab3;
- }
- base.cursor = base.limit - v_7;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_8 = base.limit - base.cursor;
- lab7: {
- base.ket = base.cursor;
- lab8: {
- var /** number */ v_9 = base.limit - base.cursor;
- lab9: {
- if (!(base.eq_s_b("er")))
- {
- break lab9;
- }
- break lab8;
- }
- base.cursor = base.limit - v_9;
- if (!(base.eq_s_b("en")))
- {
- base.cursor = base.limit - v_8;
- break lab7;
- }
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- base.cursor = base.limit - v_8;
- break lab7;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- case 4:
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_10 = base.limit - base.cursor;
- lab10: {
- base.ket = base.cursor;
- if (base.find_among_b(a_4) == 0)
- {
- base.cursor = base.limit - v_10;
- break lab10;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_10;
- break lab10;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- }
- }
- base.cursor = base.limit - v_4;
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- var /** number */ v_1 = base.cursor;
- r_prelude();
- base.cursor = v_1;
- var /** number */ v_2 = base.cursor;
- r_mark_regions();
- base.cursor = v_2;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- r_standard_suffix();
- base.cursor = base.limit_backward;
- var /** number */ v_4 = base.cursor;
- r_postlude();
- base.cursor = v_4;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['GermanStemmer'] = GermanStemmer;
diff --git a/js/greek-stemmer.js b/js/greek-stemmer.js
deleted file mode 100644
index 865b0ea..0000000
--- a/js/greek-stemmer.js
+++ /dev/null
@@ -1,2870 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var GreekStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["", -1, 25],
- ["\u0386", 0, 1],
- ["\u0388", 0, 5],
- ["\u0389", 0, 7],
- ["\u038A", 0, 9],
- ["\u038C", 0, 15],
- ["\u038E", 0, 20],
- ["\u038F", 0, 24],
- ["\u0390", 0, 7],
- ["\u0391", 0, 1],
- ["\u0392", 0, 2],
- ["\u0393", 0, 3],
- ["\u0394", 0, 4],
- ["\u0395", 0, 5],
- ["\u0396", 0, 6],
- ["\u0397", 0, 7],
- ["\u0398", 0, 8],
- ["\u0399", 0, 9],
- ["\u039A", 0, 10],
- ["\u039B", 0, 11],
- ["\u039C", 0, 12],
- ["\u039D", 0, 13],
- ["\u039E", 0, 14],
- ["\u039F", 0, 15],
- ["\u03A0", 0, 16],
- ["\u03A1", 0, 17],
- ["\u03A3", 0, 18],
- ["\u03A4", 0, 19],
- ["\u03A5", 0, 20],
- ["\u03A6", 0, 21],
- ["\u03A7", 0, 22],
- ["\u03A8", 0, 23],
- ["\u03A9", 0, 24],
- ["\u03AA", 0, 9],
- ["\u03AB", 0, 20],
- ["\u03AC", 0, 1],
- ["\u03AD", 0, 5],
- ["\u03AE", 0, 7],
- ["\u03AF", 0, 9],
- ["\u03B0", 0, 20],
- ["\u03C2", 0, 18],
- ["\u03CA", 0, 7],
- ["\u03CB", 0, 20],
- ["\u03CC", 0, 15],
- ["\u03CD", 0, 20],
- ["\u03CE", 0, 24]
- ];
-
- /** @const */ var a_1 = [
- ["\u03C3\u03BA\u03B1\u03B3\u03B9\u03B1", -1, 2],
- ["\u03C6\u03B1\u03B3\u03B9\u03B1", -1, 1],
- ["\u03BF\u03BB\u03BF\u03B3\u03B9\u03B1", -1, 3],
- ["\u03C3\u03BF\u03B3\u03B9\u03B1", -1, 4],
- ["\u03C4\u03B1\u03C4\u03BF\u03B3\u03B9\u03B1", -1, 5],
- ["\u03BA\u03C1\u03B5\u03B1\u03C4\u03B1", -1, 6],
- ["\u03C0\u03B5\u03C1\u03B1\u03C4\u03B1", -1, 7],
- ["\u03C4\u03B5\u03C1\u03B1\u03C4\u03B1", -1, 8],
- ["\u03B3\u03B5\u03B3\u03BF\u03BD\u03BF\u03C4\u03B1", -1, 11],
- ["\u03BA\u03B1\u03B8\u03B5\u03C3\u03C4\u03C9\u03C4\u03B1", -1, 10],
- ["\u03C6\u03C9\u03C4\u03B1", -1, 9],
- ["\u03C0\u03B5\u03C1\u03B1\u03C4\u03B7", -1, 7],
- ["\u03C3\u03BA\u03B1\u03B3\u03B9\u03C9\u03BD", -1, 2],
- ["\u03C6\u03B1\u03B3\u03B9\u03C9\u03BD", -1, 1],
- ["\u03BF\u03BB\u03BF\u03B3\u03B9\u03C9\u03BD", -1, 3],
- ["\u03C3\u03BF\u03B3\u03B9\u03C9\u03BD", -1, 4],
- ["\u03C4\u03B1\u03C4\u03BF\u03B3\u03B9\u03C9\u03BD", -1, 5],
- ["\u03BA\u03C1\u03B5\u03B1\u03C4\u03C9\u03BD", -1, 6],
- ["\u03C0\u03B5\u03C1\u03B1\u03C4\u03C9\u03BD", -1, 7],
- ["\u03C4\u03B5\u03C1\u03B1\u03C4\u03C9\u03BD", -1, 8],
- ["\u03B3\u03B5\u03B3\u03BF\u03BD\u03BF\u03C4\u03C9\u03BD", -1, 11],
- ["\u03BA\u03B1\u03B8\u03B5\u03C3\u03C4\u03C9\u03C4\u03C9\u03BD", -1, 10],
- ["\u03C6\u03C9\u03C4\u03C9\u03BD", -1, 9],
- ["\u03BA\u03C1\u03B5\u03B1\u03C3", -1, 6],
- ["\u03C0\u03B5\u03C1\u03B1\u03C3", -1, 7],
- ["\u03C4\u03B5\u03C1\u03B1\u03C3", -1, 8],
- ["\u03B3\u03B5\u03B3\u03BF\u03BD\u03BF\u03C3", -1, 11],
- ["\u03BA\u03C1\u03B5\u03B1\u03C4\u03BF\u03C3", -1, 6],
- ["\u03C0\u03B5\u03C1\u03B1\u03C4\u03BF\u03C3", -1, 7],
- ["\u03C4\u03B5\u03C1\u03B1\u03C4\u03BF\u03C3", -1, 8],
- ["\u03B3\u03B5\u03B3\u03BF\u03BD\u03BF\u03C4\u03BF\u03C3", -1, 11],
- ["\u03BA\u03B1\u03B8\u03B5\u03C3\u03C4\u03C9\u03C4\u03BF\u03C3", -1, 10],
- ["\u03C6\u03C9\u03C4\u03BF\u03C3", -1, 9],
- ["\u03BA\u03B1\u03B8\u03B5\u03C3\u03C4\u03C9\u03C3", -1, 10],
- ["\u03C6\u03C9\u03C3", -1, 9],
- ["\u03C3\u03BA\u03B1\u03B3\u03B9\u03BF\u03C5", -1, 2],
- ["\u03C6\u03B1\u03B3\u03B9\u03BF\u03C5", -1, 1],
- ["\u03BF\u03BB\u03BF\u03B3\u03B9\u03BF\u03C5", -1, 3],
- ["\u03C3\u03BF\u03B3\u03B9\u03BF\u03C5", -1, 4],
- ["\u03C4\u03B1\u03C4\u03BF\u03B3\u03B9\u03BF\u03C5", -1, 5]
- ];
-
- /** @const */ var a_2 = [
- ["\u03C0\u03B1", -1, 1],
- ["\u03BE\u03B1\u03BD\u03B1\u03C0\u03B1", 0, 1],
- ["\u03B5\u03C0\u03B1", 0, 1],
- ["\u03C0\u03B5\u03C1\u03B9\u03C0\u03B1", 0, 1],
- ["\u03B1\u03BD\u03B1\u03BC\u03C0\u03B1", 0, 1],
- ["\u03B5\u03BC\u03C0\u03B1", 0, 1],
- ["\u03B2", -1, 2],
- ["\u03B4\u03B1\u03BD\u03B5", -1, 1],
- ["\u03B2\u03B1\u03B8\u03C5\u03C1\u03B9", -1, 2],
- ["\u03B2\u03B1\u03C1\u03BA", -1, 2],
- ["\u03BC\u03B1\u03C1\u03BA", -1, 2],
- ["\u03BB", -1, 2],
- ["\u03BC", -1, 2],
- ["\u03BA\u03BF\u03C1\u03BD", -1, 2],
- ["\u03B1\u03B8\u03C1\u03BF", -1, 1],
- ["\u03C3\u03C5\u03BD\u03B1\u03B8\u03C1\u03BF", 14, 1],
- ["\u03C0", -1, 2],
- ["\u03B9\u03BC\u03C0", 16, 2],
- ["\u03C1", -1, 2],
- ["\u03BC\u03B1\u03C1", 18, 2],
- ["\u03B1\u03BC\u03C0\u03B1\u03C1", 18, 2],
- ["\u03B3\u03BA\u03C1", 18, 2],
- ["\u03B2\u03BF\u03BB\u03B2\u03BF\u03C1", 18, 2],
- ["\u03B3\u03BB\u03C5\u03BA\u03BF\u03C1", 18, 2],
- ["\u03C0\u03B9\u03C0\u03B5\u03C1\u03BF\u03C1", 18, 2],
- ["\u03C0\u03C1", 18, 2],
- ["\u03BC\u03C0\u03C1", 25, 2],
- ["\u03B1\u03C1\u03C1", 18, 2],
- ["\u03B3\u03BB\u03C5\u03BA\u03C5\u03C1", 18, 2],
- ["\u03C0\u03BF\u03BB\u03C5\u03C1", 18, 2],
- ["\u03BB\u03BF\u03C5", -1, 2]
- ];
-
- /** @const */ var a_3 = [
- ["\u03B9\u03B6\u03B1", -1, 1],
- ["\u03B9\u03B6\u03B5", -1, 1],
- ["\u03B9\u03B6\u03B1\u03BC\u03B5", -1, 1],
- ["\u03B9\u03B6\u03BF\u03C5\u03BC\u03B5", -1, 1],
- ["\u03B9\u03B6\u03B1\u03BD\u03B5", -1, 1],
- ["\u03B9\u03B6\u03BF\u03C5\u03BD\u03B5", -1, 1],
- ["\u03B9\u03B6\u03B1\u03C4\u03B5", -1, 1],
- ["\u03B9\u03B6\u03B5\u03C4\u03B5", -1, 1],
- ["\u03B9\u03B6\u03B5\u03B9", -1, 1],
- ["\u03B9\u03B6\u03B1\u03BD", -1, 1],
- ["\u03B9\u03B6\u03BF\u03C5\u03BD", -1, 1],
- ["\u03B9\u03B6\u03B5\u03C3", -1, 1],
- ["\u03B9\u03B6\u03B5\u03B9\u03C3", -1, 1],
- ["\u03B9\u03B6\u03C9", -1, 1]
- ];
-
- /** @const */ var a_4 = [
- ["\u03B2\u03B9", -1, 1],
- ["\u03BB\u03B9", -1, 1],
- ["\u03B1\u03BB", -1, 1],
- ["\u03B5\u03BD", -1, 1],
- ["\u03C3", -1, 1],
- ["\u03C7", -1, 1],
- ["\u03C5\u03C8", -1, 1],
- ["\u03B6\u03C9", -1, 1]
- ];
-
- /** @const */ var a_5 = [
- ["\u03C9\u03B8\u03B7\u03BA\u03B1", -1, 1],
- ["\u03C9\u03B8\u03B7\u03BA\u03B5", -1, 1],
- ["\u03C9\u03B8\u03B7\u03BA\u03B1\u03BC\u03B5", -1, 1],
- ["\u03C9\u03B8\u03B7\u03BA\u03B1\u03BD\u03B5", -1, 1],
- ["\u03C9\u03B8\u03B7\u03BA\u03B1\u03C4\u03B5", -1, 1],
- ["\u03C9\u03B8\u03B7\u03BA\u03B1\u03BD", -1, 1],
- ["\u03C9\u03B8\u03B7\u03BA\u03B5\u03C3", -1, 1]
- ];
-
- /** @const */ var a_6 = [
- ["\u03BE\u03B1\u03BD\u03B1\u03C0\u03B1", -1, 1],
- ["\u03B5\u03C0\u03B1", -1, 1],
- ["\u03C0\u03B5\u03C1\u03B9\u03C0\u03B1", -1, 1],
- ["\u03B1\u03BD\u03B1\u03BC\u03C0\u03B1", -1, 1],
- ["\u03B5\u03BC\u03C0\u03B1", -1, 1],
- ["\u03C7\u03B1\u03C1\u03C4\u03BF\u03C0\u03B1", -1, 1],
- ["\u03B5\u03BE\u03B1\u03C1\u03C7\u03B1", -1, 1],
- ["\u03B3\u03B5", -1, 2],
- ["\u03B3\u03BA\u03B5", -1, 2],
- ["\u03BA\u03BB\u03B5", -1, 1],
- ["\u03B5\u03BA\u03BB\u03B5", 9, 1],
- ["\u03B1\u03C0\u03B5\u03BA\u03BB\u03B5", 10, 1],
- ["\u03B1\u03C0\u03BF\u03BA\u03BB\u03B5", 9, 1],
- ["\u03B5\u03C3\u03C9\u03BA\u03BB\u03B5", 9, 1],
- ["\u03B4\u03B1\u03BD\u03B5", -1, 1],
- ["\u03C0\u03B5", -1, 1],
- ["\u03B5\u03C0\u03B5", 15, 1],
- ["\u03BC\u03B5\u03C4\u03B5\u03C0\u03B5", 16, 1],
- ["\u03B5\u03C3\u03B5", -1, 1],
- ["\u03B3\u03BA", -1, 2],
- ["\u03BC", -1, 2],
- ["\u03C0\u03BF\u03C5\u03BA\u03B1\u03BC", 20, 2],
- ["\u03BA\u03BF\u03BC", 20, 2],
- ["\u03B1\u03BD", -1, 2],
- ["\u03BF\u03BB\u03BF", -1, 2],
- ["\u03B1\u03B8\u03C1\u03BF", -1, 1],
- ["\u03C3\u03C5\u03BD\u03B1\u03B8\u03C1\u03BF", 25, 1],
- ["\u03C0", -1, 2],
- ["\u03BB\u03B1\u03C1", -1, 2],
- ["\u03B4\u03B7\u03BC\u03BF\u03BA\u03C1\u03B1\u03C4", -1, 2],
- ["\u03B1\u03C6", -1, 2],
- ["\u03B3\u03B9\u03B3\u03B1\u03BD\u03C4\u03BF\u03B1\u03C6", 30, 2]
- ];
-
- /** @const */ var a_7 = [
- ["\u03B9\u03C3\u03B1", -1, 1],
- ["\u03B9\u03C3\u03B1\u03BC\u03B5", -1, 1],
- ["\u03B9\u03C3\u03B1\u03BD\u03B5", -1, 1],
- ["\u03B9\u03C3\u03B5", -1, 1],
- ["\u03B9\u03C3\u03B1\u03C4\u03B5", -1, 1],
- ["\u03B9\u03C3\u03B1\u03BD", -1, 1],
- ["\u03B9\u03C3\u03B5\u03C3", -1, 1]
- ];
-
- /** @const */ var a_8 = [
- ["\u03BE\u03B1\u03BD\u03B1\u03C0\u03B1", -1, 1],
- ["\u03B5\u03C0\u03B1", -1, 1],
- ["\u03C0\u03B5\u03C1\u03B9\u03C0\u03B1", -1, 1],
- ["\u03B1\u03BD\u03B1\u03BC\u03C0\u03B1", -1, 1],
- ["\u03B5\u03BC\u03C0\u03B1", -1, 1],
- ["\u03C7\u03B1\u03C1\u03C4\u03BF\u03C0\u03B1", -1, 1],
- ["\u03B5\u03BE\u03B1\u03C1\u03C7\u03B1", -1, 1],
- ["\u03BA\u03BB\u03B5", -1, 1],
- ["\u03B5\u03BA\u03BB\u03B5", 7, 1],
- ["\u03B1\u03C0\u03B5\u03BA\u03BB\u03B5", 8, 1],
- ["\u03B1\u03C0\u03BF\u03BA\u03BB\u03B5", 7, 1],
- ["\u03B5\u03C3\u03C9\u03BA\u03BB\u03B5", 7, 1],
- ["\u03B4\u03B1\u03BD\u03B5", -1, 1],
- ["\u03C0\u03B5", -1, 1],
- ["\u03B5\u03C0\u03B5", 13, 1],
- ["\u03BC\u03B5\u03C4\u03B5\u03C0\u03B5", 14, 1],
- ["\u03B5\u03C3\u03B5", -1, 1],
- ["\u03B1\u03B8\u03C1\u03BF", -1, 1],
- ["\u03C3\u03C5\u03BD\u03B1\u03B8\u03C1\u03BF", 17, 1]
- ];
-
- /** @const */ var a_9 = [
- ["\u03B9\u03C3\u03BF\u03C5\u03BC\u03B5", -1, 1],
- ["\u03B9\u03C3\u03BF\u03C5\u03BD\u03B5", -1, 1],
- ["\u03B9\u03C3\u03B5\u03C4\u03B5", -1, 1],
- ["\u03B9\u03C3\u03B5\u03B9", -1, 1],
- ["\u03B9\u03C3\u03BF\u03C5\u03BD", -1, 1],
- ["\u03B9\u03C3\u03B5\u03B9\u03C3", -1, 1],
- ["\u03B9\u03C3\u03C9", -1, 1]
- ];
-
- /** @const */ var a_10 = [
- ["\u03B1\u03C4\u03B1", -1, 2],
- ["\u03C6\u03B1", -1, 2],
- ["\u03B7\u03C6\u03B1", 1, 2],
- ["\u03BC\u03B5\u03B3", -1, 2],
- ["\u03BB\u03C5\u03B3", -1, 2],
- ["\u03B7\u03B4", -1, 2],
- ["\u03BA\u03BB\u03B5", -1, 1],
- ["\u03B5\u03C3\u03C9\u03BA\u03BB\u03B5", 6, 1],
- ["\u03C0\u03BB\u03B5", -1, 1],
- ["\u03B4\u03B1\u03BD\u03B5", -1, 1],
- ["\u03C3\u03B5", -1, 1],
- ["\u03B1\u03C3\u03B5", 10, 1],
- ["\u03BA\u03B1\u03B8", -1, 2],
- ["\u03B5\u03C7\u03B8", -1, 2],
- ["\u03BA\u03B1\u03BA", -1, 2],
- ["\u03BC\u03B1\u03BA", -1, 2],
- ["\u03C3\u03BA", -1, 2],
- ["\u03C6\u03B9\u03BB", -1, 2],
- ["\u03BA\u03C5\u03BB", -1, 2],
- ["\u03BC", -1, 2],
- ["\u03B3\u03B5\u03BC", 19, 2],
- ["\u03B1\u03C7\u03BD", -1, 2],
- ["\u03C3\u03C5\u03BD\u03B1\u03B8\u03C1\u03BF", -1, 1],
- ["\u03C0", -1, 2],
- ["\u03B1\u03C0", 23, 2],
- ["\u03B5\u03BC\u03C0", 23, 2],
- ["\u03B5\u03C5\u03C0", 23, 2],
- ["\u03B1\u03C1", -1, 2],
- ["\u03B1\u03BF\u03C1", -1, 2],
- ["\u03B3\u03C5\u03C1", -1, 2],
- ["\u03C7\u03C1", -1, 2],
- ["\u03C7\u03C9\u03C1", -1, 2],
- ["\u03BA\u03C4", -1, 2],
- ["\u03B1\u03BA\u03C4", 32, 2],
- ["\u03C7\u03C4", -1, 2],
- ["\u03B1\u03C7\u03C4", 34, 2],
- ["\u03C4\u03B1\u03C7", -1, 2],
- ["\u03C3\u03C7", -1, 2],
- ["\u03B1\u03C3\u03C7", 37, 2],
- ["\u03C5\u03C8", -1, 2]
- ];
-
- /** @const */ var a_11 = [
- ["\u03B9\u03C3\u03C4\u03B1", -1, 1],
- ["\u03B9\u03C3\u03C4\u03B5", -1, 1],
- ["\u03B9\u03C3\u03C4\u03B7", -1, 1],
- ["\u03B9\u03C3\u03C4\u03BF\u03B9", -1, 1],
- ["\u03B9\u03C3\u03C4\u03C9\u03BD", -1, 1],
- ["\u03B9\u03C3\u03C4\u03BF", -1, 1],
- ["\u03B9\u03C3\u03C4\u03B5\u03C3", -1, 1],
- ["\u03B9\u03C3\u03C4\u03B7\u03C3", -1, 1],
- ["\u03B9\u03C3\u03C4\u03BF\u03C3", -1, 1],
- ["\u03B9\u03C3\u03C4\u03BF\u03C5\u03C3", -1, 1],
- ["\u03B9\u03C3\u03C4\u03BF\u03C5", -1, 1]
- ];
-
- /** @const */ var a_12 = [
- ["\u03B5\u03B3\u03BA\u03BB\u03B5", -1, 1],
- ["\u03B1\u03C0\u03BF\u03BA\u03BB\u03B5", -1, 1],
- ["\u03B4\u03B1\u03BD\u03B5", -1, 2],
- ["\u03B1\u03BD\u03C4\u03B9\u03B4\u03B1\u03BD\u03B5", 2, 2],
- ["\u03C3\u03B5", -1, 1],
- ["\u03BC\u03B5\u03C4\u03B1\u03C3\u03B5", 4, 1],
- ["\u03BC\u03B9\u03BA\u03C1\u03BF\u03C3\u03B5", 4, 1]
- ];
-
- /** @const */ var a_13 = [
- ["\u03B1\u03C4\u03BF\u03BC\u03B9\u03BA", -1, 2],
- ["\u03B5\u03B8\u03BD\u03B9\u03BA", -1, 4],
- ["\u03C4\u03BF\u03C0\u03B9\u03BA", -1, 7],
- ["\u03B5\u03BA\u03BB\u03B5\u03BA\u03C4\u03B9\u03BA", -1, 5],
- ["\u03C3\u03BA\u03B5\u03C0\u03C4\u03B9\u03BA", -1, 6],
- ["\u03B3\u03BD\u03C9\u03C3\u03C4\u03B9\u03BA", -1, 3],
- ["\u03B1\u03B3\u03BD\u03C9\u03C3\u03C4\u03B9\u03BA", 5, 1],
- ["\u03B1\u03BB\u03B5\u03BE\u03B1\u03BD\u03B4\u03C1\u03B9\u03BD", -1, 8],
- ["\u03B8\u03B5\u03B1\u03C4\u03C1\u03B9\u03BD", -1, 10],
- ["\u03B2\u03C5\u03B6\u03B1\u03BD\u03C4\u03B9\u03BD", -1, 9]
- ];
-
- /** @const */ var a_14 = [
- ["\u03B9\u03C3\u03BC\u03BF\u03B9", -1, 1],
- ["\u03B9\u03C3\u03BC\u03C9\u03BD", -1, 1],
- ["\u03B9\u03C3\u03BC\u03BF", -1, 1],
- ["\u03B9\u03C3\u03BC\u03BF\u03C3", -1, 1],
- ["\u03B9\u03C3\u03BC\u03BF\u03C5\u03C3", -1, 1],
- ["\u03B9\u03C3\u03BC\u03BF\u03C5", -1, 1]
- ];
-
- /** @const */ var a_15 = [
- ["\u03C3", -1, 1],
- ["\u03C7", -1, 1]
- ];
-
- /** @const */ var a_16 = [
- ["\u03BF\u03C5\u03B4\u03B1\u03BA\u03B9\u03B1", -1, 1],
- ["\u03B1\u03C1\u03B1\u03BA\u03B9\u03B1", -1, 1],
- ["\u03BF\u03C5\u03B4\u03B1\u03BA\u03B9", -1, 1],
- ["\u03B1\u03C1\u03B1\u03BA\u03B9", -1, 1]
- ];
-
- /** @const */ var a_17 = [
- ["\u03B2", -1, 2],
- ["\u03B2\u03B1\u03BC\u03B2", 0, 1],
- ["\u03C3\u03BB\u03BF\u03B2", 0, 1],
- ["\u03C4\u03C3\u03B5\u03C7\u03BF\u03C3\u03BB\u03BF\u03B2", 2, 1],
- ["\u03BA\u03B1\u03C1\u03B4", -1, 2],
- ["\u03B6", -1, 2],
- ["\u03C4\u03B6", 5, 1],
- ["\u03BA", -1, 1],
- ["\u03BA\u03B1\u03C0\u03B1\u03BA", 7, 1],
- ["\u03C3\u03BF\u03BA", 7, 1],
- ["\u03C3\u03BA", 7, 1],
- ["\u03B2\u03B1\u03BB", -1, 2],
- ["\u03BC\u03B1\u03BB", -1, 1],
- ["\u03B3\u03BB", -1, 2],
- ["\u03C4\u03C1\u03B9\u03C0\u03BF\u03BB", -1, 2],
- ["\u03C0\u03BB", -1, 1],
- ["\u03BB\u03BF\u03C5\u03BB", -1, 1],
- ["\u03C6\u03C5\u03BB", -1, 1],
- ["\u03BA\u03B1\u03B9\u03BC", -1, 1],
- ["\u03BA\u03BB\u03B9\u03BC", -1, 1],
- ["\u03C6\u03B1\u03C1\u03BC", -1, 1],
- ["\u03B3\u03B9\u03B1\u03BD", -1, 2],
- ["\u03C3\u03C0\u03B1\u03BD", -1, 1],
- ["\u03B7\u03B3\u03BF\u03C5\u03BC\u03B5\u03BD", -1, 2],
- ["\u03BA\u03BF\u03BD", -1, 1],
- ["\u03BC\u03B1\u03BA\u03C1\u03C5\u03BD", -1, 2],
- ["\u03C0", -1, 2],
- ["\u03BA\u03B1\u03C4\u03C1\u03B1\u03C0", 26, 1],
- ["\u03C1", -1, 1],
- ["\u03B2\u03C1", 28, 1],
- ["\u03BB\u03B1\u03B2\u03C1", 29, 1],
- ["\u03B1\u03BC\u03B2\u03C1", 29, 1],
- ["\u03BC\u03B5\u03C1", 28, 1],
- ["\u03C0\u03B1\u03C4\u03B5\u03C1", 28, 2],
- ["\u03B1\u03BD\u03B8\u03C1", 28, 1],
- ["\u03BA\u03BF\u03C1", 28, 1],
- ["\u03C3", -1, 1],
- ["\u03BD\u03B1\u03B3\u03BA\u03B1\u03C3", 36, 1],
- ["\u03C4\u03BF\u03C3", 36, 2],
- ["\u03BC\u03BF\u03C5\u03C3\u03C4", -1, 1],
- ["\u03C1\u03C5", -1, 1],
- ["\u03C6", -1, 1],
- ["\u03C3\u03C6", 41, 1],
- ["\u03B1\u03BB\u03B9\u03C3\u03C6", 42, 1],
- ["\u03BD\u03C5\u03C6", 41, 2],
- ["\u03C7", -1, 1]
- ];
-
- /** @const */ var a_18 = [
- ["\u03B1\u03BA\u03B9\u03B1", -1, 1],
- ["\u03B1\u03C1\u03B1\u03BA\u03B9\u03B1", 0, 1],
- ["\u03B9\u03C4\u03C3\u03B1", -1, 1],
- ["\u03B1\u03BA\u03B9", -1, 1],
- ["\u03B1\u03C1\u03B1\u03BA\u03B9", 3, 1],
- ["\u03B9\u03C4\u03C3\u03C9\u03BD", -1, 1],
- ["\u03B9\u03C4\u03C3\u03B1\u03C3", -1, 1],
- ["\u03B9\u03C4\u03C3\u03B5\u03C3", -1, 1]
- ];
-
- /** @const */ var a_19 = [
- ["\u03C8\u03B1\u03BB", -1, 1],
- ["\u03B1\u03B9\u03C6\u03BD", -1, 1],
- ["\u03BF\u03BB\u03BF", -1, 1],
- ["\u03B9\u03C1", -1, 1]
- ];
-
- /** @const */ var a_20 = [
- ["\u03B5", -1, 1],
- ["\u03C0\u03B1\u03B9\u03C7\u03BD", -1, 1]
- ];
-
- /** @const */ var a_21 = [
- ["\u03B9\u03B4\u03B9\u03B1", -1, 1],
- ["\u03B9\u03B4\u03B9\u03C9\u03BD", -1, 1],
- ["\u03B9\u03B4\u03B9\u03BF", -1, 1]
- ];
-
- /** @const */ var a_22 = [
- ["\u03B9\u03B2", -1, 1],
- ["\u03B4", -1, 1],
- ["\u03C6\u03C1\u03B1\u03B3\u03BA", -1, 1],
- ["\u03BB\u03C5\u03BA", -1, 1],
- ["\u03BF\u03B2\u03B5\u03BB", -1, 1],
- ["\u03BC\u03B7\u03BD", -1, 1],
- ["\u03C1", -1, 1]
- ];
-
- /** @const */ var a_23 = [
- ["\u03B9\u03C3\u03BA\u03B5", -1, 1],
- ["\u03B9\u03C3\u03BA\u03BF", -1, 1],
- ["\u03B9\u03C3\u03BA\u03BF\u03C3", -1, 1],
- ["\u03B9\u03C3\u03BA\u03BF\u03C5", -1, 1]
- ];
-
- /** @const */ var a_24 = [
- ["\u03B1\u03B4\u03C9\u03BD", -1, 1],
- ["\u03B1\u03B4\u03B5\u03C3", -1, 1]
- ];
-
- /** @const */ var a_25 = [
- ["\u03B3\u03B9\u03B1\u03B3\u03B9", -1, -1],
- ["\u03B8\u03B5\u03B9", -1, -1],
- ["\u03BF\u03BA", -1, -1],
- ["\u03BC\u03B1\u03BC", -1, -1],
- ["\u03BC\u03B1\u03BD", -1, -1],
- ["\u03BC\u03C0\u03B1\u03BC\u03C0", -1, -1],
- ["\u03C0\u03B5\u03B8\u03B5\u03C1", -1, -1],
- ["\u03C0\u03B1\u03C4\u03B5\u03C1", -1, -1],
- ["\u03BA\u03C5\u03C1", -1, -1],
- ["\u03BD\u03C4\u03B1\u03BD\u03C4", -1, -1]
- ];
-
- /** @const */ var a_26 = [
- ["\u03B5\u03B4\u03C9\u03BD", -1, 1],
- ["\u03B5\u03B4\u03B5\u03C3", -1, 1]
- ];
-
- /** @const */ var a_27 = [
- ["\u03BC\u03B9\u03BB", -1, 1],
- ["\u03B4\u03B1\u03C0", -1, 1],
- ["\u03B3\u03B7\u03C0", -1, 1],
- ["\u03B9\u03C0", -1, 1],
- ["\u03B5\u03BC\u03C0", -1, 1],
- ["\u03BF\u03C0", -1, 1],
- ["\u03BA\u03C1\u03B1\u03C3\u03C0", -1, 1],
- ["\u03C5\u03C0", -1, 1]
- ];
-
- /** @const */ var a_28 = [
- ["\u03BF\u03C5\u03B4\u03C9\u03BD", -1, 1],
- ["\u03BF\u03C5\u03B4\u03B5\u03C3", -1, 1]
- ];
-
- /** @const */ var a_29 = [
- ["\u03C4\u03C1\u03B1\u03B3", -1, 1],
- ["\u03C6\u03B5", -1, 1],
- ["\u03BA\u03B1\u03BB\u03B9\u03B1\u03BA", -1, 1],
- ["\u03B1\u03C1\u03BA", -1, 1],
- ["\u03C3\u03BA", -1, 1],
- ["\u03C0\u03B5\u03C4\u03B1\u03BB", -1, 1],
- ["\u03B2\u03B5\u03BB", -1, 1],
- ["\u03BB\u03BF\u03C5\u03BB", -1, 1],
- ["\u03C6\u03BB", -1, 1],
- ["\u03C7\u03BD", -1, 1],
- ["\u03C0\u03BB\u03B5\u03BE", -1, 1],
- ["\u03C3\u03C0", -1, 1],
- ["\u03C6\u03C1", -1, 1],
- ["\u03C3", -1, 1],
- ["\u03BB\u03B9\u03C7", -1, 1]
- ];
-
- /** @const */ var a_30 = [
- ["\u03B5\u03C9\u03BD", -1, 1],
- ["\u03B5\u03C9\u03C3", -1, 1]
- ];
-
- /** @const */ var a_31 = [
- ["\u03B4", -1, 1],
- ["\u03B9\u03B4", 0, 1],
- ["\u03B8", -1, 1],
- ["\u03B3\u03B1\u03BB", -1, 1],
- ["\u03B5\u03BB", -1, 1],
- ["\u03BD", -1, 1],
- ["\u03C0", -1, 1],
- ["\u03C0\u03B1\u03C1", -1, 1]
- ];
-
- /** @const */ var a_32 = [
- ["\u03B9\u03B1", -1, 1],
- ["\u03B9\u03C9\u03BD", -1, 1],
- ["\u03B9\u03BF\u03C5", -1, 1]
- ];
-
- /** @const */ var a_33 = [
- ["\u03B9\u03BA\u03B1", -1, 1],
- ["\u03B9\u03BA\u03C9\u03BD", -1, 1],
- ["\u03B9\u03BA\u03BF", -1, 1],
- ["\u03B9\u03BA\u03BF\u03C5", -1, 1]
- ];
-
- /** @const */ var a_34 = [
- ["\u03B1\u03B4", -1, 1],
- ["\u03C3\u03C5\u03BD\u03B1\u03B4", 0, 1],
- ["\u03BA\u03B1\u03C4\u03B1\u03B4", 0, 1],
- ["\u03B1\u03BD\u03C4\u03B9\u03B4", -1, 1],
- ["\u03B5\u03BD\u03B4", -1, 1],
- ["\u03C6\u03C5\u03BB\u03BF\u03B4", -1, 1],
- ["\u03C5\u03C0\u03BF\u03B4", -1, 1],
- ["\u03C0\u03C1\u03C9\u03C4\u03BF\u03B4", -1, 1],
- ["\u03B5\u03BE\u03C9\u03B4", -1, 1],
- ["\u03B7\u03B8", -1, 1],
- ["\u03B1\u03BD\u03B7\u03B8", 9, 1],
- ["\u03BE\u03B9\u03BA", -1, 1],
- ["\u03B1\u03BB", -1, 1],
- ["\u03B1\u03BC\u03BC\u03BF\u03C7\u03B1\u03BB", 12, 1],
- ["\u03C3\u03C5\u03BD\u03BF\u03BC\u03B7\u03BB", -1, 1],
- ["\u03BC\u03C0\u03BF\u03BB", -1, 1],
- ["\u03BC\u03BF\u03C5\u03BB", -1, 1],
- ["\u03C4\u03C3\u03B1\u03BC", -1, 1],
- ["\u03B2\u03C1\u03C9\u03BC", -1, 1],
- ["\u03B1\u03BC\u03B1\u03BD", -1, 1],
- ["\u03BC\u03C0\u03B1\u03BD", -1, 1],
- ["\u03BA\u03B1\u03BB\u03BB\u03B9\u03BD", -1, 1],
- ["\u03C0\u03BF\u03C3\u03C4\u03B5\u03BB\u03BD", -1, 1],
- ["\u03C6\u03B9\u03BB\u03BF\u03BD", -1, 1],
- ["\u03BA\u03B1\u03BB\u03C0", -1, 1],
- ["\u03B3\u03B5\u03C1", -1, 1],
- ["\u03C7\u03B1\u03C3", -1, 1],
- ["\u03BC\u03C0\u03BF\u03C3", -1, 1],
- ["\u03C0\u03BB\u03B9\u03B1\u03C4\u03C3", -1, 1],
- ["\u03C0\u03B5\u03C4\u03C3", -1, 1],
- ["\u03C0\u03B9\u03C4\u03C3", -1, 1],
- ["\u03C6\u03C5\u03C3", -1, 1],
- ["\u03BC\u03C0\u03B1\u03B3\u03B9\u03B1\u03C4", -1, 1],
- ["\u03BD\u03B9\u03C4", -1, 1],
- ["\u03C0\u03B9\u03BA\u03B1\u03BD\u03C4", -1, 1],
- ["\u03C3\u03B5\u03C1\u03C4", -1, 1]
- ];
-
- /** @const */ var a_35 = [
- ["\u03B1\u03B3\u03B1\u03BC\u03B5", -1, 1],
- ["\u03B7\u03BA\u03B1\u03BC\u03B5", -1, 1],
- ["\u03B7\u03B8\u03B7\u03BA\u03B1\u03BC\u03B5", 1, 1],
- ["\u03B7\u03C3\u03B1\u03BC\u03B5", -1, 1],
- ["\u03BF\u03C5\u03C3\u03B1\u03BC\u03B5", -1, 1]
- ];
-
- /** @const */ var a_36 = [
- ["\u03B2\u03BF\u03C5\u03B2", -1, 1],
- ["\u03BE\u03B5\u03B8", -1, 1],
- ["\u03C0\u03B5\u03B8", -1, 1],
- ["\u03B1\u03C0\u03BF\u03B8", -1, 1],
- ["\u03B1\u03C0\u03BF\u03BA", -1, 1],
- ["\u03BF\u03C5\u03BB", -1, 1],
- ["\u03B1\u03BD\u03B1\u03C0", -1, 1],
- ["\u03C0\u03B9\u03BA\u03C1", -1, 1],
- ["\u03C0\u03BF\u03C4", -1, 1],
- ["\u03B1\u03C0\u03BF\u03C3\u03C4", -1, 1],
- ["\u03C7", -1, 1],
- ["\u03C3\u03B9\u03C7", 10, 1]
- ];
-
- /** @const */ var a_37 = [
- ["\u03C4\u03C1", -1, 1],
- ["\u03C4\u03C3", -1, 1]
- ];
-
- /** @const */ var a_38 = [
- ["\u03B1\u03B3\u03B1\u03BD\u03B5", -1, 1],
- ["\u03B7\u03BA\u03B1\u03BD\u03B5", -1, 1],
- ["\u03B7\u03B8\u03B7\u03BA\u03B1\u03BD\u03B5", 1, 1],
- ["\u03B7\u03C3\u03B1\u03BD\u03B5", -1, 1],
- ["\u03BF\u03C5\u03C3\u03B1\u03BD\u03B5", -1, 1],
- ["\u03BF\u03BD\u03C4\u03B1\u03BD\u03B5", -1, 1],
- ["\u03B9\u03BF\u03BD\u03C4\u03B1\u03BD\u03B5", 5, 1],
- ["\u03BF\u03C5\u03BD\u03C4\u03B1\u03BD\u03B5", -1, 1],
- ["\u03B9\u03BF\u03C5\u03BD\u03C4\u03B1\u03BD\u03B5", 7, 1],
- ["\u03BF\u03C4\u03B1\u03BD\u03B5", -1, 1],
- ["\u03B9\u03BF\u03C4\u03B1\u03BD\u03B5", 9, 1]
- ];
-
- /** @const */ var a_39 = [
- ["\u03C4\u03B1\u03B2", -1, 1],
- ["\u03BD\u03C4\u03B1\u03B2", 0, 1],
- ["\u03C8\u03B7\u03BB\u03BF\u03C4\u03B1\u03B2", 0, 1],
- ["\u03BB\u03B9\u03B2", -1, 1],
- ["\u03BA\u03BB\u03B9\u03B2", 3, 1],
- ["\u03BE\u03B7\u03C1\u03BF\u03BA\u03BB\u03B9\u03B2", 4, 1],
- ["\u03B3", -1, 1],
- ["\u03B1\u03B3", 6, 1],
- ["\u03C4\u03C1\u03B1\u03B3", 7, 1],
- ["\u03C4\u03C3\u03B1\u03B3", 7, 1],
- ["\u03B1\u03B8\u03B9\u03B3\u03B3", 6, 1],
- ["\u03C4\u03C3\u03B9\u03B3\u03B3", 6, 1],
- ["\u03B1\u03C4\u03C3\u03B9\u03B3\u03B3", 11, 1],
- ["\u03C3\u03C4\u03B5\u03B3", 6, 1],
- ["\u03B1\u03C0\u03B7\u03B3", 6, 1],
- ["\u03C3\u03B9\u03B3", 6, 1],
- ["\u03B1\u03BD\u03BF\u03C1\u03B3", 6, 1],
- ["\u03B5\u03BD\u03BF\u03C1\u03B3", 6, 1],
- ["\u03BA\u03B1\u03BB\u03C0\u03BF\u03C5\u03B6", -1, 1],
- ["\u03B8", -1, 1],
- ["\u03BC\u03C9\u03B1\u03BC\u03B5\u03B8", 19, 1],
- ["\u03C0\u03B9\u03B8", 19, 1],
- ["\u03B1\u03C0\u03B9\u03B8", 21, 1],
- ["\u03B4\u03B5\u03BA", -1, 1],
- ["\u03C0\u03B5\u03BB\u03B5\u03BA", -1, 1],
- ["\u03B9\u03BA", -1, 1],
- ["\u03B1\u03BD\u03B9\u03BA", 25, 1],
- ["\u03B2\u03BF\u03C5\u03BB\u03BA", -1, 1],
- ["\u03B2\u03B1\u03C3\u03BA", -1, 1],
- ["\u03B2\u03C1\u03B1\u03C7\u03C5\u03BA", -1, 1],
- ["\u03B3\u03B1\u03BB", -1, 1],
- ["\u03BA\u03B1\u03C4\u03B1\u03B3\u03B1\u03BB", 30, 1],
- ["\u03BF\u03BB\u03BF\u03B3\u03B1\u03BB", 30, 1],
- ["\u03B2\u03B1\u03B8\u03C5\u03B3\u03B1\u03BB", 30, 1],
- ["\u03BC\u03B5\u03BB", -1, 1],
- ["\u03BA\u03B1\u03C3\u03C4\u03B5\u03BB", -1, 1],
- ["\u03C0\u03BF\u03C1\u03C4\u03BF\u03BB", -1, 1],
- ["\u03C0\u03BB", -1, 1],
- ["\u03B4\u03B9\u03C0\u03BB", 37, 1],
- ["\u03BB\u03B1\u03BF\u03C0\u03BB", 37, 1],
- ["\u03C8\u03C5\u03C7\u03BF\u03C0\u03BB", 37, 1],
- ["\u03BF\u03C5\u03BB", -1, 1],
- ["\u03BC", -1, 1],
- ["\u03BF\u03BB\u03B9\u03B3\u03BF\u03B4\u03B1\u03BC", 42, 1],
- ["\u03BC\u03BF\u03C5\u03C3\u03BF\u03C5\u03BB\u03BC", 42, 1],
- ["\u03B4\u03C1\u03B1\u03B4\u03BF\u03C5\u03BC", 42, 1],
- ["\u03B2\u03C1\u03B1\u03C7\u03BC", 42, 1],
- ["\u03BD", -1, 1],
- ["\u03B1\u03BC\u03B5\u03C1\u03B9\u03BA\u03B1\u03BD", 47, 1],
- ["\u03C0", -1, 1],
- ["\u03B1\u03B4\u03B1\u03C0", 49, 1],
- ["\u03C7\u03B1\u03BC\u03B7\u03BB\u03BF\u03B4\u03B1\u03C0", 49, 1],
- ["\u03C0\u03BF\u03BB\u03C5\u03B4\u03B1\u03C0", 49, 1],
- ["\u03BA\u03BF\u03C0", 49, 1],
- ["\u03C5\u03C0\u03BF\u03BA\u03BF\u03C0", 53, 1],
- ["\u03C4\u03C3\u03BF\u03C0", 49, 1],
- ["\u03C3\u03C0", 49, 1],
- ["\u03B5\u03C1", -1, 1],
- ["\u03B3\u03B5\u03C1", 57, 1],
- ["\u03B2\u03B5\u03C4\u03B5\u03C1", 57, 1],
- ["\u03BB\u03BF\u03C5\u03B8\u03B7\u03C1", -1, 1],
- ["\u03BA\u03BF\u03C1\u03BC\u03BF\u03C1", -1, 1],
- ["\u03C0\u03B5\u03C1\u03B9\u03C4\u03C1", -1, 1],
- ["\u03BF\u03C5\u03C1", -1, 1],
- ["\u03C3", -1, 1],
- ["\u03B2\u03B1\u03C3", 64, 1],
- ["\u03C0\u03BF\u03BB\u03B9\u03C3", 64, 1],
- ["\u03C3\u03B1\u03C1\u03B1\u03BA\u03B1\u03C4\u03C3", 64, 1],
- ["\u03B8\u03C5\u03C3", 64, 1],
- ["\u03B4\u03B9\u03B1\u03C4", -1, 1],
- ["\u03C0\u03BB\u03B1\u03C4", -1, 1],
- ["\u03C4\u03C3\u03B1\u03C1\u03BB\u03B1\u03C4", -1, 1],
- ["\u03C4\u03B5\u03C4", -1, 1],
- ["\u03C0\u03BF\u03C5\u03C1\u03B9\u03C4", -1, 1],
- ["\u03C3\u03BF\u03C5\u03BB\u03C4", -1, 1],
- ["\u03BC\u03B1\u03B9\u03BD\u03C4", -1, 1],
- ["\u03B6\u03C9\u03BD\u03C4", -1, 1],
- ["\u03BA\u03B1\u03C3\u03C4", -1, 1],
- ["\u03C6", -1, 1],
- ["\u03B4\u03B9\u03B1\u03C6", 78, 1],
- ["\u03C3\u03C4\u03B5\u03C6", 78, 1],
- ["\u03C6\u03C9\u03C4\u03BF\u03C3\u03C4\u03B5\u03C6", 80, 1],
- ["\u03C0\u03B5\u03C1\u03B7\u03C6", 78, 1],
- ["\u03C5\u03C0\u03B5\u03C1\u03B7\u03C6", 82, 1],
- ["\u03BA\u03BF\u03B9\u03BB\u03B1\u03C1\u03C6", 78, 1],
- ["\u03C0\u03B5\u03BD\u03C4\u03B1\u03C1\u03C6", 78, 1],
- ["\u03BF\u03C1\u03C6", 78, 1],
- ["\u03C7", -1, 1],
- ["\u03B1\u03BC\u03B7\u03C7", 87, 1],
- ["\u03B2\u03B9\u03BF\u03BC\u03B7\u03C7", 87, 1],
- ["\u03BC\u03B5\u03B3\u03BB\u03BF\u03B2\u03B9\u03BF\u03BC\u03B7\u03C7", 89, 1],
- ["\u03BA\u03B1\u03C0\u03BD\u03BF\u03B2\u03B9\u03BF\u03BC\u03B7\u03C7", 89, 1],
- ["\u03BC\u03B9\u03BA\u03C1\u03BF\u03B2\u03B9\u03BF\u03BC\u03B7\u03C7", 89, 1],
- ["\u03C0\u03BF\u03BB\u03C5\u03BC\u03B7\u03C7", 87, 1],
- ["\u03BB\u03B9\u03C7", 87, 1]
- ];
-
- /** @const */ var a_40 = [
- ["\u03B7\u03C3\u03B5\u03C4\u03B5", -1, 1]
- ];
-
- /** @const */ var a_41 = [
- ["\u03B5\u03BD\u03B4", -1, 1],
- ["\u03C3\u03C5\u03BD\u03B4", -1, 1],
- ["\u03BF\u03B4", -1, 1],
- ["\u03B4\u03B9\u03B1\u03B8", -1, 1],
- ["\u03BA\u03B1\u03B8", -1, 1],
- ["\u03C1\u03B1\u03B8", -1, 1],
- ["\u03C4\u03B1\u03B8", -1, 1],
- ["\u03C4\u03B9\u03B8", -1, 1],
- ["\u03B5\u03BA\u03B8", -1, 1],
- ["\u03B5\u03BD\u03B8", -1, 1],
- ["\u03C3\u03C5\u03BD\u03B8", -1, 1],
- ["\u03C1\u03BF\u03B8", -1, 1],
- ["\u03C5\u03C0\u03B5\u03C1\u03B8", -1, 1],
- ["\u03C3\u03B8", -1, 1],
- ["\u03B5\u03C5\u03B8", -1, 1],
- ["\u03B1\u03C1\u03BA", -1, 1],
- ["\u03C9\u03C6\u03B5\u03BB", -1, 1],
- ["\u03B2\u03BF\u03BB", -1, 1],
- ["\u03B1\u03B9\u03BD", -1, 1],
- ["\u03C0\u03BF\u03BD", -1, 1],
- ["\u03C1\u03BF\u03BD", -1, 1],
- ["\u03C3\u03C5\u03BD", -1, 1],
- ["\u03B2\u03B1\u03C1", -1, 1],
- ["\u03B2\u03C1", -1, 1],
- ["\u03B1\u03B9\u03C1", -1, 1],
- ["\u03C6\u03BF\u03C1", -1, 1],
- ["\u03B5\u03C5\u03C1", -1, 1],
- ["\u03C0\u03C5\u03C1", -1, 1],
- ["\u03C7\u03C9\u03C1", -1, 1],
- ["\u03BD\u03B5\u03C4", -1, 1],
- ["\u03C3\u03C7", -1, 1]
- ];
-
- /** @const */ var a_42 = [
- ["\u03C0\u03B1\u03B3", -1, 1],
- ["\u03B4", -1, 1],
- ["\u03B1\u03B4", 1, 1],
- ["\u03B8", -1, 1],
- ["\u03B1\u03B8", 3, 1],
- ["\u03C4\u03BF\u03BA", -1, 1],
- ["\u03C3\u03BA", -1, 1],
- ["\u03C0\u03B1\u03C1\u03B1\u03BA\u03B1\u03BB", -1, 1],
- ["\u03C3\u03BA\u03B5\u03BB", -1, 1],
- ["\u03B1\u03C0\u03BB", -1, 1],
- ["\u03B5\u03BC", -1, 1],
- ["\u03B1\u03BD", -1, 1],
- ["\u03B2\u03B5\u03BD", -1, 1],
- ["\u03B2\u03B1\u03C1\u03BF\u03BD", -1, 1],
- ["\u03BA\u03BF\u03C0", -1, 1],
- ["\u03C3\u03B5\u03C1\u03C0", -1, 1],
- ["\u03B1\u03B2\u03B1\u03C1", -1, 1],
- ["\u03B5\u03BD\u03B1\u03C1", -1, 1],
- ["\u03B1\u03B2\u03C1", -1, 1],
- ["\u03BC\u03C0\u03BF\u03C1", -1, 1],
- ["\u03B8\u03B1\u03C1\u03C1", -1, 1],
- ["\u03BD\u03C4\u03C1", -1, 1],
- ["\u03C5", -1, 1],
- ["\u03BD\u03B9\u03C6", -1, 1],
- ["\u03C3\u03C5\u03C1\u03C6", -1, 1]
- ];
-
- /** @const */ var a_43 = [
- ["\u03BF\u03BD\u03C4\u03B1\u03C3", -1, 1],
- ["\u03C9\u03BD\u03C4\u03B1\u03C3", -1, 1]
- ];
-
- /** @const */ var a_44 = [
- ["\u03BF\u03BC\u03B1\u03C3\u03C4\u03B5", -1, 1],
- ["\u03B9\u03BF\u03BC\u03B1\u03C3\u03C4\u03B5", 0, 1]
- ];
-
- /** @const */ var a_45 = [
- ["\u03C0", -1, 1],
- ["\u03B1\u03C0", 0, 1],
- ["\u03B1\u03BA\u03B1\u03C4\u03B1\u03C0", 1, 1],
- ["\u03C3\u03C5\u03BC\u03C0", 0, 1],
- ["\u03B1\u03C3\u03C5\u03BC\u03C0", 3, 1],
- ["\u03B1\u03BC\u03B5\u03C4\u03B1\u03BC\u03C6", -1, 1]
- ];
-
- /** @const */ var a_46 = [
- ["\u03B6", -1, 1],
- ["\u03B1\u03BB", -1, 1],
- ["\u03C0\u03B1\u03C1\u03B1\u03BA\u03B1\u03BB", 1, 1],
- ["\u03B5\u03BA\u03C4\u03B5\u03BB", -1, 1],
- ["\u03BC", -1, 1],
- ["\u03BE", -1, 1],
- ["\u03C0\u03C1\u03BF", -1, 1],
- ["\u03B1\u03C1", -1, 1],
- ["\u03BD\u03B9\u03C3", -1, 1]
- ];
-
- /** @const */ var a_47 = [
- ["\u03B7\u03B8\u03B7\u03BA\u03B1", -1, 1],
- ["\u03B7\u03B8\u03B7\u03BA\u03B5", -1, 1],
- ["\u03B7\u03B8\u03B7\u03BA\u03B5\u03C3", -1, 1]
- ];
-
- /** @const */ var a_48 = [
- ["\u03C0\u03B9\u03B8", -1, 1],
- ["\u03BF\u03B8", -1, 1],
- ["\u03BD\u03B1\u03C1\u03B8", -1, 1],
- ["\u03C3\u03BA\u03BF\u03C5\u03BB", -1, 1],
- ["\u03C3\u03BA\u03C9\u03BB", -1, 1],
- ["\u03C3\u03C6", -1, 1]
- ];
-
- /** @const */ var a_49 = [
- ["\u03B8", -1, 1],
- ["\u03B4\u03B9\u03B1\u03B8", 0, 1],
- ["\u03C0\u03B1\u03C1\u03B1\u03BA\u03B1\u03C4\u03B1\u03B8", 0, 1],
- ["\u03C3\u03C5\u03BD\u03B8", 0, 1],
- ["\u03C0\u03C1\u03BF\u03C3\u03B8", 0, 1]
- ];
-
- /** @const */ var a_50 = [
- ["\u03B7\u03BA\u03B1", -1, 1],
- ["\u03B7\u03BA\u03B5", -1, 1],
- ["\u03B7\u03BA\u03B5\u03C3", -1, 1]
- ];
-
- /** @const */ var a_51 = [
- ["\u03C6\u03B1\u03B3", -1, 1],
- ["\u03BB\u03B7\u03B3", -1, 1],
- ["\u03C6\u03C1\u03C5\u03B4", -1, 1],
- ["\u03BC\u03B1\u03BD\u03C4\u03B9\u03BB", -1, 1],
- ["\u03BC\u03B1\u03BB\u03BB", -1, 1],
- ["\u03BF\u03BC", -1, 1],
- ["\u03B2\u03BB\u03B5\u03C0", -1, 1],
- ["\u03C0\u03BF\u03B4\u03B1\u03C1", -1, 1],
- ["\u03BA\u03C5\u03BC\u03B1\u03C4", -1, 1],
- ["\u03C0\u03C1\u03C9\u03C4", -1, 1],
- ["\u03BB\u03B1\u03C7", -1, 1],
- ["\u03C0\u03B1\u03BD\u03C4\u03B1\u03C7", -1, 1]
- ];
-
- /** @const */ var a_52 = [
- ["\u03C4\u03C3\u03B1", -1, 1],
- ["\u03C7\u03B1\u03B4", -1, 1],
- ["\u03BC\u03B5\u03B4", -1, 1],
- ["\u03BB\u03B1\u03BC\u03C0\u03B9\u03B4", -1, 1],
- ["\u03B4\u03B5", -1, 1],
- ["\u03C0\u03BB\u03B5", -1, 1],
- ["\u03BC\u03B5\u03C3\u03B1\u03B6", -1, 1],
- ["\u03B4\u03B5\u03C3\u03C0\u03BF\u03B6", -1, 1],
- ["\u03B1\u03B9\u03B8", -1, 1],
- ["\u03C6\u03B1\u03C1\u03BC\u03B1\u03BA", -1, 1],
- ["\u03B1\u03B3\u03BA", -1, 1],
- ["\u03B1\u03BD\u03B7\u03BA", -1, 1],
- ["\u03BB", -1, 1],
- ["\u03BC", -1, 1],
- ["\u03B1\u03BC", 13, 1],
- ["\u03B2\u03C1\u03BF\u03BC", 13, 1],
- ["\u03C5\u03C0\u03BF\u03C4\u03B5\u03B9\u03BD", -1, 1],
- ["\u03B5\u03BA\u03BB\u03B9\u03C0", -1, 1],
- ["\u03C1", -1, 1],
- ["\u03B5\u03BD\u03B4\u03B9\u03B1\u03C6\u03B5\u03C1", 18, 1],
- ["\u03B1\u03BD\u03B1\u03C1\u03C1", 18, 1],
- ["\u03C0\u03B1\u03C4", -1, 1],
- ["\u03BA\u03B1\u03B8\u03B1\u03C1\u03B5\u03C5", -1, 1],
- ["\u03B4\u03B5\u03C5\u03C4\u03B5\u03C1\u03B5\u03C5", -1, 1],
- ["\u03BB\u03B5\u03C7", -1, 1]
- ];
-
- /** @const */ var a_53 = [
- ["\u03BF\u03C5\u03C3\u03B1", -1, 1],
- ["\u03BF\u03C5\u03C3\u03B5", -1, 1],
- ["\u03BF\u03C5\u03C3\u03B5\u03C3", -1, 1]
- ];
-
- /** @const */ var a_54 = [
- ["\u03C0\u03B5\u03BB", -1, 1],
- ["\u03BB\u03BB", -1, 1],
- ["\u03C3\u03BC\u03B7\u03BD", -1, 1],
- ["\u03C1\u03C0", -1, 1],
- ["\u03C0\u03C1", -1, 1],
- ["\u03C6\u03C1", -1, 1],
- ["\u03C7\u03BF\u03C1\u03C4", -1, 1],
- ["\u03BF\u03C6", -1, 1],
- ["\u03C8\u03BF\u03C6", 7, -1],
- ["\u03C3\u03C6", -1, 1],
- ["\u03BB\u03BF\u03C7", -1, 1],
- ["\u03BD\u03B1\u03C5\u03BB\u03BF\u03C7", 10, -1]
- ];
-
- /** @const */ var a_55 = [
- ["\u03B1\u03BC\u03B1\u03BB\u03BB\u03B9", -1, 1],
- ["\u03BB", -1, 1],
- ["\u03B1\u03BC\u03B1\u03BB", 1, 1],
- ["\u03BC", -1, 1],
- ["\u03BF\u03C5\u03BB\u03B1\u03BC", 3, 1],
- ["\u03B5\u03BD", -1, 1],
- ["\u03B4\u03B5\u03C1\u03B2\u03B5\u03BD", 5, 1],
- ["\u03C0", -1, 1],
- ["\u03B1\u03B5\u03B9\u03C0", 7, 1],
- ["\u03B1\u03C1\u03C4\u03B9\u03C0", 7, 1],
- ["\u03C3\u03C5\u03BC\u03C0", 7, 1],
- ["\u03BD\u03B5\u03BF\u03C0", 7, 1],
- ["\u03BA\u03C1\u03BF\u03BA\u03B1\u03BB\u03BF\u03C0", 7, 1],
- ["\u03BF\u03BB\u03BF\u03C0", 7, 1],
- ["\u03C0\u03C1\u03BF\u03C3\u03C9\u03C0\u03BF\u03C0", 7, 1],
- ["\u03C3\u03B9\u03B4\u03B7\u03C1\u03BF\u03C0", 7, 1],
- ["\u03B4\u03C1\u03BF\u03C3\u03BF\u03C0", 7, 1],
- ["\u03B1\u03C3\u03C0", 7, 1],
- ["\u03B1\u03BD\u03C5\u03C0", 7, 1],
- ["\u03C1", -1, 1],
- ["\u03B1\u03C3\u03C0\u03B1\u03C1", 19, 1],
- ["\u03C7\u03B1\u03C1", 19, 1],
- ["\u03B1\u03C7\u03B1\u03C1", 21, 1],
- ["\u03B1\u03C0\u03B5\u03C1", 19, 1],
- ["\u03C4\u03C1", 19, 1],
- ["\u03BF\u03C5\u03C1", 19, 1],
- ["\u03C4", -1, 1],
- ["\u03B4\u03B9\u03B1\u03C4", 26, 1],
- ["\u03B5\u03C0\u03B9\u03C4", 26, 1],
- ["\u03C3\u03C5\u03BD\u03C4", 26, 1],
- ["\u03BF\u03BC\u03BF\u03C4", 26, 1],
- ["\u03BD\u03BF\u03BC\u03BF\u03C4", 30, 1],
- ["\u03B1\u03C0\u03BF\u03C4", 26, 1],
- ["\u03C5\u03C0\u03BF\u03C4", 26, 1],
- ["\u03B1\u03B2\u03B1\u03C3\u03C4", 26, 1],
- ["\u03B1\u03B9\u03BC\u03BF\u03C3\u03C4", 26, 1],
- ["\u03C0\u03C1\u03BF\u03C3\u03C4", 26, 1],
- ["\u03B1\u03BD\u03C5\u03C3\u03C4", 26, 1],
- ["\u03BD\u03B1\u03C5", -1, 1],
- ["\u03B1\u03C6", -1, 1],
- ["\u03BE\u03B5\u03C6", -1, 1],
- ["\u03B1\u03B4\u03B7\u03C6", -1, 1],
- ["\u03C0\u03B1\u03BC\u03C6", -1, 1],
- ["\u03C0\u03BF\u03BB\u03C5\u03C6", -1, 1]
- ];
-
- /** @const */ var a_56 = [
- ["\u03B1\u03B3\u03B1", -1, 1],
- ["\u03B1\u03B3\u03B5", -1, 1],
- ["\u03B1\u03B3\u03B5\u03C3", -1, 1]
- ];
-
- /** @const */ var a_57 = [
- ["\u03B7\u03C3\u03B1", -1, 1],
- ["\u03B7\u03C3\u03B5", -1, 1],
- ["\u03B7\u03C3\u03BF\u03C5", -1, 1]
- ];
-
- /** @const */ var a_58 = [
- ["\u03BD", -1, 1],
- ["\u03B4\u03C9\u03B4\u03B5\u03BA\u03B1\u03BD", 0, 1],
- ["\u03B5\u03C0\u03C4\u03B1\u03BD", 0, 1],
- ["\u03BC\u03B5\u03B3\u03B1\u03BB\u03BF\u03BD", 0, 1],
- ["\u03B5\u03C1\u03B7\u03BC\u03BF\u03BD", 0, 1],
- ["\u03C7\u03B5\u03C1\u03C3\u03BF\u03BD", 0, 1]
- ];
-
- /** @const */ var a_59 = [
- ["\u03B7\u03C3\u03C4\u03B5", -1, 1]
- ];
-
- /** @const */ var a_60 = [
- ["\u03C3\u03B2", -1, 1],
- ["\u03B1\u03C3\u03B2", 0, 1],
- ["\u03B1\u03C0\u03BB", -1, 1],
- ["\u03B1\u03B5\u03B9\u03BC\u03BD", -1, 1],
- ["\u03C7\u03C1", -1, 1],
- ["\u03B1\u03C7\u03C1", 4, 1],
- ["\u03BA\u03BF\u03B9\u03BD\u03BF\u03C7\u03C1", 4, 1],
- ["\u03B4\u03C5\u03C3\u03C7\u03C1", 4, 1],
- ["\u03B5\u03C5\u03C7\u03C1", 4, 1],
- ["\u03C0\u03B1\u03BB\u03B9\u03BC\u03C8", -1, 1]
- ];
-
- /** @const */ var a_61 = [
- ["\u03BF\u03C5\u03BD\u03B5", -1, 1],
- ["\u03B7\u03B8\u03BF\u03C5\u03BD\u03B5", 0, 1],
- ["\u03B7\u03C3\u03BF\u03C5\u03BD\u03B5", 0, 1]
- ];
-
- /** @const */ var a_62 = [
- ["\u03C3\u03C0\u03B9", -1, 1],
- ["\u03BD", -1, 1],
- ["\u03B5\u03BE\u03C9\u03BD", 1, 1],
- ["\u03C1", -1, 1],
- ["\u03C3\u03C4\u03C1\u03B1\u03B2\u03BF\u03BC\u03BF\u03C5\u03C4\u03C3", -1, 1],
- ["\u03BA\u03B1\u03BA\u03BF\u03BC\u03BF\u03C5\u03C4\u03C3", -1, 1]
- ];
-
- /** @const */ var a_63 = [
- ["\u03BF\u03C5\u03BC\u03B5", -1, 1],
- ["\u03B7\u03B8\u03BF\u03C5\u03BC\u03B5", 0, 1],
- ["\u03B7\u03C3\u03BF\u03C5\u03BC\u03B5", 0, 1]
- ];
-
- /** @const */ var a_64 = [
- ["\u03B1\u03B6", -1, 1],
- ["\u03C9\u03C1\u03B9\u03BF\u03C0\u03BB", -1, 1],
- ["\u03B1\u03C3\u03BF\u03C5\u03C3", -1, 1],
- ["\u03C0\u03B1\u03C1\u03B1\u03C3\u03BF\u03C5\u03C3", 2, 1],
- ["\u03B1\u03BB\u03BB\u03BF\u03C3\u03BF\u03C5\u03C3", -1, 1],
- ["\u03C6", -1, 1],
- ["\u03C7", -1, 1]
- ];
-
- /** @const */ var a_65 = [
- ["\u03BC\u03B1\u03C4\u03B1", -1, 1],
- ["\u03BC\u03B1\u03C4\u03C9\u03BD", -1, 1],
- ["\u03BC\u03B1\u03C4\u03BF\u03C3", -1, 1]
- ];
-
- /** @const */ var a_66 = [
- ["\u03B1", -1, 1],
- ["\u03B9\u03BF\u03C5\u03BC\u03B1", 0, 1],
- ["\u03BF\u03BC\u03BF\u03C5\u03BD\u03B1", 0, 1],
- ["\u03B9\u03BF\u03BC\u03BF\u03C5\u03BD\u03B1", 2, 1],
- ["\u03BF\u03C3\u03BF\u03C5\u03BD\u03B1", 0, 1],
- ["\u03B9\u03BF\u03C3\u03BF\u03C5\u03BD\u03B1", 4, 1],
- ["\u03B5", -1, 1],
- ["\u03B1\u03B3\u03B1\u03C4\u03B5", 6, 1],
- ["\u03B7\u03BA\u03B1\u03C4\u03B5", 6, 1],
- ["\u03B7\u03B8\u03B7\u03BA\u03B1\u03C4\u03B5", 8, 1],
- ["\u03B7\u03C3\u03B1\u03C4\u03B5", 6, 1],
- ["\u03BF\u03C5\u03C3\u03B1\u03C4\u03B5", 6, 1],
- ["\u03B5\u03B9\u03C4\u03B5", 6, 1],
- ["\u03B7\u03B8\u03B5\u03B9\u03C4\u03B5", 12, 1],
- ["\u03B9\u03B5\u03BC\u03B1\u03C3\u03C4\u03B5", 6, 1],
- ["\u03BF\u03C5\u03BC\u03B1\u03C3\u03C4\u03B5", 6, 1],
- ["\u03B9\u03BF\u03C5\u03BC\u03B1\u03C3\u03C4\u03B5", 15, 1],
- ["\u03B9\u03B5\u03C3\u03B1\u03C3\u03C4\u03B5", 6, 1],
- ["\u03BF\u03C3\u03B1\u03C3\u03C4\u03B5", 6, 1],
- ["\u03B9\u03BF\u03C3\u03B1\u03C3\u03C4\u03B5", 18, 1],
- ["\u03B7", -1, 1],
- ["\u03B9", -1, 1],
- ["\u03B1\u03BC\u03B1\u03B9", 21, 1],
- ["\u03B9\u03B5\u03BC\u03B1\u03B9", 21, 1],
- ["\u03BF\u03BC\u03B1\u03B9", 21, 1],
- ["\u03BF\u03C5\u03BC\u03B1\u03B9", 21, 1],
- ["\u03B1\u03C3\u03B1\u03B9", 21, 1],
- ["\u03B5\u03C3\u03B1\u03B9", 21, 1],
- ["\u03B9\u03B5\u03C3\u03B1\u03B9", 27, 1],
- ["\u03B1\u03C4\u03B1\u03B9", 21, 1],
- ["\u03B5\u03C4\u03B1\u03B9", 21, 1],
- ["\u03B9\u03B5\u03C4\u03B1\u03B9", 30, 1],
- ["\u03BF\u03BD\u03C4\u03B1\u03B9", 21, 1],
- ["\u03BF\u03C5\u03BD\u03C4\u03B1\u03B9", 21, 1],
- ["\u03B9\u03BF\u03C5\u03BD\u03C4\u03B1\u03B9", 33, 1],
- ["\u03B5\u03B9", 21, 1],
- ["\u03B1\u03B5\u03B9", 35, 1],
- ["\u03B7\u03B8\u03B5\u03B9", 35, 1],
- ["\u03B7\u03C3\u03B5\u03B9", 35, 1],
- ["\u03BF\u03B9", 21, 1],
- ["\u03B1\u03BD", -1, 1],
- ["\u03B1\u03B3\u03B1\u03BD", 40, 1],
- ["\u03B7\u03BA\u03B1\u03BD", 40, 1],
- ["\u03B7\u03B8\u03B7\u03BA\u03B1\u03BD", 42, 1],
- ["\u03B7\u03C3\u03B1\u03BD", 40, 1],
- ["\u03BF\u03C5\u03C3\u03B1\u03BD", 40, 1],
- ["\u03BF\u03BD\u03C4\u03BF\u03C5\u03C3\u03B1\u03BD", 45, 1],
- ["\u03B9\u03BF\u03BD\u03C4\u03BF\u03C5\u03C3\u03B1\u03BD", 46, 1],
- ["\u03BF\u03BD\u03C4\u03B1\u03BD", 40, 1],
- ["\u03B9\u03BF\u03BD\u03C4\u03B1\u03BD", 48, 1],
- ["\u03BF\u03C5\u03BD\u03C4\u03B1\u03BD", 40, 1],
- ["\u03B9\u03BF\u03C5\u03BD\u03C4\u03B1\u03BD", 50, 1],
- ["\u03BF\u03C4\u03B1\u03BD", 40, 1],
- ["\u03B9\u03BF\u03C4\u03B1\u03BD", 52, 1],
- ["\u03BF\u03BC\u03B1\u03C3\u03C4\u03B1\u03BD", 40, 1],
- ["\u03B9\u03BF\u03BC\u03B1\u03C3\u03C4\u03B1\u03BD", 54, 1],
- ["\u03BF\u03C3\u03B1\u03C3\u03C4\u03B1\u03BD", 40, 1],
- ["\u03B9\u03BF\u03C3\u03B1\u03C3\u03C4\u03B1\u03BD", 56, 1],
- ["\u03BF\u03C5\u03BD", -1, 1],
- ["\u03B7\u03B8\u03BF\u03C5\u03BD", 58, 1],
- ["\u03BF\u03BC\u03BF\u03C5\u03BD", 58, 1],
- ["\u03B9\u03BF\u03BC\u03BF\u03C5\u03BD", 60, 1],
- ["\u03B7\u03C3\u03BF\u03C5\u03BD", 58, 1],
- ["\u03BF\u03C3\u03BF\u03C5\u03BD", 58, 1],
- ["\u03B9\u03BF\u03C3\u03BF\u03C5\u03BD", 63, 1],
- ["\u03C9\u03BD", -1, 1],
- ["\u03B7\u03B4\u03C9\u03BD", 65, 1],
- ["\u03BF", -1, 1],
- ["\u03B1\u03C3", -1, 1],
- ["\u03B5\u03C3", -1, 1],
- ["\u03B7\u03B4\u03B5\u03C3", 69, 1],
- ["\u03B7\u03C3\u03B5\u03C3", 69, 1],
- ["\u03B7\u03C3", -1, 1],
- ["\u03B5\u03B9\u03C3", -1, 1],
- ["\u03B7\u03B8\u03B5\u03B9\u03C3", 73, 1],
- ["\u03BF\u03C3", -1, 1],
- ["\u03C5\u03C3", -1, 1],
- ["\u03BF\u03C5\u03C3", 76, 1],
- ["\u03C5", -1, 1],
- ["\u03BF\u03C5", 78, 1],
- ["\u03C9", -1, 1],
- ["\u03B1\u03C9", 80, 1],
- ["\u03B7\u03B8\u03C9", 80, 1],
- ["\u03B7\u03C3\u03C9", 80, 1]
- ];
-
- /** @const */ var a_67 = [
- ["\u03BF\u03C4\u03B5\u03C1", -1, 1],
- ["\u03B5\u03C3\u03C4\u03B5\u03C1", -1, 1],
- ["\u03C5\u03C4\u03B5\u03C1", -1, 1],
- ["\u03C9\u03C4\u03B5\u03C1", -1, 1],
- ["\u03BF\u03C4\u03B1\u03C4", -1, 1],
- ["\u03B5\u03C3\u03C4\u03B1\u03C4", -1, 1],
- ["\u03C5\u03C4\u03B1\u03C4", -1, 1],
- ["\u03C9\u03C4\u03B1\u03C4", -1, 1]
- ];
-
- /** @const */ var /** Array */ g_v = [81, 65, 16, 1];
-
- /** @const */ var /** Array */ g_v2 = [81, 65, 0, 1];
-
- var /** boolean */ B_test1 = false;
-
-
- /** @return {boolean} */
- function r_has_min_length() {
- return base.current.length >= 3;
- };
-
- /** @return {boolean} */
- function r_tolower() {
- var /** number */ among_var;
- while(true)
- {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_0);
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u03B1"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u03B2"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("\u03B3"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("\u03B4"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("\u03B5"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("\u03B6"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("\u03B7"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("\u03B8"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("\u03B9"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("\u03BA"))
- {
- return false;
- }
- break;
- case 11:
- if (!base.slice_from("\u03BB"))
- {
- return false;
- }
- break;
- case 12:
- if (!base.slice_from("\u03BC"))
- {
- return false;
- }
- break;
- case 13:
- if (!base.slice_from("\u03BD"))
- {
- return false;
- }
- break;
- case 14:
- if (!base.slice_from("\u03BE"))
- {
- return false;
- }
- break;
- case 15:
- if (!base.slice_from("\u03BF"))
- {
- return false;
- }
- break;
- case 16:
- if (!base.slice_from("\u03C0"))
- {
- return false;
- }
- break;
- case 17:
- if (!base.slice_from("\u03C1"))
- {
- return false;
- }
- break;
- case 18:
- if (!base.slice_from("\u03C3"))
- {
- return false;
- }
- break;
- case 19:
- if (!base.slice_from("\u03C4"))
- {
- return false;
- }
- break;
- case 20:
- if (!base.slice_from("\u03C5"))
- {
- return false;
- }
- break;
- case 21:
- if (!base.slice_from("\u03C6"))
- {
- return false;
- }
- break;
- case 22:
- if (!base.slice_from("\u03C7"))
- {
- return false;
- }
- break;
- case 23:
- if (!base.slice_from("\u03C8"))
- {
- return false;
- }
- break;
- case 24:
- if (!base.slice_from("\u03C9"))
- {
- return false;
- }
- break;
- case 25:
- if (base.cursor <= base.limit_backward)
- {
- break lab0;
- }
- base.cursor--;
- break;
- }
- continue;
- }
- base.cursor = base.limit - v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step1() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_1);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u03C6\u03B1"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u03C3\u03BA\u03B1"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("\u03BF\u03BB\u03BF"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("\u03C3\u03BF"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("\u03C4\u03B1\u03C4\u03BF"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("\u03BA\u03C1\u03B5"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("\u03C0\u03B5\u03C1"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("\u03C4\u03B5\u03C1"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("\u03C6\u03C9"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("\u03BA\u03B1\u03B8\u03B5\u03C3\u03C4"))
- {
- return false;
- }
- break;
- case 11:
- if (!base.slice_from("\u03B3\u03B5\u03B3\u03BF\u03BD"))
- {
- return false;
- }
- break;
- }
- B_test1 = false;
- return true;
- };
-
- /** @return {boolean} */
- function r_steps1() {
- var /** number */ among_var;
- base.ket = base.cursor;
- if (base.find_among_b(a_3) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u03B9"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u03B9\u03B6"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_steps2() {
- base.ket = base.cursor;
- if (base.find_among_b(a_5) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_4) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03C9\u03BD"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_steps3() {
- var /** number */ among_var;
- base.ket = base.cursor;
- if (base.find_among_b(a_7) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.eq_s_b("\u03B9\u03C3\u03B1")))
- {
- break lab1;
- }
- if (base.cursor > base.limit_backward)
- {
- break lab1;
- }
- if (!base.slice_from("\u03B9\u03C3"))
- {
- return false;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- base.bra = base.cursor;
- among_var = base.find_among_b(a_6);
- if (among_var == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u03B9"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u03B9\u03C3"))
- {
- return false;
- }
- break;
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_steps4() {
- base.ket = base.cursor;
- if (base.find_among_b(a_9) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_8) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03B9"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_steps5() {
- var /** number */ among_var;
- base.ket = base.cursor;
- if (base.find_among_b(a_11) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- among_var = base.find_among_b(a_10);
- if (among_var == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u03B9"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u03B9\u03C3\u03C4"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_steps6() {
- var /** number */ among_var;
- base.ket = base.cursor;
- if (base.find_among_b(a_14) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- base.bra = base.cursor;
- among_var = base.find_among_b(a_12);
- if (among_var == 0)
- {
- break lab1;
- }
- if (base.cursor > base.limit_backward)
- {
- break lab1;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u03B9\u03C3\u03BC"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u03B9"))
- {
- return false;
- }
- break;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_13);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u03B1\u03B3\u03BD\u03C9\u03C3\u03C4"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u03B1\u03C4\u03BF\u03BC"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("\u03B3\u03BD\u03C9\u03C3\u03C4"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("\u03B5\u03B8\u03BD"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("\u03B5\u03BA\u03BB\u03B5\u03BA\u03C4"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("\u03C3\u03BA\u03B5\u03C0\u03C4"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("\u03C4\u03BF\u03C0"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("\u03B1\u03BB\u03B5\u03BE\u03B1\u03BD\u03B4\u03C1"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("\u03B2\u03C5\u03B6\u03B1\u03BD\u03C4"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("\u03B8\u03B5\u03B1\u03C4\u03C1"))
- {
- return false;
- }
- break;
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_steps7() {
- base.ket = base.cursor;
- if (base.find_among_b(a_16) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_15) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03B1\u03C1\u03B1\u03BA"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_steps8() {
- var /** number */ among_var;
- base.ket = base.cursor;
- if (base.find_among_b(a_18) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- base.bra = base.cursor;
- among_var = base.find_among_b(a_17);
- if (among_var == 0)
- {
- break lab1;
- }
- if (base.cursor > base.limit_backward)
- {
- break lab1;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u03B1\u03BA"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u03B9\u03C4\u03C3"))
- {
- return false;
- }
- break;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (!(base.eq_s_b("\u03BA\u03BF\u03C1")))
- {
- return false;
- }
- if (!base.slice_from("\u03B9\u03C4\u03C3"))
- {
- return false;
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_steps9() {
- base.ket = base.cursor;
- if (base.find_among_b(a_21) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_19) == 0)
- {
- break lab1;
- }
- if (base.cursor > base.limit_backward)
- {
- break lab1;
- }
- if (!base.slice_from("\u03B9\u03B4"))
- {
- return false;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_20) == 0)
- {
- return false;
- }
- if (!base.slice_from("\u03B9\u03B4"))
- {
- return false;
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_steps10() {
- base.ket = base.cursor;
- if (base.find_among_b(a_23) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_22) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03B9\u03C3\u03BA"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step2a() {
- base.ket = base.cursor;
- if (base.find_among_b(a_24) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- if (base.find_among_b(a_25) == 0)
- {
- break lab0;
- }
- return false;
- }
- base.cursor = base.limit - v_1;
- }
- {
- var /** number */ c1 = base.cursor;
- base.insert(base.cursor, base.cursor, "\u03B1\u03B4");
- base.cursor = c1;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step2b() {
- base.ket = base.cursor;
- if (base.find_among_b(a_26) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_27) == 0)
- {
- return false;
- }
- if (!base.slice_from("\u03B5\u03B4"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step2c() {
- base.ket = base.cursor;
- if (base.find_among_b(a_28) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_29) == 0)
- {
- return false;
- }
- if (!base.slice_from("\u03BF\u03C5\u03B4"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step2d() {
- base.ket = base.cursor;
- if (base.find_among_b(a_30) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_31) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03B5"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step3() {
- base.ket = base.cursor;
- if (base.find_among_b(a_32) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (!(base.in_grouping_b(g_v, 945, 969)))
- {
- return false;
- }
- if (!base.slice_from("\u03B9"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step4() {
- base.ket = base.cursor;
- if (base.find_among_b(a_33) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (!(base.in_grouping_b(g_v, 945, 969)))
- {
- break lab1;
- }
- if (!base.slice_from("\u03B9\u03BA"))
- {
- return false;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- }
- base.bra = base.cursor;
- if (base.find_among_b(a_34) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03B9\u03BA"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step5a() {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- if (!(base.eq_s_b("\u03B1\u03B3\u03B1\u03BC\u03B5")))
- {
- break lab0;
- }
- if (base.cursor > base.limit_backward)
- {
- break lab0;
- }
- if (!base.slice_from("\u03B1\u03B3\u03B1\u03BC"))
- {
- return false;
- }
- }
- base.cursor = base.limit - v_1;
- var /** number */ v_2 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- if (base.find_among_b(a_35) == 0)
- {
- break lab1;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- }
- base.cursor = base.limit - v_2;
- base.ket = base.cursor;
- if (!(base.eq_s_b("\u03B1\u03BC\u03B5")))
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_36) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03B1\u03BC"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step5b() {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (base.find_among_b(a_38) == 0)
- {
- break lab0;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_37) == 0)
- {
- break lab0;
- }
- if (base.cursor > base.limit_backward)
- {
- break lab0;
- }
- if (!base.slice_from("\u03B1\u03B3\u03B1\u03BD"))
- {
- return false;
- }
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- if (!(base.eq_s_b("\u03B1\u03BD\u03B5")))
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- lab1: {
- var /** number */ v_2 = base.limit - base.cursor;
- lab2: {
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (!(base.in_grouping_b(g_v2, 945, 969)))
- {
- break lab2;
- }
- if (!base.slice_from("\u03B1\u03BD"))
- {
- return false;
- }
- break lab1;
- }
- base.cursor = base.limit - v_2;
- base.ket = base.cursor;
- }
- base.bra = base.cursor;
- if (base.find_among_b(a_39) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03B1\u03BD"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step5c() {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (base.find_among_b(a_40) == 0)
- {
- break lab0;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- if (!(base.eq_s_b("\u03B5\u03C4\u03B5")))
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- lab1: {
- var /** number */ v_2 = base.limit - base.cursor;
- lab2: {
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (!(base.in_grouping_b(g_v2, 945, 969)))
- {
- break lab2;
- }
- if (!base.slice_from("\u03B5\u03C4"))
- {
- return false;
- }
- break lab1;
- }
- base.cursor = base.limit - v_2;
- lab3: {
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_41) == 0)
- {
- break lab3;
- }
- if (!base.slice_from("\u03B5\u03C4"))
- {
- return false;
- }
- break lab1;
- }
- base.cursor = base.limit - v_2;
- base.ket = base.cursor;
- }
- base.bra = base.cursor;
- if (base.find_among_b(a_42) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03B5\u03C4"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step5d() {
- base.ket = base.cursor;
- if (base.find_among_b(a_43) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (!(base.eq_s_b("\u03B1\u03C1\u03C7")))
- {
- break lab1;
- }
- if (base.cursor > base.limit_backward)
- {
- break lab1;
- }
- if (!base.slice_from("\u03BF\u03BD\u03C4"))
- {
- return false;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (!(base.eq_s_b("\u03BA\u03C1\u03B5")))
- {
- return false;
- }
- if (!base.slice_from("\u03C9\u03BD\u03C4"))
- {
- return false;
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step5e() {
- base.ket = base.cursor;
- if (base.find_among_b(a_44) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (!(base.eq_s_b("\u03BF\u03BD")))
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03BF\u03BC\u03B1\u03C3\u03C4"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step5f() {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("\u03B9\u03B5\u03C3\u03C4\u03B5")))
- {
- break lab0;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_45) == 0)
- {
- break lab0;
- }
- if (base.cursor > base.limit_backward)
- {
- break lab0;
- }
- if (!base.slice_from("\u03B9\u03B5\u03C3\u03C4"))
- {
- return false;
- }
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- if (!(base.eq_s_b("\u03B5\u03C3\u03C4\u03B5")))
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_46) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03B9\u03B5\u03C3\u03C4"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step5g() {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (base.find_among_b(a_47) == 0)
- {
- break lab0;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- if (base.find_among_b(a_50) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- lab1: {
- var /** number */ v_2 = base.limit - base.cursor;
- lab2: {
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_48) == 0)
- {
- break lab2;
- }
- if (!base.slice_from("\u03B7\u03BA"))
- {
- return false;
- }
- break lab1;
- }
- base.cursor = base.limit - v_2;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_49) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03B7\u03BA"))
- {
- return false;
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step5h() {
- base.ket = base.cursor;
- if (base.find_among_b(a_53) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_51) == 0)
- {
- break lab1;
- }
- if (!base.slice_from("\u03BF\u03C5\u03C3"))
- {
- return false;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_52) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03BF\u03C5\u03C3"))
- {
- return false;
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step5i() {
- var /** number */ among_var;
- base.ket = base.cursor;
- if (base.find_among_b(a_56) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (!(base.eq_s_b("\u03BA\u03BF\u03BB\u03BB")))
- {
- break lab1;
- }
- if (!base.slice_from("\u03B1\u03B3"))
- {
- return false;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab2: {
- var /** number */ v_2 = base.limit - base.cursor;
- lab3: {
- base.ket = base.cursor;
- base.bra = base.cursor;
- among_var = base.find_among_b(a_54);
- if (among_var == 0)
- {
- break lab3;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u03B1\u03B3"))
- {
- return false;
- }
- break;
- }
- break lab2;
- }
- base.cursor = base.limit - v_2;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_55) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03B1\u03B3"))
- {
- return false;
- }
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step5j() {
- base.ket = base.cursor;
- if (base.find_among_b(a_57) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_58) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03B7\u03C3"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step5k() {
- base.ket = base.cursor;
- if (base.find_among_b(a_59) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_60) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03B7\u03C3\u03C4"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step5l() {
- base.ket = base.cursor;
- if (base.find_among_b(a_61) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_62) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03BF\u03C5\u03BD"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step5m() {
- base.ket = base.cursor;
- if (base.find_among_b(a_63) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_test1 = false;
- base.ket = base.cursor;
- base.bra = base.cursor;
- if (base.find_among_b(a_64) == 0)
- {
- return false;
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- if (!base.slice_from("\u03BF\u03C5\u03BC"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step6() {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (base.find_among_b(a_65) == 0)
- {
- break lab0;
- }
- base.bra = base.cursor;
- if (!base.slice_from("\u03BC\u03B1"))
- {
- return false;
- }
- }
- base.cursor = base.limit - v_1;
- if (!B_test1)
- {
- return false;
- }
- base.ket = base.cursor;
- if (base.find_among_b(a_66) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step7() {
- base.ket = base.cursor;
- if (base.find_among_b(a_67) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_1 = base.limit - base.cursor;
- r_tolower();
- base.cursor = base.limit - v_1;
- if (!r_has_min_length())
- {
- return false;
- }
- B_test1 = true;
- var /** number */ v_2 = base.limit - base.cursor;
- r_step1();
- base.cursor = base.limit - v_2;
- var /** number */ v_3 = base.limit - base.cursor;
- r_steps1();
- base.cursor = base.limit - v_3;
- var /** number */ v_4 = base.limit - base.cursor;
- r_steps2();
- base.cursor = base.limit - v_4;
- var /** number */ v_5 = base.limit - base.cursor;
- r_steps3();
- base.cursor = base.limit - v_5;
- var /** number */ v_6 = base.limit - base.cursor;
- r_steps4();
- base.cursor = base.limit - v_6;
- var /** number */ v_7 = base.limit - base.cursor;
- r_steps5();
- base.cursor = base.limit - v_7;
- var /** number */ v_8 = base.limit - base.cursor;
- r_steps6();
- base.cursor = base.limit - v_8;
- var /** number */ v_9 = base.limit - base.cursor;
- r_steps7();
- base.cursor = base.limit - v_9;
- var /** number */ v_10 = base.limit - base.cursor;
- r_steps8();
- base.cursor = base.limit - v_10;
- var /** number */ v_11 = base.limit - base.cursor;
- r_steps9();
- base.cursor = base.limit - v_11;
- var /** number */ v_12 = base.limit - base.cursor;
- r_steps10();
- base.cursor = base.limit - v_12;
- var /** number */ v_13 = base.limit - base.cursor;
- r_step2a();
- base.cursor = base.limit - v_13;
- var /** number */ v_14 = base.limit - base.cursor;
- r_step2b();
- base.cursor = base.limit - v_14;
- var /** number */ v_15 = base.limit - base.cursor;
- r_step2c();
- base.cursor = base.limit - v_15;
- var /** number */ v_16 = base.limit - base.cursor;
- r_step2d();
- base.cursor = base.limit - v_16;
- var /** number */ v_17 = base.limit - base.cursor;
- r_step3();
- base.cursor = base.limit - v_17;
- var /** number */ v_18 = base.limit - base.cursor;
- r_step4();
- base.cursor = base.limit - v_18;
- var /** number */ v_19 = base.limit - base.cursor;
- r_step5a();
- base.cursor = base.limit - v_19;
- var /** number */ v_20 = base.limit - base.cursor;
- r_step5b();
- base.cursor = base.limit - v_20;
- var /** number */ v_21 = base.limit - base.cursor;
- r_step5c();
- base.cursor = base.limit - v_21;
- var /** number */ v_22 = base.limit - base.cursor;
- r_step5d();
- base.cursor = base.limit - v_22;
- var /** number */ v_23 = base.limit - base.cursor;
- r_step5e();
- base.cursor = base.limit - v_23;
- var /** number */ v_24 = base.limit - base.cursor;
- r_step5f();
- base.cursor = base.limit - v_24;
- var /** number */ v_25 = base.limit - base.cursor;
- r_step5g();
- base.cursor = base.limit - v_25;
- var /** number */ v_26 = base.limit - base.cursor;
- r_step5h();
- base.cursor = base.limit - v_26;
- var /** number */ v_27 = base.limit - base.cursor;
- r_step5j();
- base.cursor = base.limit - v_27;
- var /** number */ v_28 = base.limit - base.cursor;
- r_step5i();
- base.cursor = base.limit - v_28;
- var /** number */ v_29 = base.limit - base.cursor;
- r_step5k();
- base.cursor = base.limit - v_29;
- var /** number */ v_30 = base.limit - base.cursor;
- r_step5l();
- base.cursor = base.limit - v_30;
- var /** number */ v_31 = base.limit - base.cursor;
- r_step5m();
- base.cursor = base.limit - v_31;
- var /** number */ v_32 = base.limit - base.cursor;
- r_step6();
- base.cursor = base.limit - v_32;
- var /** number */ v_33 = base.limit - base.cursor;
- r_step7();
- base.cursor = base.limit - v_33;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['GreekStemmer'] = GreekStemmer;
diff --git a/js/hindi-stemmer.js b/js/hindi-stemmer.js
deleted file mode 100644
index 20bf6f6..0000000
--- a/js/hindi-stemmer.js
+++ /dev/null
@@ -1,183 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var HindiStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["\u0906\u0901", -1, -1],
- ["\u093E\u0901", -1, -1],
- ["\u0907\u092F\u093E\u0901", 1, -1],
- ["\u0906\u0907\u092F\u093E\u0901", 2, -1],
- ["\u093E\u0907\u092F\u093E\u0901", 2, -1],
- ["\u093F\u092F\u093E\u0901", 1, -1],
- ["\u0906\u0902", -1, -1],
- ["\u0909\u0906\u0902", 6, -1],
- ["\u0941\u0906\u0902", 6, -1],
- ["\u0908\u0902", -1, -1],
- ["\u0906\u0908\u0902", 9, -1],
- ["\u093E\u0908\u0902", 9, -1],
- ["\u090F\u0902", -1, -1],
- ["\u0906\u090F\u0902", 12, -1],
- ["\u0909\u090F\u0902", 12, -1],
- ["\u093E\u090F\u0902", 12, -1],
- ["\u0924\u093E\u090F\u0902", 15, -1, r_CONSONANT],
- ["\u0905\u0924\u093E\u090F\u0902", 16, -1],
- ["\u0928\u093E\u090F\u0902", 15, -1, r_CONSONANT],
- ["\u0905\u0928\u093E\u090F\u0902", 18, -1],
- ["\u0941\u090F\u0902", 12, -1],
- ["\u0913\u0902", -1, -1],
- ["\u0906\u0913\u0902", 21, -1],
- ["\u0909\u0913\u0902", 21, -1],
- ["\u093E\u0913\u0902", 21, -1],
- ["\u0924\u093E\u0913\u0902", 24, -1, r_CONSONANT],
- ["\u0905\u0924\u093E\u0913\u0902", 25, -1],
- ["\u0928\u093E\u0913\u0902", 24, -1, r_CONSONANT],
- ["\u0905\u0928\u093E\u0913\u0902", 27, -1],
- ["\u0941\u0913\u0902", 21, -1],
- ["\u093E\u0902", -1, -1],
- ["\u0907\u092F\u093E\u0902", 30, -1],
- ["\u0906\u0907\u092F\u093E\u0902", 31, -1],
- ["\u093E\u0907\u092F\u093E\u0902", 31, -1],
- ["\u093F\u092F\u093E\u0902", 30, -1],
- ["\u0940\u0902", -1, -1],
- ["\u0924\u0940\u0902", 35, -1, r_CONSONANT],
- ["\u0905\u0924\u0940\u0902", 36, -1],
- ["\u0906\u0924\u0940\u0902", 36, -1],
- ["\u093E\u0924\u0940\u0902", 36, -1],
- ["\u0947\u0902", -1, -1],
- ["\u094B\u0902", -1, -1],
- ["\u0907\u092F\u094B\u0902", 41, -1],
- ["\u0906\u0907\u092F\u094B\u0902", 42, -1],
- ["\u093E\u0907\u092F\u094B\u0902", 42, -1],
- ["\u093F\u092F\u094B\u0902", 41, -1],
- ["\u0905", -1, -1],
- ["\u0906", -1, -1],
- ["\u0907", -1, -1],
- ["\u0908", -1, -1],
- ["\u0906\u0908", 49, -1],
- ["\u093E\u0908", 49, -1],
- ["\u0909", -1, -1],
- ["\u090A", -1, -1],
- ["\u090F", -1, -1],
- ["\u0906\u090F", 54, -1],
- ["\u0907\u090F", 54, -1],
- ["\u0906\u0907\u090F", 56, -1],
- ["\u093E\u0907\u090F", 56, -1],
- ["\u093E\u090F", 54, -1],
- ["\u093F\u090F", 54, -1],
- ["\u0913", -1, -1],
- ["\u0906\u0913", 61, -1],
- ["\u093E\u0913", 61, -1],
- ["\u0915\u0930", -1, -1, r_CONSONANT],
- ["\u0905\u0915\u0930", 64, -1],
- ["\u0906\u0915\u0930", 64, -1],
- ["\u093E\u0915\u0930", 64, -1],
- ["\u093E", -1, -1],
- ["\u090A\u0902\u0917\u093E", 68, -1],
- ["\u0906\u090A\u0902\u0917\u093E", 69, -1],
- ["\u093E\u090A\u0902\u0917\u093E", 69, -1],
- ["\u0942\u0902\u0917\u093E", 68, -1],
- ["\u090F\u0917\u093E", 68, -1],
- ["\u0906\u090F\u0917\u093E", 73, -1],
- ["\u093E\u090F\u0917\u093E", 73, -1],
- ["\u0947\u0917\u093E", 68, -1],
- ["\u0924\u093E", 68, -1, r_CONSONANT],
- ["\u0905\u0924\u093E", 77, -1],
- ["\u0906\u0924\u093E", 77, -1],
- ["\u093E\u0924\u093E", 77, -1],
- ["\u0928\u093E", 68, -1, r_CONSONANT],
- ["\u0905\u0928\u093E", 81, -1],
- ["\u0906\u0928\u093E", 81, -1],
- ["\u093E\u0928\u093E", 81, -1],
- ["\u0906\u092F\u093E", 68, -1],
- ["\u093E\u092F\u093E", 68, -1],
- ["\u093F", -1, -1],
- ["\u0940", -1, -1],
- ["\u090A\u0902\u0917\u0940", 88, -1],
- ["\u0906\u090A\u0902\u0917\u0940", 89, -1],
- ["\u093E\u090A\u0902\u0917\u0940", 89, -1],
- ["\u090F\u0902\u0917\u0940", 88, -1],
- ["\u0906\u090F\u0902\u0917\u0940", 92, -1],
- ["\u093E\u090F\u0902\u0917\u0940", 92, -1],
- ["\u0942\u0902\u0917\u0940", 88, -1],
- ["\u0947\u0902\u0917\u0940", 88, -1],
- ["\u090F\u0917\u0940", 88, -1],
- ["\u0906\u090F\u0917\u0940", 97, -1],
- ["\u093E\u090F\u0917\u0940", 97, -1],
- ["\u0913\u0917\u0940", 88, -1],
- ["\u0906\u0913\u0917\u0940", 100, -1],
- ["\u093E\u0913\u0917\u0940", 100, -1],
- ["\u0947\u0917\u0940", 88, -1],
- ["\u094B\u0917\u0940", 88, -1],
- ["\u0924\u0940", 88, -1, r_CONSONANT],
- ["\u0905\u0924\u0940", 105, -1],
- ["\u0906\u0924\u0940", 105, -1],
- ["\u093E\u0924\u0940", 105, -1],
- ["\u0928\u0940", 88, -1, r_CONSONANT],
- ["\u0905\u0928\u0940", 109, -1],
- ["\u0941", -1, -1],
- ["\u0942", -1, -1],
- ["\u0947", -1, -1],
- ["\u090F\u0902\u0917\u0947", 113, -1],
- ["\u0906\u090F\u0902\u0917\u0947", 114, -1],
- ["\u093E\u090F\u0902\u0917\u0947", 114, -1],
- ["\u0947\u0902\u0917\u0947", 113, -1],
- ["\u0913\u0917\u0947", 113, -1],
- ["\u0906\u0913\u0917\u0947", 118, -1],
- ["\u093E\u0913\u0917\u0947", 118, -1],
- ["\u094B\u0917\u0947", 113, -1],
- ["\u0924\u0947", 113, -1, r_CONSONANT],
- ["\u0905\u0924\u0947", 122, -1],
- ["\u0906\u0924\u0947", 122, -1],
- ["\u093E\u0924\u0947", 122, -1],
- ["\u0928\u0947", 113, -1, r_CONSONANT],
- ["\u0905\u0928\u0947", 126, -1],
- ["\u0906\u0928\u0947", 126, -1],
- ["\u093E\u0928\u0947", 126, -1],
- ["\u094B", -1, -1],
- ["\u094D", -1, -1]
- ];
-
- /** @const */ var /** Array */ g_consonant = [255, 255, 255, 255, 159, 0, 0, 0, 248, 7];
-
-
-
- /** @return {boolean} */
- function r_CONSONANT() {
- if (!(base.in_grouping_b(g_consonant, 2325, 2399)))
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- base.ket = base.cursor;
- if (base.find_among_b(a_0) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['HindiStemmer'] = HindiStemmer;
diff --git a/js/hungarian-stemmer.js b/js/hungarian-stemmer.js
deleted file mode 100644
index d1c8c56..0000000
--- a/js/hungarian-stemmer.js
+++ /dev/null
@@ -1,709 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var HungarianStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["cs", -1, -1],
- ["dzs", -1, -1],
- ["gy", -1, -1],
- ["ly", -1, -1],
- ["ny", -1, -1],
- ["sz", -1, -1],
- ["ty", -1, -1],
- ["zs", -1, -1]
- ];
-
- /** @const */ var a_1 = [
- ["\u00E1", -1, 1],
- ["\u00E9", -1, 2]
- ];
-
- /** @const */ var a_2 = [
- ["bb", -1, -1],
- ["cc", -1, -1],
- ["dd", -1, -1],
- ["ff", -1, -1],
- ["gg", -1, -1],
- ["jj", -1, -1],
- ["kk", -1, -1],
- ["ll", -1, -1],
- ["mm", -1, -1],
- ["nn", -1, -1],
- ["pp", -1, -1],
- ["rr", -1, -1],
- ["ccs", -1, -1],
- ["ss", -1, -1],
- ["zzs", -1, -1],
- ["tt", -1, -1],
- ["vv", -1, -1],
- ["ggy", -1, -1],
- ["lly", -1, -1],
- ["nny", -1, -1],
- ["tty", -1, -1],
- ["ssz", -1, -1],
- ["zz", -1, -1]
- ];
-
- /** @const */ var a_3 = [
- ["al", -1, 1],
- ["el", -1, 1]
- ];
-
- /** @const */ var a_4 = [
- ["ba", -1, -1],
- ["ra", -1, -1],
- ["be", -1, -1],
- ["re", -1, -1],
- ["ig", -1, -1],
- ["nak", -1, -1],
- ["nek", -1, -1],
- ["val", -1, -1],
- ["vel", -1, -1],
- ["ul", -1, -1],
- ["n\u00E1l", -1, -1],
- ["n\u00E9l", -1, -1],
- ["b\u00F3l", -1, -1],
- ["r\u00F3l", -1, -1],
- ["t\u00F3l", -1, -1],
- ["\u00FCl", -1, -1],
- ["b\u0151l", -1, -1],
- ["r\u0151l", -1, -1],
- ["t\u0151l", -1, -1],
- ["n", -1, -1],
- ["an", 19, -1],
- ["ban", 20, -1],
- ["en", 19, -1],
- ["ben", 22, -1],
- ["k\u00E9ppen", 22, -1],
- ["on", 19, -1],
- ["\u00F6n", 19, -1],
- ["k\u00E9pp", -1, -1],
- ["kor", -1, -1],
- ["t", -1, -1],
- ["at", 29, -1],
- ["et", 29, -1],
- ["k\u00E9nt", 29, -1],
- ["ank\u00E9nt", 32, -1],
- ["enk\u00E9nt", 32, -1],
- ["onk\u00E9nt", 32, -1],
- ["ot", 29, -1],
- ["\u00E9rt", 29, -1],
- ["\u00F6t", 29, -1],
- ["hez", -1, -1],
- ["hoz", -1, -1],
- ["h\u00F6z", -1, -1],
- ["v\u00E1", -1, -1],
- ["v\u00E9", -1, -1]
- ];
-
- /** @const */ var a_5 = [
- ["\u00E1n", -1, 2],
- ["\u00E9n", -1, 1],
- ["\u00E1nk\u00E9nt", -1, 2]
- ];
-
- /** @const */ var a_6 = [
- ["stul", -1, 1],
- ["astul", 0, 1],
- ["\u00E1stul", 0, 2],
- ["st\u00FCl", -1, 1],
- ["est\u00FCl", 3, 1],
- ["\u00E9st\u00FCl", 3, 3]
- ];
-
- /** @const */ var a_7 = [
- ["\u00E1", -1, 1],
- ["\u00E9", -1, 1]
- ];
-
- /** @const */ var a_8 = [
- ["k", -1, 3],
- ["ak", 0, 3],
- ["ek", 0, 3],
- ["ok", 0, 3],
- ["\u00E1k", 0, 1],
- ["\u00E9k", 0, 2],
- ["\u00F6k", 0, 3]
- ];
-
- /** @const */ var a_9 = [
- ["\u00E9i", -1, 1],
- ["\u00E1\u00E9i", 0, 3],
- ["\u00E9\u00E9i", 0, 2],
- ["\u00E9", -1, 1],
- ["k\u00E9", 3, 1],
- ["ak\u00E9", 4, 1],
- ["ek\u00E9", 4, 1],
- ["ok\u00E9", 4, 1],
- ["\u00E1k\u00E9", 4, 3],
- ["\u00E9k\u00E9", 4, 2],
- ["\u00F6k\u00E9", 4, 1],
- ["\u00E9\u00E9", 3, 2]
- ];
-
- /** @const */ var a_10 = [
- ["a", -1, 1],
- ["ja", 0, 1],
- ["d", -1, 1],
- ["ad", 2, 1],
- ["ed", 2, 1],
- ["od", 2, 1],
- ["\u00E1d", 2, 2],
- ["\u00E9d", 2, 3],
- ["\u00F6d", 2, 1],
- ["e", -1, 1],
- ["je", 9, 1],
- ["nk", -1, 1],
- ["unk", 11, 1],
- ["\u00E1nk", 11, 2],
- ["\u00E9nk", 11, 3],
- ["\u00FCnk", 11, 1],
- ["uk", -1, 1],
- ["juk", 16, 1],
- ["\u00E1juk", 17, 2],
- ["\u00FCk", -1, 1],
- ["j\u00FCk", 19, 1],
- ["\u00E9j\u00FCk", 20, 3],
- ["m", -1, 1],
- ["am", 22, 1],
- ["em", 22, 1],
- ["om", 22, 1],
- ["\u00E1m", 22, 2],
- ["\u00E9m", 22, 3],
- ["o", -1, 1],
- ["\u00E1", -1, 2],
- ["\u00E9", -1, 3]
- ];
-
- /** @const */ var a_11 = [
- ["id", -1, 1],
- ["aid", 0, 1],
- ["jaid", 1, 1],
- ["eid", 0, 1],
- ["jeid", 3, 1],
- ["\u00E1id", 0, 2],
- ["\u00E9id", 0, 3],
- ["i", -1, 1],
- ["ai", 7, 1],
- ["jai", 8, 1],
- ["ei", 7, 1],
- ["jei", 10, 1],
- ["\u00E1i", 7, 2],
- ["\u00E9i", 7, 3],
- ["itek", -1, 1],
- ["eitek", 14, 1],
- ["jeitek", 15, 1],
- ["\u00E9itek", 14, 3],
- ["ik", -1, 1],
- ["aik", 18, 1],
- ["jaik", 19, 1],
- ["eik", 18, 1],
- ["jeik", 21, 1],
- ["\u00E1ik", 18, 2],
- ["\u00E9ik", 18, 3],
- ["ink", -1, 1],
- ["aink", 25, 1],
- ["jaink", 26, 1],
- ["eink", 25, 1],
- ["jeink", 28, 1],
- ["\u00E1ink", 25, 2],
- ["\u00E9ink", 25, 3],
- ["aitok", -1, 1],
- ["jaitok", 32, 1],
- ["\u00E1itok", -1, 2],
- ["im", -1, 1],
- ["aim", 35, 1],
- ["jaim", 36, 1],
- ["eim", 35, 1],
- ["jeim", 38, 1],
- ["\u00E1im", 35, 2],
- ["\u00E9im", 35, 3]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 36, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1];
-
- var /** number */ I_p1 = 0;
-
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_p1 = base.limit;
- lab0: {
- var /** number */ v_1 = base.cursor;
- lab1: {
- if (!(base.in_grouping(g_v, 97, 369)))
- {
- break lab1;
- }
- golab2: while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab3: {
- if (!(base.out_grouping(g_v, 97, 369)))
- {
- break lab3;
- }
- base.cursor = v_2;
- break golab2;
- }
- base.cursor = v_2;
- if (base.cursor >= base.limit)
- {
- break lab1;
- }
- base.cursor++;
- }
- lab4: {
- var /** number */ v_3 = base.cursor;
- lab5: {
- if (base.find_among(a_0) == 0)
- {
- break lab5;
- }
- break lab4;
- }
- base.cursor = v_3;
- if (base.cursor >= base.limit)
- {
- break lab1;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- break lab0;
- }
- base.cursor = v_1;
- if (!(base.out_grouping(g_v, 97, 369)))
- {
- return false;
- }
- golab6: while(true)
- {
- lab7: {
- if (!(base.in_grouping(g_v, 97, 369)))
- {
- break lab7;
- }
- break golab6;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_v_ending() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_1);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_double() {
- var /** number */ v_1 = base.limit - base.cursor;
- if (base.find_among_b(a_2) == 0)
- {
- return false;
- }
- base.cursor = base.limit - v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_undouble() {
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- base.ket = base.cursor;
- {
- var /** number */ c1 = base.cursor - 1;
- if (c1 < base.limit_backward)
- {
- return false;
- }
- base.cursor = c1;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_instrum() {
- base.ket = base.cursor;
- if (base.find_among_b(a_3) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- if (!r_double())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_undouble())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_case() {
- base.ket = base.cursor;
- if (base.find_among_b(a_4) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_v_ending())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_case_special() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_5);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_case_other() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_6);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_factive() {
- base.ket = base.cursor;
- if (base.find_among_b(a_7) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- if (!r_double())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_undouble())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_plural() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_8);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_owned() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_9);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_sing_owner() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_10);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_plur_owner() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_11);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- var /** number */ v_1 = base.cursor;
- r_mark_regions();
- base.cursor = v_1;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_2 = base.limit - base.cursor;
- r_instrum();
- base.cursor = base.limit - v_2;
- var /** number */ v_3 = base.limit - base.cursor;
- r_case();
- base.cursor = base.limit - v_3;
- var /** number */ v_4 = base.limit - base.cursor;
- r_case_special();
- base.cursor = base.limit - v_4;
- var /** number */ v_5 = base.limit - base.cursor;
- r_case_other();
- base.cursor = base.limit - v_5;
- var /** number */ v_6 = base.limit - base.cursor;
- r_factive();
- base.cursor = base.limit - v_6;
- var /** number */ v_7 = base.limit - base.cursor;
- r_owned();
- base.cursor = base.limit - v_7;
- var /** number */ v_8 = base.limit - base.cursor;
- r_sing_owner();
- base.cursor = base.limit - v_8;
- var /** number */ v_9 = base.limit - base.cursor;
- r_plur_owner();
- base.cursor = base.limit - v_9;
- var /** number */ v_10 = base.limit - base.cursor;
- r_plural();
- base.cursor = base.limit - v_10;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['HungarianStemmer'] = HungarianStemmer;
diff --git a/js/indonesian-stemmer.js b/js/indonesian-stemmer.js
deleted file mode 100644
index eb9663b..0000000
--- a/js/indonesian-stemmer.js
+++ /dev/null
@@ -1,422 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var IndonesianStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["kah", -1, 1],
- ["lah", -1, 1],
- ["pun", -1, 1]
- ];
-
- /** @const */ var a_1 = [
- ["nya", -1, 1],
- ["ku", -1, 1],
- ["mu", -1, 1]
- ];
-
- /** @const */ var a_2 = [
- ["i", -1, 1, r_SUFFIX_I_OK],
- ["an", -1, 1, r_SUFFIX_AN_OK],
- ["kan", 1, 1, r_SUFFIX_KAN_OK]
- ];
-
- /** @const */ var a_3 = [
- ["di", -1, 1],
- ["ke", -1, 2],
- ["me", -1, 1],
- ["mem", 2, 5],
- ["men", 2, 1],
- ["meng", 4, 1],
- ["meny", 4, 3, r_VOWEL],
- ["pem", -1, 6],
- ["pen", -1, 2],
- ["peng", 8, 2],
- ["peny", 8, 4, r_VOWEL],
- ["ter", -1, 1]
- ];
-
- /** @const */ var a_4 = [
- ["be", -1, 3, r_KER],
- ["belajar", 0, 4],
- ["ber", 0, 3],
- ["pe", -1, 1],
- ["pelajar", 3, 2],
- ["per", 3, 1]
- ];
-
- /** @const */ var /** Array */ g_vowel = [17, 65, 16];
-
- var /** number */ I_prefix = 0;
- var /** number */ I_measure = 0;
-
-
- /** @return {boolean} */
- function r_remove_particle() {
- base.ket = base.cursor;
- if (base.find_among_b(a_0) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- I_measure -= 1;
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_possessive_pronoun() {
- base.ket = base.cursor;
- if (base.find_among_b(a_1) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- I_measure -= 1;
- return true;
- };
-
- /** @return {boolean} */
- function r_SUFFIX_KAN_OK() {
- if (I_prefix == 3)
- {
- return false;
- }
- if (I_prefix == 2)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_SUFFIX_AN_OK() {
- return I_prefix != 1;
- };
-
- /** @return {boolean} */
- function r_SUFFIX_I_OK() {
- if (I_prefix > 2)
- {
- return false;
- }
- {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- if (!(base.eq_s_b("s")))
- {
- break lab0;
- }
- return false;
- }
- base.cursor = base.limit - v_1;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_suffix() {
- base.ket = base.cursor;
- if (base.find_among_b(a_2) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- I_measure -= 1;
- return true;
- };
-
- /** @return {boolean} */
- function r_VOWEL() {
- if (!(base.in_grouping(g_vowel, 97, 117)))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_KER() {
- if (!(base.out_grouping(g_vowel, 97, 117)))
- {
- return false;
- }
- if (!(base.eq_s("er")))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_first_order_prefix() {
- var /** number */ among_var;
- base.bra = base.cursor;
- among_var = base.find_among(a_3);
- if (among_var == 0)
- {
- return false;
- }
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- I_prefix = 1;
- I_measure -= 1;
- break;
- case 2:
- if (!base.slice_del())
- {
- return false;
- }
- I_prefix = 3;
- I_measure -= 1;
- break;
- case 3:
- I_prefix = 1;
- if (!base.slice_from("s"))
- {
- return false;
- }
- I_measure -= 1;
- break;
- case 4:
- I_prefix = 3;
- if (!base.slice_from("s"))
- {
- return false;
- }
- I_measure -= 1;
- break;
- case 5:
- I_prefix = 1;
- I_measure -= 1;
- lab0: {
- var /** number */ v_1 = base.cursor;
- lab1: {
- var /** number */ v_2 = base.cursor;
- if (!(base.in_grouping(g_vowel, 97, 117)))
- {
- break lab1;
- }
- base.cursor = v_2;
- if (!base.slice_from("p"))
- {
- return false;
- }
- break lab0;
- }
- base.cursor = v_1;
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- case 6:
- I_prefix = 3;
- I_measure -= 1;
- lab2: {
- var /** number */ v_3 = base.cursor;
- lab3: {
- var /** number */ v_4 = base.cursor;
- if (!(base.in_grouping(g_vowel, 97, 117)))
- {
- break lab3;
- }
- base.cursor = v_4;
- if (!base.slice_from("p"))
- {
- return false;
- }
- break lab2;
- }
- base.cursor = v_3;
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_second_order_prefix() {
- var /** number */ among_var;
- base.bra = base.cursor;
- among_var = base.find_among(a_4);
- if (among_var == 0)
- {
- return false;
- }
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- I_prefix = 2;
- I_measure -= 1;
- break;
- case 2:
- if (!base.slice_from("ajar"))
- {
- return false;
- }
- I_measure -= 1;
- break;
- case 3:
- if (!base.slice_del())
- {
- return false;
- }
- I_prefix = 4;
- I_measure -= 1;
- break;
- case 4:
- if (!base.slice_from("ajar"))
- {
- return false;
- }
- I_prefix = 4;
- I_measure -= 1;
- break;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- I_measure = 0;
- var /** number */ v_1 = base.cursor;
- lab0: {
- while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab1: {
- golab2: while(true)
- {
- lab3: {
- if (!(base.in_grouping(g_vowel, 97, 117)))
- {
- break lab3;
- }
- break golab2;
- }
- if (base.cursor >= base.limit)
- {
- break lab1;
- }
- base.cursor++;
- }
- I_measure += 1;
- continue;
- }
- base.cursor = v_2;
- break;
- }
- }
- base.cursor = v_1;
- if (I_measure <= 2)
- {
- return false;
- }
- I_prefix = 0;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_4 = base.limit - base.cursor;
- r_remove_particle();
- base.cursor = base.limit - v_4;
- if (I_measure <= 2)
- {
- return false;
- }
- var /** number */ v_5 = base.limit - base.cursor;
- r_remove_possessive_pronoun();
- base.cursor = base.limit - v_5;
- base.cursor = base.limit_backward;
- if (I_measure <= 2)
- {
- return false;
- }
- lab4: {
- var /** number */ v_6 = base.cursor;
- lab5: {
- var /** number */ v_7 = base.cursor;
- if (!r_remove_first_order_prefix())
- {
- break lab5;
- }
- var /** number */ v_8 = base.cursor;
- lab6: {
- var /** number */ v_9 = base.cursor;
- if (I_measure <= 2)
- {
- break lab6;
- }
- base.limit_backward = base.cursor; base.cursor = base.limit;
- if (!r_remove_suffix())
- {
- break lab6;
- }
- base.cursor = base.limit_backward;
- base.cursor = v_9;
- if (I_measure <= 2)
- {
- break lab6;
- }
- if (!r_remove_second_order_prefix())
- {
- break lab6;
- }
- }
- base.cursor = v_8;
- base.cursor = v_7;
- break lab4;
- }
- base.cursor = v_6;
- var /** number */ v_10 = base.cursor;
- r_remove_second_order_prefix();
- base.cursor = v_10;
- var /** number */ v_11 = base.cursor;
- lab7: {
- if (I_measure <= 2)
- {
- break lab7;
- }
- base.limit_backward = base.cursor; base.cursor = base.limit;
- if (!r_remove_suffix())
- {
- break lab7;
- }
- base.cursor = base.limit_backward;
- }
- base.cursor = v_11;
- }
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['IndonesianStemmer'] = IndonesianStemmer;
diff --git a/js/irish-stemmer.js b/js/irish-stemmer.js
deleted file mode 100644
index 9051a97..0000000
--- a/js/irish-stemmer.js
+++ /dev/null
@@ -1,419 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var IrishStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["b'", -1, 1],
- ["bh", -1, 4],
- ["bhf", 1, 2],
- ["bp", -1, 8],
- ["ch", -1, 5],
- ["d'", -1, 1],
- ["d'fh", 5, 2],
- ["dh", -1, 6],
- ["dt", -1, 9],
- ["fh", -1, 2],
- ["gc", -1, 5],
- ["gh", -1, 7],
- ["h-", -1, 1],
- ["m'", -1, 1],
- ["mb", -1, 4],
- ["mh", -1, 10],
- ["n-", -1, 1],
- ["nd", -1, 6],
- ["ng", -1, 7],
- ["ph", -1, 8],
- ["sh", -1, 3],
- ["t-", -1, 1],
- ["th", -1, 9],
- ["ts", -1, 3]
- ];
-
- /** @const */ var a_1 = [
- ["\u00EDochta", -1, 1],
- ["a\u00EDochta", 0, 1],
- ["ire", -1, 2],
- ["aire", 2, 2],
- ["abh", -1, 1],
- ["eabh", 4, 1],
- ["ibh", -1, 1],
- ["aibh", 6, 1],
- ["amh", -1, 1],
- ["eamh", 8, 1],
- ["imh", -1, 1],
- ["aimh", 10, 1],
- ["\u00EDocht", -1, 1],
- ["a\u00EDocht", 12, 1],
- ["ir\u00ED", -1, 2],
- ["air\u00ED", 14, 2]
- ];
-
- /** @const */ var a_2 = [
- ["\u00F3ideacha", -1, 6],
- ["patacha", -1, 5],
- ["achta", -1, 1],
- ["arcachta", 2, 2],
- ["eachta", 2, 1],
- ["grafa\u00EDochta", -1, 4],
- ["paite", -1, 5],
- ["ach", -1, 1],
- ["each", 7, 1],
- ["\u00F3ideach", 8, 6],
- ["gineach", 8, 3],
- ["patach", 7, 5],
- ["grafa\u00EDoch", -1, 4],
- ["pataigh", -1, 5],
- ["\u00F3idigh", -1, 6],
- ["acht\u00FAil", -1, 1],
- ["eacht\u00FAil", 15, 1],
- ["gineas", -1, 3],
- ["ginis", -1, 3],
- ["acht", -1, 1],
- ["arcacht", 19, 2],
- ["eacht", 19, 1],
- ["grafa\u00EDocht", -1, 4],
- ["arcachta\u00ED", -1, 2],
- ["grafa\u00EDochta\u00ED", -1, 4]
- ];
-
- /** @const */ var a_3 = [
- ["imid", -1, 1],
- ["aimid", 0, 1],
- ["\u00EDmid", -1, 1],
- ["a\u00EDmid", 2, 1],
- ["adh", -1, 2],
- ["eadh", 4, 2],
- ["faidh", -1, 1],
- ["fidh", -1, 1],
- ["\u00E1il", -1, 2],
- ["ain", -1, 2],
- ["tear", -1, 2],
- ["tar", -1, 2]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 2];
-
- var /** number */ I_p2 = 0;
- var /** number */ I_p1 = 0;
- var /** number */ I_pV = 0;
-
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_pV = base.limit;
- I_p1 = base.limit;
- I_p2 = base.limit;
- var /** number */ v_1 = base.cursor;
- lab0: {
- golab1: while(true)
- {
- lab2: {
- if (!(base.in_grouping(g_v, 97, 250)))
- {
- break lab2;
- }
- break golab1;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- I_pV = base.cursor;
- golab3: while(true)
- {
- lab4: {
- if (!(base.out_grouping(g_v, 97, 250)))
- {
- break lab4;
- }
- break golab3;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- golab5: while(true)
- {
- lab6: {
- if (!(base.in_grouping(g_v, 97, 250)))
- {
- break lab6;
- }
- break golab5;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- golab7: while(true)
- {
- lab8: {
- if (!(base.out_grouping(g_v, 97, 250)))
- {
- break lab8;
- }
- break golab7;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- }
- base.cursor = v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_initial_morph() {
- var /** number */ among_var;
- base.bra = base.cursor;
- among_var = base.find_among(a_0);
- if (among_var == 0)
- {
- return false;
- }
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("f"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("s"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("b"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("c"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("d"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("g"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("p"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("t"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("m"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_RV() {
- return I_pV <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_noun_sfx() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_1);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_deriv() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("arc"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("gin"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("graf"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("paite"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("\u00F3id"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_verb_sfx() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_3);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- var /** number */ v_1 = base.cursor;
- r_initial_morph();
- base.cursor = v_1;
- r_mark_regions();
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_3 = base.limit - base.cursor;
- r_noun_sfx();
- base.cursor = base.limit - v_3;
- var /** number */ v_4 = base.limit - base.cursor;
- r_deriv();
- base.cursor = base.limit - v_4;
- var /** number */ v_5 = base.limit - base.cursor;
- r_verb_sfx();
- base.cursor = base.limit - v_5;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['IrishStemmer'] = IrishStemmer;
diff --git a/js/italian-stemmer.js b/js/italian-stemmer.js
deleted file mode 100644
index 193cbf8..0000000
--- a/js/italian-stemmer.js
+++ /dev/null
@@ -1,1008 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var ItalianStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["", -1, 7],
- ["qu", 0, 6],
- ["\u00E1", 0, 1],
- ["\u00E9", 0, 2],
- ["\u00ED", 0, 3],
- ["\u00F3", 0, 4],
- ["\u00FA", 0, 5]
- ];
-
- /** @const */ var a_1 = [
- ["", -1, 3],
- ["I", 0, 1],
- ["U", 0, 2]
- ];
-
- /** @const */ var a_2 = [
- ["la", -1, -1],
- ["cela", 0, -1],
- ["gliela", 0, -1],
- ["mela", 0, -1],
- ["tela", 0, -1],
- ["vela", 0, -1],
- ["le", -1, -1],
- ["cele", 6, -1],
- ["gliele", 6, -1],
- ["mele", 6, -1],
- ["tele", 6, -1],
- ["vele", 6, -1],
- ["ne", -1, -1],
- ["cene", 12, -1],
- ["gliene", 12, -1],
- ["mene", 12, -1],
- ["sene", 12, -1],
- ["tene", 12, -1],
- ["vene", 12, -1],
- ["ci", -1, -1],
- ["li", -1, -1],
- ["celi", 20, -1],
- ["glieli", 20, -1],
- ["meli", 20, -1],
- ["teli", 20, -1],
- ["veli", 20, -1],
- ["gli", 20, -1],
- ["mi", -1, -1],
- ["si", -1, -1],
- ["ti", -1, -1],
- ["vi", -1, -1],
- ["lo", -1, -1],
- ["celo", 31, -1],
- ["glielo", 31, -1],
- ["melo", 31, -1],
- ["telo", 31, -1],
- ["velo", 31, -1]
- ];
-
- /** @const */ var a_3 = [
- ["ando", -1, 1],
- ["endo", -1, 1],
- ["ar", -1, 2],
- ["er", -1, 2],
- ["ir", -1, 2]
- ];
-
- /** @const */ var a_4 = [
- ["ic", -1, -1],
- ["abil", -1, -1],
- ["os", -1, -1],
- ["iv", -1, 1]
- ];
-
- /** @const */ var a_5 = [
- ["ic", -1, 1],
- ["abil", -1, 1],
- ["iv", -1, 1]
- ];
-
- /** @const */ var a_6 = [
- ["ica", -1, 1],
- ["logia", -1, 3],
- ["osa", -1, 1],
- ["ista", -1, 1],
- ["iva", -1, 9],
- ["anza", -1, 1],
- ["enza", -1, 5],
- ["ice", -1, 1],
- ["atrice", 7, 1],
- ["iche", -1, 1],
- ["logie", -1, 3],
- ["abile", -1, 1],
- ["ibile", -1, 1],
- ["usione", -1, 4],
- ["azione", -1, 2],
- ["uzione", -1, 4],
- ["atore", -1, 2],
- ["ose", -1, 1],
- ["ante", -1, 1],
- ["mente", -1, 1],
- ["amente", 19, 7],
- ["iste", -1, 1],
- ["ive", -1, 9],
- ["anze", -1, 1],
- ["enze", -1, 5],
- ["ici", -1, 1],
- ["atrici", 25, 1],
- ["ichi", -1, 1],
- ["abili", -1, 1],
- ["ibili", -1, 1],
- ["ismi", -1, 1],
- ["usioni", -1, 4],
- ["azioni", -1, 2],
- ["uzioni", -1, 4],
- ["atori", -1, 2],
- ["osi", -1, 1],
- ["anti", -1, 1],
- ["amenti", -1, 6],
- ["imenti", -1, 6],
- ["isti", -1, 1],
- ["ivi", -1, 9],
- ["ico", -1, 1],
- ["ismo", -1, 1],
- ["oso", -1, 1],
- ["amento", -1, 6],
- ["imento", -1, 6],
- ["ivo", -1, 9],
- ["it\u00E0", -1, 8],
- ["ist\u00E0", -1, 1],
- ["ist\u00E8", -1, 1],
- ["ist\u00EC", -1, 1]
- ];
-
- /** @const */ var a_7 = [
- ["isca", -1, 1],
- ["enda", -1, 1],
- ["ata", -1, 1],
- ["ita", -1, 1],
- ["uta", -1, 1],
- ["ava", -1, 1],
- ["eva", -1, 1],
- ["iva", -1, 1],
- ["erebbe", -1, 1],
- ["irebbe", -1, 1],
- ["isce", -1, 1],
- ["ende", -1, 1],
- ["are", -1, 1],
- ["ere", -1, 1],
- ["ire", -1, 1],
- ["asse", -1, 1],
- ["ate", -1, 1],
- ["avate", 16, 1],
- ["evate", 16, 1],
- ["ivate", 16, 1],
- ["ete", -1, 1],
- ["erete", 20, 1],
- ["irete", 20, 1],
- ["ite", -1, 1],
- ["ereste", -1, 1],
- ["ireste", -1, 1],
- ["ute", -1, 1],
- ["erai", -1, 1],
- ["irai", -1, 1],
- ["isci", -1, 1],
- ["endi", -1, 1],
- ["erei", -1, 1],
- ["irei", -1, 1],
- ["assi", -1, 1],
- ["ati", -1, 1],
- ["iti", -1, 1],
- ["eresti", -1, 1],
- ["iresti", -1, 1],
- ["uti", -1, 1],
- ["avi", -1, 1],
- ["evi", -1, 1],
- ["ivi", -1, 1],
- ["isco", -1, 1],
- ["ando", -1, 1],
- ["endo", -1, 1],
- ["Yamo", -1, 1],
- ["iamo", -1, 1],
- ["avamo", -1, 1],
- ["evamo", -1, 1],
- ["ivamo", -1, 1],
- ["eremo", -1, 1],
- ["iremo", -1, 1],
- ["assimo", -1, 1],
- ["ammo", -1, 1],
- ["emmo", -1, 1],
- ["eremmo", 54, 1],
- ["iremmo", 54, 1],
- ["immo", -1, 1],
- ["ano", -1, 1],
- ["iscano", 58, 1],
- ["avano", 58, 1],
- ["evano", 58, 1],
- ["ivano", 58, 1],
- ["eranno", -1, 1],
- ["iranno", -1, 1],
- ["ono", -1, 1],
- ["iscono", 65, 1],
- ["arono", 65, 1],
- ["erono", 65, 1],
- ["irono", 65, 1],
- ["erebbero", -1, 1],
- ["irebbero", -1, 1],
- ["assero", -1, 1],
- ["essero", -1, 1],
- ["issero", -1, 1],
- ["ato", -1, 1],
- ["ito", -1, 1],
- ["uto", -1, 1],
- ["avo", -1, 1],
- ["evo", -1, 1],
- ["ivo", -1, 1],
- ["ar", -1, 1],
- ["ir", -1, 1],
- ["er\u00E0", -1, 1],
- ["ir\u00E0", -1, 1],
- ["er\u00F2", -1, 1],
- ["ir\u00F2", -1, 1]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2, 1];
-
- /** @const */ var /** Array */ g_AEIO = [17, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2];
-
- /** @const */ var /** Array */ g_CG = [17];
-
- var /** number */ I_p2 = 0;
- var /** number */ I_p1 = 0;
- var /** number */ I_pV = 0;
-
-
- /** @return {boolean} */
- function r_prelude() {
- var /** number */ among_var;
- var /** number */ v_1 = base.cursor;
- while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- among_var = base.find_among(a_0);
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u00E0"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u00E8"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("\u00EC"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("\u00F2"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("\u00F9"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("qU"))
- {
- return false;
- }
- break;
- case 7:
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- break;
- }
- continue;
- }
- base.cursor = v_2;
- break;
- }
- base.cursor = v_1;
- while(true)
- {
- var /** number */ v_3 = base.cursor;
- lab1: {
- golab2: while(true)
- {
- var /** number */ v_4 = base.cursor;
- lab3: {
- if (!(base.in_grouping(g_v, 97, 249)))
- {
- break lab3;
- }
- base.bra = base.cursor;
- lab4: {
- var /** number */ v_5 = base.cursor;
- lab5: {
- if (!(base.eq_s("u")))
- {
- break lab5;
- }
- base.ket = base.cursor;
- if (!(base.in_grouping(g_v, 97, 249)))
- {
- break lab5;
- }
- if (!base.slice_from("U"))
- {
- return false;
- }
- break lab4;
- }
- base.cursor = v_5;
- if (!(base.eq_s("i")))
- {
- break lab3;
- }
- base.ket = base.cursor;
- if (!(base.in_grouping(g_v, 97, 249)))
- {
- break lab3;
- }
- if (!base.slice_from("I"))
- {
- return false;
- }
- }
- base.cursor = v_4;
- break golab2;
- }
- base.cursor = v_4;
- if (base.cursor >= base.limit)
- {
- break lab1;
- }
- base.cursor++;
- }
- continue;
- }
- base.cursor = v_3;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_pV = base.limit;
- I_p1 = base.limit;
- I_p2 = base.limit;
- var /** number */ v_1 = base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_2 = base.cursor;
- lab2: {
- if (!(base.in_grouping(g_v, 97, 249)))
- {
- break lab2;
- }
- lab3: {
- var /** number */ v_3 = base.cursor;
- lab4: {
- if (!(base.out_grouping(g_v, 97, 249)))
- {
- break lab4;
- }
- golab5: while(true)
- {
- lab6: {
- if (!(base.in_grouping(g_v, 97, 249)))
- {
- break lab6;
- }
- break golab5;
- }
- if (base.cursor >= base.limit)
- {
- break lab4;
- }
- base.cursor++;
- }
- break lab3;
- }
- base.cursor = v_3;
- if (!(base.in_grouping(g_v, 97, 249)))
- {
- break lab2;
- }
- golab7: while(true)
- {
- lab8: {
- if (!(base.out_grouping(g_v, 97, 249)))
- {
- break lab8;
- }
- break golab7;
- }
- if (base.cursor >= base.limit)
- {
- break lab2;
- }
- base.cursor++;
- }
- }
- break lab1;
- }
- base.cursor = v_2;
- if (!(base.out_grouping(g_v, 97, 249)))
- {
- break lab0;
- }
- lab9: {
- var /** number */ v_6 = base.cursor;
- lab10: {
- if (!(base.out_grouping(g_v, 97, 249)))
- {
- break lab10;
- }
- golab11: while(true)
- {
- lab12: {
- if (!(base.in_grouping(g_v, 97, 249)))
- {
- break lab12;
- }
- break golab11;
- }
- if (base.cursor >= base.limit)
- {
- break lab10;
- }
- base.cursor++;
- }
- break lab9;
- }
- base.cursor = v_6;
- if (!(base.in_grouping(g_v, 97, 249)))
- {
- break lab0;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- }
- I_pV = base.cursor;
- }
- base.cursor = v_1;
- var /** number */ v_8 = base.cursor;
- lab13: {
- golab14: while(true)
- {
- lab15: {
- if (!(base.in_grouping(g_v, 97, 249)))
- {
- break lab15;
- }
- break golab14;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- golab16: while(true)
- {
- lab17: {
- if (!(base.out_grouping(g_v, 97, 249)))
- {
- break lab17;
- }
- break golab16;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- golab18: while(true)
- {
- lab19: {
- if (!(base.in_grouping(g_v, 97, 249)))
- {
- break lab19;
- }
- break golab18;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- golab20: while(true)
- {
- lab21: {
- if (!(base.out_grouping(g_v, 97, 249)))
- {
- break lab21;
- }
- break golab20;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- }
- base.cursor = v_8;
- return true;
- };
-
- /** @return {boolean} */
- function r_postlude() {
- var /** number */ among_var;
- while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- among_var = base.find_among(a_1);
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("u"))
- {
- return false;
- }
- break;
- case 3:
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- break;
- }
- continue;
- }
- base.cursor = v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_RV() {
- return I_pV <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_attached_pronoun() {
- var /** number */ among_var;
- base.ket = base.cursor;
- if (base.find_among_b(a_2) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- among_var = base.find_among_b(a_3);
- if (among_var == 0)
- {
- return false;
- }
- if (!r_RV())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_standard_suffix() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_6);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("ic")))
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- case 3:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("log"))
- {
- return false;
- }
- break;
- case 4:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("u"))
- {
- return false;
- }
- break;
- case 5:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("ente"))
- {
- return false;
- }
- break;
- case 6:
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 7:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_2 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_4);
- if (among_var == 0)
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- if (!base.slice_del())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- base.ket = base.cursor;
- if (!(base.eq_s_b("at")))
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- }
- break;
- case 8:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_3 = base.limit - base.cursor;
- lab2: {
- base.ket = base.cursor;
- if (base.find_among_b(a_5) == 0)
- {
- base.cursor = base.limit - v_3;
- break lab2;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_3;
- break lab2;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- case 9:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_4 = base.limit - base.cursor;
- lab3: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("at")))
- {
- base.cursor = base.limit - v_4;
- break lab3;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_4;
- break lab3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- base.ket = base.cursor;
- if (!(base.eq_s_b("ic")))
- {
- base.cursor = base.limit - v_4;
- break lab3;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_4;
- break lab3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_verb_suffix() {
- if (base.cursor < I_pV)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_pV;
- base.ket = base.cursor;
- if (base.find_among_b(a_7) == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- base.limit_backward = v_2;
- return true;
- };
-
- /** @return {boolean} */
- function r_vowel_suffix() {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (!(base.in_grouping_b(g_AEIO, 97, 242)))
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- base.bra = base.cursor;
- if (!r_RV())
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- if (!base.slice_del())
- {
- return false;
- }
- base.ket = base.cursor;
- if (!(base.eq_s_b("i")))
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- base.bra = base.cursor;
- if (!r_RV())
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- var /** number */ v_2 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("h")))
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- base.bra = base.cursor;
- if (!(base.in_grouping_b(g_CG, 99, 103)))
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- if (!r_RV())
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_exceptions() {
- base.bra = base.cursor;
- if (!(base.eq_s("divano")))
- {
- return false;
- }
- if (base.cursor < base.limit)
- {
- return false;
- }
- base.ket = base.cursor;
- if (!base.slice_from("divan"))
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- lab0: {
- var /** number */ v_1 = base.cursor;
- lab1: {
- if (!r_exceptions())
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = v_1;
- var /** number */ v_2 = base.cursor;
- r_prelude();
- base.cursor = v_2;
- r_mark_regions();
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_4 = base.limit - base.cursor;
- r_attached_pronoun();
- base.cursor = base.limit - v_4;
- var /** number */ v_5 = base.limit - base.cursor;
- lab2: {
- lab3: {
- var /** number */ v_6 = base.limit - base.cursor;
- lab4: {
- if (!r_standard_suffix())
- {
- break lab4;
- }
- break lab3;
- }
- base.cursor = base.limit - v_6;
- if (!r_verb_suffix())
- {
- break lab2;
- }
- }
- }
- base.cursor = base.limit - v_5;
- var /** number */ v_7 = base.limit - base.cursor;
- r_vowel_suffix();
- base.cursor = base.limit - v_7;
- base.cursor = base.limit_backward;
- var /** number */ v_8 = base.cursor;
- r_postlude();
- base.cursor = v_8;
- }
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['ItalianStemmer'] = ItalianStemmer;
diff --git a/js/lithuanian-stemmer.js b/js/lithuanian-stemmer.js
deleted file mode 100644
index 4d1d886..0000000
--- a/js/lithuanian-stemmer.js
+++ /dev/null
@@ -1,558 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var LithuanianStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["a", -1, -1],
- ["ia", 0, -1],
- ["eria", 1, -1],
- ["osna", 0, -1],
- ["iosna", 3, -1],
- ["uosna", 3, -1],
- ["iuosna", 5, -1],
- ["ysna", 0, -1],
- ["\u0117sna", 0, -1],
- ["e", -1, -1],
- ["ie", 9, -1],
- ["enie", 10, -1],
- ["erie", 10, -1],
- ["oje", 9, -1],
- ["ioje", 13, -1],
- ["uje", 9, -1],
- ["iuje", 15, -1],
- ["yje", 9, -1],
- ["enyje", 17, -1],
- ["eryje", 17, -1],
- ["\u0117je", 9, -1],
- ["ame", 9, -1],
- ["iame", 21, -1],
- ["sime", 9, -1],
- ["ome", 9, -1],
- ["\u0117me", 9, -1],
- ["tum\u0117me", 25, -1],
- ["ose", 9, -1],
- ["iose", 27, -1],
- ["uose", 27, -1],
- ["iuose", 29, -1],
- ["yse", 9, -1],
- ["enyse", 31, -1],
- ["eryse", 31, -1],
- ["\u0117se", 9, -1],
- ["ate", 9, -1],
- ["iate", 35, -1],
- ["ite", 9, -1],
- ["kite", 37, -1],
- ["site", 37, -1],
- ["ote", 9, -1],
- ["tute", 9, -1],
- ["\u0117te", 9, -1],
- ["tum\u0117te", 42, -1],
- ["i", -1, -1],
- ["ai", 44, -1],
- ["iai", 45, -1],
- ["eriai", 46, -1],
- ["ei", 44, -1],
- ["tumei", 48, -1],
- ["ki", 44, -1],
- ["imi", 44, -1],
- ["erimi", 51, -1],
- ["umi", 44, -1],
- ["iumi", 53, -1],
- ["si", 44, -1],
- ["asi", 55, -1],
- ["iasi", 56, -1],
- ["esi", 55, -1],
- ["iesi", 58, -1],
- ["siesi", 59, -1],
- ["isi", 55, -1],
- ["aisi", 61, -1],
- ["eisi", 61, -1],
- ["tumeisi", 63, -1],
- ["uisi", 61, -1],
- ["osi", 55, -1],
- ["\u0117josi", 66, -1],
- ["uosi", 66, -1],
- ["iuosi", 68, -1],
- ["siuosi", 69, -1],
- ["usi", 55, -1],
- ["ausi", 71, -1],
- ["\u010Diausi", 72, -1],
- ["\u0105si", 55, -1],
- ["\u0117si", 55, -1],
- ["\u0173si", 55, -1],
- ["t\u0173si", 76, -1],
- ["ti", 44, -1],
- ["enti", 78, -1],
- ["inti", 78, -1],
- ["oti", 78, -1],
- ["ioti", 81, -1],
- ["uoti", 81, -1],
- ["iuoti", 83, -1],
- ["auti", 78, -1],
- ["iauti", 85, -1],
- ["yti", 78, -1],
- ["\u0117ti", 78, -1],
- ["tel\u0117ti", 88, -1],
- ["in\u0117ti", 88, -1],
- ["ter\u0117ti", 88, -1],
- ["ui", 44, -1],
- ["iui", 92, -1],
- ["eniui", 93, -1],
- ["oj", -1, -1],
- ["\u0117j", -1, -1],
- ["k", -1, -1],
- ["am", -1, -1],
- ["iam", 98, -1],
- ["iem", -1, -1],
- ["im", -1, -1],
- ["sim", 101, -1],
- ["om", -1, -1],
- ["tum", -1, -1],
- ["\u0117m", -1, -1],
- ["tum\u0117m", 105, -1],
- ["an", -1, -1],
- ["on", -1, -1],
- ["ion", 108, -1],
- ["un", -1, -1],
- ["iun", 110, -1],
- ["\u0117n", -1, -1],
- ["o", -1, -1],
- ["io", 113, -1],
- ["enio", 114, -1],
- ["\u0117jo", 113, -1],
- ["uo", 113, -1],
- ["s", -1, -1],
- ["as", 118, -1],
- ["ias", 119, -1],
- ["es", 118, -1],
- ["ies", 121, -1],
- ["is", 118, -1],
- ["ais", 123, -1],
- ["iais", 124, -1],
- ["tumeis", 123, -1],
- ["imis", 123, -1],
- ["enimis", 127, -1],
- ["omis", 123, -1],
- ["iomis", 129, -1],
- ["umis", 123, -1],
- ["\u0117mis", 123, -1],
- ["enis", 123, -1],
- ["asis", 123, -1],
- ["ysis", 123, -1],
- ["ams", 118, -1],
- ["iams", 136, -1],
- ["iems", 118, -1],
- ["ims", 118, -1],
- ["enims", 139, -1],
- ["erims", 139, -1],
- ["oms", 118, -1],
- ["ioms", 142, -1],
- ["ums", 118, -1],
- ["\u0117ms", 118, -1],
- ["ens", 118, -1],
- ["os", 118, -1],
- ["ios", 147, -1],
- ["uos", 147, -1],
- ["iuos", 149, -1],
- ["ers", 118, -1],
- ["us", 118, -1],
- ["aus", 152, -1],
- ["iaus", 153, -1],
- ["ius", 152, -1],
- ["ys", 118, -1],
- ["enys", 156, -1],
- ["erys", 156, -1],
- ["\u0105s", 118, -1],
- ["i\u0105s", 159, -1],
- ["\u0117s", 118, -1],
- ["am\u0117s", 161, -1],
- ["iam\u0117s", 162, -1],
- ["im\u0117s", 161, -1],
- ["kim\u0117s", 164, -1],
- ["sim\u0117s", 164, -1],
- ["om\u0117s", 161, -1],
- ["\u0117m\u0117s", 161, -1],
- ["tum\u0117m\u0117s", 168, -1],
- ["at\u0117s", 161, -1],
- ["iat\u0117s", 170, -1],
- ["sit\u0117s", 161, -1],
- ["ot\u0117s", 161, -1],
- ["\u0117t\u0117s", 161, -1],
- ["tum\u0117t\u0117s", 174, -1],
- ["\u012Fs", 118, -1],
- ["\u016Bs", 118, -1],
- ["t\u0173s", 118, -1],
- ["at", -1, -1],
- ["iat", 179, -1],
- ["it", -1, -1],
- ["sit", 181, -1],
- ["ot", -1, -1],
- ["\u0117t", -1, -1],
- ["tum\u0117t", 184, -1],
- ["u", -1, -1],
- ["au", 186, -1],
- ["iau", 187, -1],
- ["\u010Diau", 188, -1],
- ["iu", 186, -1],
- ["eniu", 190, -1],
- ["siu", 190, -1],
- ["y", -1, -1],
- ["\u0105", -1, -1],
- ["i\u0105", 194, -1],
- ["\u0117", -1, -1],
- ["\u0119", -1, -1],
- ["\u012F", -1, -1],
- ["en\u012F", 198, -1],
- ["er\u012F", 198, -1],
- ["\u0173", -1, -1],
- ["i\u0173", 201, -1],
- ["er\u0173", 201, -1]
- ];
-
- /** @const */ var a_1 = [
- ["ing", -1, -1],
- ["aj", -1, -1],
- ["iaj", 1, -1],
- ["iej", -1, -1],
- ["oj", -1, -1],
- ["ioj", 4, -1],
- ["uoj", 4, -1],
- ["iuoj", 6, -1],
- ["auj", -1, -1],
- ["\u0105j", -1, -1],
- ["i\u0105j", 9, -1],
- ["\u0117j", -1, -1],
- ["\u0173j", -1, -1],
- ["i\u0173j", 12, -1],
- ["ok", -1, -1],
- ["iok", 14, -1],
- ["iuk", -1, -1],
- ["uliuk", 16, -1],
- ["u\u010Diuk", 16, -1],
- ["i\u0161k", -1, -1],
- ["iul", -1, -1],
- ["yl", -1, -1],
- ["\u0117l", -1, -1],
- ["am", -1, -1],
- ["dam", 23, -1],
- ["jam", 23, -1],
- ["zgan", -1, -1],
- ["ain", -1, -1],
- ["esn", -1, -1],
- ["op", -1, -1],
- ["iop", 29, -1],
- ["ias", -1, -1],
- ["ies", -1, -1],
- ["ais", -1, -1],
- ["iais", 33, -1],
- ["os", -1, -1],
- ["ios", 35, -1],
- ["uos", 35, -1],
- ["iuos", 37, -1],
- ["aus", -1, -1],
- ["iaus", 39, -1],
- ["\u0105s", -1, -1],
- ["i\u0105s", 41, -1],
- ["\u0119s", -1, -1],
- ["ut\u0117ait", -1, -1],
- ["ant", -1, -1],
- ["iant", 45, -1],
- ["siant", 46, -1],
- ["int", -1, -1],
- ["ot", -1, -1],
- ["uot", 49, -1],
- ["iuot", 50, -1],
- ["yt", -1, -1],
- ["\u0117t", -1, -1],
- ["yk\u0161t", -1, -1],
- ["iau", -1, -1],
- ["dav", -1, -1],
- ["sv", -1, -1],
- ["\u0161v", -1, -1],
- ["yk\u0161\u010D", -1, -1],
- ["\u0119", -1, -1],
- ["\u0117j\u0119", 60, -1]
- ];
-
- /** @const */ var a_2 = [
- ["ojime", -1, 7],
- ["\u0117jime", -1, 3],
- ["avime", -1, 6],
- ["okate", -1, 8],
- ["aite", -1, 1],
- ["uote", -1, 2],
- ["asius", -1, 5],
- ["okat\u0117s", -1, 8],
- ["ait\u0117s", -1, 1],
- ["uot\u0117s", -1, 2],
- ["esiu", -1, 4]
- ];
-
- /** @const */ var a_3 = [
- ["\u010D", -1, 1],
- ["d\u017E", -1, 2]
- ];
-
- /** @const */ var a_4 = [
- ["gd", -1, 1]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 64, 1, 0, 64, 0, 0, 0, 0, 0, 0, 0, 4, 4];
-
- var /** number */ I_p1 = 0;
-
-
- /** @return {boolean} */
- function r_step1() {
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- if (base.find_among_b(a_0) == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_step2() {
- while(true)
- {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- if (base.cursor < I_p1)
- {
- break lab0;
- }
- var /** number */ v_3 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- if (base.find_among_b(a_1) == 0)
- {
- base.limit_backward = v_3;
- break lab0;
- }
- base.bra = base.cursor;
- base.limit_backward = v_3;
- if (!base.slice_del())
- {
- return false;
- }
- continue;
- }
- base.cursor = base.limit - v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_fix_conflicts() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("ait\u0117"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("uot\u0117"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("\u0117jimas"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("esys"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("asys"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("avimas"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("ojimas"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("okat\u0117"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_fix_chdz() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_3);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("t"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("d"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_fix_gd() {
- base.ket = base.cursor;
- if (base.find_among_b(a_4) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_from("g"))
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- I_p1 = base.limit;
- var /** number */ v_1 = base.cursor;
- lab0: {
- var /** number */ v_2 = base.cursor;
- lab1: {
- var /** number */ v_3 = base.cursor;
- if (!(base.eq_s("a")))
- {
- base.cursor = v_2;
- break lab1;
- }
- base.cursor = v_3;
- if (base.current.length <= 6)
- {
- base.cursor = v_2;
- break lab1;
- }
- {
- var /** number */ c1 = base.cursor + 1;
- if (c1 > base.limit)
- {
- base.cursor = v_2;
- break lab1;
- }
- base.cursor = c1;
- }
- }
- golab2: while(true)
- {
- lab3: {
- if (!(base.in_grouping(g_v, 97, 371)))
- {
- break lab3;
- }
- break golab2;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- golab4: while(true)
- {
- lab5: {
- if (!(base.out_grouping(g_v, 97, 371)))
- {
- break lab5;
- }
- break golab4;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- }
- base.cursor = v_1;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_6 = base.limit - base.cursor;
- r_fix_conflicts();
- base.cursor = base.limit - v_6;
- var /** number */ v_7 = base.limit - base.cursor;
- r_step1();
- base.cursor = base.limit - v_7;
- var /** number */ v_8 = base.limit - base.cursor;
- r_fix_chdz();
- base.cursor = base.limit - v_8;
- var /** number */ v_9 = base.limit - base.cursor;
- r_step2();
- base.cursor = base.limit - v_9;
- var /** number */ v_10 = base.limit - base.cursor;
- r_fix_chdz();
- base.cursor = base.limit - v_10;
- var /** number */ v_11 = base.limit - base.cursor;
- r_fix_gd();
- base.cursor = base.limit - v_11;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['LithuanianStemmer'] = LithuanianStemmer;
diff --git a/js/nepali-stemmer.js b/js/nepali-stemmer.js
deleted file mode 100644
index 500c7bd..0000000
--- a/js/nepali-stemmer.js
+++ /dev/null
@@ -1,319 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var NepaliStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["\u0932\u093E\u0907", -1, 1],
- ["\u0932\u093E\u0908", -1, 1],
- ["\u0938\u0901\u0917", -1, 1],
- ["\u0938\u0902\u0917", -1, 1],
- ["\u092E\u093E\u0930\u094D\u092B\u0924", -1, 1],
- ["\u0930\u0924", -1, 1],
- ["\u0915\u093E", -1, 2],
- ["\u092E\u093E", -1, 1],
- ["\u0926\u094D\u0935\u093E\u0930\u093E", -1, 1],
- ["\u0915\u093F", -1, 2],
- ["\u092A\u091B\u093F", -1, 1],
- ["\u0915\u0940", -1, 2],
- ["\u0932\u0947", -1, 1],
- ["\u0915\u0948", -1, 2],
- ["\u0938\u0901\u0917\u0948", -1, 1],
- ["\u092E\u0948", -1, 1],
- ["\u0915\u094B", -1, 2]
- ];
-
- /** @const */ var a_1 = [
- ["\u0901", -1, -1],
- ["\u0902", -1, -1],
- ["\u0948", -1, -1]
- ];
-
- /** @const */ var a_2 = [
- ["\u0901", -1, 1],
- ["\u0902", -1, 1],
- ["\u0948", -1, 2]
- ];
-
- /** @const */ var a_3 = [
- ["\u0925\u093F\u090F", -1, 1],
- ["\u091B", -1, 1],
- ["\u0907\u091B", 1, 1],
- ["\u090F\u091B", 1, 1],
- ["\u093F\u091B", 1, 1],
- ["\u0947\u091B", 1, 1],
- ["\u0928\u0947\u091B", 5, 1],
- ["\u0939\u0941\u0928\u0947\u091B", 6, 1],
- ["\u0907\u0928\u094D\u091B", 1, 1],
- ["\u093F\u0928\u094D\u091B", 1, 1],
- ["\u0939\u0941\u0928\u094D\u091B", 1, 1],
- ["\u090F\u0915\u093E", -1, 1],
- ["\u0907\u090F\u0915\u093E", 11, 1],
- ["\u093F\u090F\u0915\u093E", 11, 1],
- ["\u0947\u0915\u093E", -1, 1],
- ["\u0928\u0947\u0915\u093E", 14, 1],
- ["\u0926\u093E", -1, 1],
- ["\u0907\u0926\u093E", 16, 1],
- ["\u093F\u0926\u093E", 16, 1],
- ["\u0926\u0947\u0916\u093F", -1, 1],
- ["\u092E\u093E\u0925\u093F", -1, 1],
- ["\u090F\u0915\u0940", -1, 1],
- ["\u0907\u090F\u0915\u0940", 21, 1],
- ["\u093F\u090F\u0915\u0940", 21, 1],
- ["\u0947\u0915\u0940", -1, 1],
- ["\u0926\u0947\u0916\u0940", -1, 1],
- ["\u0925\u0940", -1, 1],
- ["\u0926\u0940", -1, 1],
- ["\u091B\u0941", -1, 1],
- ["\u090F\u091B\u0941", 28, 1],
- ["\u0947\u091B\u0941", 28, 1],
- ["\u0928\u0947\u091B\u0941", 30, 1],
- ["\u0928\u0941", -1, 1],
- ["\u0939\u0930\u0941", -1, 1],
- ["\u0939\u0930\u0942", -1, 1],
- ["\u091B\u0947", -1, 1],
- ["\u0925\u0947", -1, 1],
- ["\u0928\u0947", -1, 1],
- ["\u090F\u0915\u0948", -1, 1],
- ["\u0947\u0915\u0948", -1, 1],
- ["\u0928\u0947\u0915\u0948", 39, 1],
- ["\u0926\u0948", -1, 1],
- ["\u0907\u0926\u0948", 41, 1],
- ["\u093F\u0926\u0948", 41, 1],
- ["\u090F\u0915\u094B", -1, 1],
- ["\u0907\u090F\u0915\u094B", 44, 1],
- ["\u093F\u090F\u0915\u094B", 44, 1],
- ["\u0947\u0915\u094B", -1, 1],
- ["\u0928\u0947\u0915\u094B", 47, 1],
- ["\u0926\u094B", -1, 1],
- ["\u0907\u0926\u094B", 49, 1],
- ["\u093F\u0926\u094B", 49, 1],
- ["\u092F\u094B", -1, 1],
- ["\u0907\u092F\u094B", 52, 1],
- ["\u092D\u092F\u094B", 52, 1],
- ["\u093F\u092F\u094B", 52, 1],
- ["\u0925\u093F\u092F\u094B", 55, 1],
- ["\u0926\u093F\u092F\u094B", 55, 1],
- ["\u0925\u094D\u092F\u094B", 52, 1],
- ["\u091B\u094C", -1, 1],
- ["\u0907\u091B\u094C", 59, 1],
- ["\u090F\u091B\u094C", 59, 1],
- ["\u093F\u091B\u094C", 59, 1],
- ["\u0947\u091B\u094C", 59, 1],
- ["\u0928\u0947\u091B\u094C", 63, 1],
- ["\u092F\u094C", -1, 1],
- ["\u0925\u093F\u092F\u094C", 65, 1],
- ["\u091B\u094D\u092F\u094C", 65, 1],
- ["\u0925\u094D\u092F\u094C", 65, 1],
- ["\u091B\u0928\u094D", -1, 1],
- ["\u0907\u091B\u0928\u094D", 69, 1],
- ["\u090F\u091B\u0928\u094D", 69, 1],
- ["\u093F\u091B\u0928\u094D", 69, 1],
- ["\u0947\u091B\u0928\u094D", 69, 1],
- ["\u0928\u0947\u091B\u0928\u094D", 73, 1],
- ["\u0932\u093E\u0928\u094D", -1, 1],
- ["\u091B\u093F\u0928\u094D", -1, 1],
- ["\u0925\u093F\u0928\u094D", -1, 1],
- ["\u092A\u0930\u094D", -1, 1],
- ["\u0907\u0938\u094D", -1, 1],
- ["\u0925\u093F\u0907\u0938\u094D", 79, 1],
- ["\u091B\u0938\u094D", -1, 1],
- ["\u0907\u091B\u0938\u094D", 81, 1],
- ["\u090F\u091B\u0938\u094D", 81, 1],
- ["\u093F\u091B\u0938\u094D", 81, 1],
- ["\u0947\u091B\u0938\u094D", 81, 1],
- ["\u0928\u0947\u091B\u0938\u094D", 85, 1],
- ["\u093F\u0938\u094D", -1, 1],
- ["\u0925\u093F\u0938\u094D", 87, 1],
- ["\u091B\u0947\u0938\u094D", -1, 1],
- ["\u0939\u094B\u0938\u094D", -1, 1]
- ];
-
-
-
- /** @return {boolean} */
- function r_remove_category_1() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_0);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- lab2: {
- var /** number */ v_2 = base.limit - base.cursor;
- lab3: {
- if (!(base.eq_s_b("\u090F")))
- {
- break lab3;
- }
- break lab2;
- }
- base.cursor = base.limit - v_2;
- if (!(base.eq_s_b("\u0947")))
- {
- break lab1;
- }
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_check_category_2() {
- base.ket = base.cursor;
- if (base.find_among_b(a_1) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_category_2() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.eq_s_b("\u092F\u094C")))
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab2: {
- if (!(base.eq_s_b("\u091B\u094C")))
- {
- break lab2;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab3: {
- if (!(base.eq_s_b("\u0928\u094C")))
- {
- break lab3;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!(base.eq_s_b("\u0925\u0947")))
- {
- return false;
- }
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!(base.eq_s_b("\u0924\u094D\u0930")))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_category_3() {
- base.ket = base.cursor;
- if (base.find_among_b(a_3) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_1 = base.limit - base.cursor;
- r_remove_category_1();
- base.cursor = base.limit - v_1;
- var /** number */ v_2 = base.limit - base.cursor;
- lab0: {
- while(true)
- {
- var /** number */ v_3 = base.limit - base.cursor;
- lab1: {
- var /** number */ v_4 = base.limit - base.cursor;
- lab2: {
- var /** number */ v_5 = base.limit - base.cursor;
- if (!r_check_category_2())
- {
- break lab2;
- }
- base.cursor = base.limit - v_5;
- if (!r_remove_category_2())
- {
- break lab2;
- }
- }
- base.cursor = base.limit - v_4;
- if (!r_remove_category_3())
- {
- break lab1;
- }
- continue;
- }
- base.cursor = base.limit - v_3;
- break;
- }
- }
- base.cursor = base.limit - v_2;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['NepaliStemmer'] = NepaliStemmer;
diff --git a/js/norwegian-stemmer.js b/js/norwegian-stemmer.js
deleted file mode 100644
index a037992..0000000
--- a/js/norwegian-stemmer.js
+++ /dev/null
@@ -1,263 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var NorwegianStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["a", -1, 1],
- ["e", -1, 1],
- ["ede", 1, 1],
- ["ande", 1, 1],
- ["ende", 1, 1],
- ["ane", 1, 1],
- ["ene", 1, 1],
- ["hetene", 6, 1],
- ["erte", 1, 3],
- ["en", -1, 1],
- ["heten", 9, 1],
- ["ar", -1, 1],
- ["er", -1, 1],
- ["heter", 12, 1],
- ["s", -1, 2],
- ["as", 14, 1],
- ["es", 14, 1],
- ["edes", 16, 1],
- ["endes", 16, 1],
- ["enes", 16, 1],
- ["hetenes", 19, 1],
- ["ens", 14, 1],
- ["hetens", 21, 1],
- ["ers", 14, 1],
- ["ets", 14, 1],
- ["et", -1, 1],
- ["het", 25, 1],
- ["ert", -1, 3],
- ["ast", -1, 1]
- ];
-
- /** @const */ var a_1 = [
- ["dt", -1, -1],
- ["vt", -1, -1]
- ];
-
- /** @const */ var a_2 = [
- ["leg", -1, 1],
- ["eleg", 0, 1],
- ["ig", -1, 1],
- ["eig", 2, 1],
- ["lig", 2, 1],
- ["elig", 4, 1],
- ["els", -1, 1],
- ["lov", -1, 1],
- ["elov", 7, 1],
- ["slov", 7, 1],
- ["hetslov", 9, 1]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128];
-
- /** @const */ var /** Array */ g_s_ending = [119, 125, 149, 1];
-
- var /** number */ I_x = 0;
- var /** number */ I_p1 = 0;
-
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_p1 = base.limit;
- var /** number */ v_1 = base.cursor;
- {
- var /** number */ c1 = base.cursor + 3;
- if (c1 > base.limit)
- {
- return false;
- }
- base.cursor = c1;
- }
- I_x = base.cursor;
- base.cursor = v_1;
- golab0: while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab1: {
- if (!(base.in_grouping(g_v, 97, 248)))
- {
- break lab1;
- }
- base.cursor = v_2;
- break golab0;
- }
- base.cursor = v_2;
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- golab2: while(true)
- {
- lab3: {
- if (!(base.out_grouping(g_v, 97, 248)))
- {
- break lab3;
- }
- break golab2;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- lab4: {
- if (I_p1 >= I_x)
- {
- break lab4;
- }
- I_p1 = I_x;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_main_suffix() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_0);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- lab0: {
- var /** number */ v_3 = base.limit - base.cursor;
- lab1: {
- if (!(base.in_grouping_b(g_s_ending, 98, 122)))
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_3;
- if (!(base.eq_s_b("k")))
- {
- return false;
- }
- if (!(base.out_grouping_b(g_v, 97, 248)))
- {
- return false;
- }
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("er"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_consonant_pair() {
- var /** number */ v_1 = base.limit - base.cursor;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_3 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- if (base.find_among_b(a_1) == 0)
- {
- base.limit_backward = v_3;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_3;
- base.cursor = base.limit - v_1;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_other_suffix() {
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- if (base.find_among_b(a_2) == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- var /** number */ v_1 = base.cursor;
- r_mark_regions();
- base.cursor = v_1;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_2 = base.limit - base.cursor;
- r_main_suffix();
- base.cursor = base.limit - v_2;
- var /** number */ v_3 = base.limit - base.cursor;
- r_consonant_pair();
- base.cursor = base.limit - v_3;
- var /** number */ v_4 = base.limit - base.cursor;
- r_other_suffix();
- base.cursor = base.limit - v_4;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['NorwegianStemmer'] = NorwegianStemmer;
diff --git a/js/porter-stemmer.js b/js/porter-stemmer.js
deleted file mode 100644
index 8a1fcc8..0000000
--- a/js/porter-stemmer.js
+++ /dev/null
@@ -1,739 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var PorterStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["s", -1, 3],
- ["ies", 0, 2],
- ["sses", 0, 1],
- ["ss", 0, -1]
- ];
-
- /** @const */ var a_1 = [
- ["", -1, 3],
- ["bb", 0, 2],
- ["dd", 0, 2],
- ["ff", 0, 2],
- ["gg", 0, 2],
- ["bl", 0, 1],
- ["mm", 0, 2],
- ["nn", 0, 2],
- ["pp", 0, 2],
- ["rr", 0, 2],
- ["at", 0, 1],
- ["tt", 0, 2],
- ["iz", 0, 1]
- ];
-
- /** @const */ var a_2 = [
- ["ed", -1, 2],
- ["eed", 0, 1],
- ["ing", -1, 2]
- ];
-
- /** @const */ var a_3 = [
- ["anci", -1, 3],
- ["enci", -1, 2],
- ["abli", -1, 4],
- ["eli", -1, 6],
- ["alli", -1, 9],
- ["ousli", -1, 11],
- ["entli", -1, 5],
- ["aliti", -1, 9],
- ["biliti", -1, 13],
- ["iviti", -1, 12],
- ["tional", -1, 1],
- ["ational", 10, 8],
- ["alism", -1, 9],
- ["ation", -1, 8],
- ["ization", 13, 7],
- ["izer", -1, 7],
- ["ator", -1, 8],
- ["iveness", -1, 12],
- ["fulness", -1, 10],
- ["ousness", -1, 11]
- ];
-
- /** @const */ var a_4 = [
- ["icate", -1, 2],
- ["ative", -1, 3],
- ["alize", -1, 1],
- ["iciti", -1, 2],
- ["ical", -1, 2],
- ["ful", -1, 3],
- ["ness", -1, 3]
- ];
-
- /** @const */ var a_5 = [
- ["ic", -1, 1],
- ["ance", -1, 1],
- ["ence", -1, 1],
- ["able", -1, 1],
- ["ible", -1, 1],
- ["ate", -1, 1],
- ["ive", -1, 1],
- ["ize", -1, 1],
- ["iti", -1, 1],
- ["al", -1, 1],
- ["ism", -1, 1],
- ["ion", -1, 2],
- ["er", -1, 1],
- ["ous", -1, 1],
- ["ant", -1, 1],
- ["ent", -1, 1],
- ["ment", 15, 1],
- ["ement", 16, 1],
- ["ou", -1, 1]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 1];
-
- /** @const */ var /** Array */ g_v_WXY = [1, 17, 65, 208, 1];
-
- var /** boolean */ B_Y_found = false;
- var /** number */ I_p2 = 0;
- var /** number */ I_p1 = 0;
-
-
- /** @return {boolean} */
- function r_shortv() {
- if (!(base.out_grouping_b(g_v_WXY, 89, 121)))
- {
- return false;
- }
- if (!(base.in_grouping_b(g_v, 97, 121)))
- {
- return false;
- }
- if (!(base.out_grouping_b(g_v, 97, 121)))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_Step_1a() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_0);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("ss"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_1b() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_from("ee"))
- {
- return false;
- }
- break;
- case 2:
- var /** number */ v_1 = base.limit - base.cursor;
- golab0: while(true)
- {
- lab1: {
- if (!(base.in_grouping_b(g_v, 97, 121)))
- {
- break lab1;
- }
- break golab0;
- }
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- }
- base.cursor = base.limit - v_1;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_3 = base.limit - base.cursor;
- among_var = base.find_among_b(a_1);
- base.cursor = base.limit - v_3;
- switch (among_var) {
- case 1:
- {
- var /** number */ c1 = base.cursor;
- base.insert(base.cursor, base.cursor, "e");
- base.cursor = c1;
- }
- break;
- case 2:
- base.ket = base.cursor;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- if (base.cursor != I_p1)
- {
- return false;
- }
- var /** number */ v_4 = base.limit - base.cursor;
- if (!r_shortv())
- {
- return false;
- }
- base.cursor = base.limit - v_4;
- {
- var /** number */ c2 = base.cursor;
- base.insert(base.cursor, base.cursor, "e");
- base.cursor = c2;
- }
- break;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_1c() {
- base.ket = base.cursor;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.eq_s_b("y")))
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!(base.eq_s_b("Y")))
- {
- return false;
- }
- }
- base.bra = base.cursor;
- golab2: while(true)
- {
- lab3: {
- if (!(base.in_grouping_b(g_v, 97, 121)))
- {
- break lab3;
- }
- break golab2;
- }
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- }
- if (!base.slice_from("i"))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_2() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_3);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("tion"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("ence"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("ance"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("able"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("ent"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("ize"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("ate"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("al"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("ful"))
- {
- return false;
- }
- break;
- case 11:
- if (!base.slice_from("ous"))
- {
- return false;
- }
- break;
- case 12:
- if (!base.slice_from("ive"))
- {
- return false;
- }
- break;
- case 13:
- if (!base.slice_from("ble"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_3() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_4);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("al"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("ic"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_4() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_5);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.eq_s_b("s")))
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!(base.eq_s_b("t")))
- {
- return false;
- }
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_5a() {
- base.ket = base.cursor;
- if (!(base.eq_s_b("e")))
- {
- return false;
- }
- base.bra = base.cursor;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!r_R2())
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!r_R1())
- {
- return false;
- }
- {
- var /** number */ v_2 = base.limit - base.cursor;
- lab2: {
- if (!r_shortv())
- {
- break lab2;
- }
- return false;
- }
- base.cursor = base.limit - v_2;
- }
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_5b() {
- base.ket = base.cursor;
- if (!(base.eq_s_b("l")))
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- return false;
- }
- if (!(base.eq_s_b("l")))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- B_Y_found = false;
- var /** number */ v_1 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- if (!(base.eq_s("y")))
- {
- break lab0;
- }
- base.ket = base.cursor;
- if (!base.slice_from("Y"))
- {
- return false;
- }
- B_Y_found = true;
- }
- base.cursor = v_1;
- var /** number */ v_2 = base.cursor;
- lab1: {
- while(true)
- {
- var /** number */ v_3 = base.cursor;
- lab2: {
- golab3: while(true)
- {
- var /** number */ v_4 = base.cursor;
- lab4: {
- if (!(base.in_grouping(g_v, 97, 121)))
- {
- break lab4;
- }
- base.bra = base.cursor;
- if (!(base.eq_s("y")))
- {
- break lab4;
- }
- base.ket = base.cursor;
- base.cursor = v_4;
- break golab3;
- }
- base.cursor = v_4;
- if (base.cursor >= base.limit)
- {
- break lab2;
- }
- base.cursor++;
- }
- if (!base.slice_from("Y"))
- {
- return false;
- }
- B_Y_found = true;
- continue;
- }
- base.cursor = v_3;
- break;
- }
- }
- base.cursor = v_2;
- I_p1 = base.limit;
- I_p2 = base.limit;
- var /** number */ v_5 = base.cursor;
- lab5: {
- golab6: while(true)
- {
- lab7: {
- if (!(base.in_grouping(g_v, 97, 121)))
- {
- break lab7;
- }
- break golab6;
- }
- if (base.cursor >= base.limit)
- {
- break lab5;
- }
- base.cursor++;
- }
- golab8: while(true)
- {
- lab9: {
- if (!(base.out_grouping(g_v, 97, 121)))
- {
- break lab9;
- }
- break golab8;
- }
- if (base.cursor >= base.limit)
- {
- break lab5;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- golab10: while(true)
- {
- lab11: {
- if (!(base.in_grouping(g_v, 97, 121)))
- {
- break lab11;
- }
- break golab10;
- }
- if (base.cursor >= base.limit)
- {
- break lab5;
- }
- base.cursor++;
- }
- golab12: while(true)
- {
- lab13: {
- if (!(base.out_grouping(g_v, 97, 121)))
- {
- break lab13;
- }
- break golab12;
- }
- if (base.cursor >= base.limit)
- {
- break lab5;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- }
- base.cursor = v_5;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_10 = base.limit - base.cursor;
- r_Step_1a();
- base.cursor = base.limit - v_10;
- var /** number */ v_11 = base.limit - base.cursor;
- r_Step_1b();
- base.cursor = base.limit - v_11;
- var /** number */ v_12 = base.limit - base.cursor;
- r_Step_1c();
- base.cursor = base.limit - v_12;
- var /** number */ v_13 = base.limit - base.cursor;
- r_Step_2();
- base.cursor = base.limit - v_13;
- var /** number */ v_14 = base.limit - base.cursor;
- r_Step_3();
- base.cursor = base.limit - v_14;
- var /** number */ v_15 = base.limit - base.cursor;
- r_Step_4();
- base.cursor = base.limit - v_15;
- var /** number */ v_16 = base.limit - base.cursor;
- r_Step_5a();
- base.cursor = base.limit - v_16;
- var /** number */ v_17 = base.limit - base.cursor;
- r_Step_5b();
- base.cursor = base.limit - v_17;
- base.cursor = base.limit_backward;
- var /** number */ v_18 = base.cursor;
- lab14: {
- if (!B_Y_found)
- {
- break lab14;
- }
- while(true)
- {
- var /** number */ v_19 = base.cursor;
- lab15: {
- golab16: while(true)
- {
- var /** number */ v_20 = base.cursor;
- lab17: {
- base.bra = base.cursor;
- if (!(base.eq_s("Y")))
- {
- break lab17;
- }
- base.ket = base.cursor;
- base.cursor = v_20;
- break golab16;
- }
- base.cursor = v_20;
- if (base.cursor >= base.limit)
- {
- break lab15;
- }
- base.cursor++;
- }
- if (!base.slice_from("y"))
- {
- return false;
- }
- continue;
- }
- base.cursor = v_19;
- break;
- }
- }
- base.cursor = v_18;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['PorterStemmer'] = PorterStemmer;
diff --git a/js/portuguese-stemmer.js b/js/portuguese-stemmer.js
deleted file mode 100644
index ef5bcfc..0000000
--- a/js/portuguese-stemmer.js
+++ /dev/null
@@ -1,896 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var PortugueseStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["", -1, 3],
- ["\u00E3", 0, 1],
- ["\u00F5", 0, 2]
- ];
-
- /** @const */ var a_1 = [
- ["", -1, 3],
- ["a~", 0, 1],
- ["o~", 0, 2]
- ];
-
- /** @const */ var a_2 = [
- ["ic", -1, -1],
- ["ad", -1, -1],
- ["os", -1, -1],
- ["iv", -1, 1]
- ];
-
- /** @const */ var a_3 = [
- ["ante", -1, 1],
- ["avel", -1, 1],
- ["\u00EDvel", -1, 1]
- ];
-
- /** @const */ var a_4 = [
- ["ic", -1, 1],
- ["abil", -1, 1],
- ["iv", -1, 1]
- ];
-
- /** @const */ var a_5 = [
- ["ica", -1, 1],
- ["\u00E2ncia", -1, 1],
- ["\u00EAncia", -1, 4],
- ["logia", -1, 2],
- ["ira", -1, 9],
- ["adora", -1, 1],
- ["osa", -1, 1],
- ["ista", -1, 1],
- ["iva", -1, 8],
- ["eza", -1, 1],
- ["idade", -1, 7],
- ["ante", -1, 1],
- ["mente", -1, 6],
- ["amente", 12, 5],
- ["\u00E1vel", -1, 1],
- ["\u00EDvel", -1, 1],
- ["ico", -1, 1],
- ["ismo", -1, 1],
- ["oso", -1, 1],
- ["amento", -1, 1],
- ["imento", -1, 1],
- ["ivo", -1, 8],
- ["a\u00E7a~o", -1, 1],
- ["u\u00E7a~o", -1, 3],
- ["ador", -1, 1],
- ["icas", -1, 1],
- ["\u00EAncias", -1, 4],
- ["logias", -1, 2],
- ["iras", -1, 9],
- ["adoras", -1, 1],
- ["osas", -1, 1],
- ["istas", -1, 1],
- ["ivas", -1, 8],
- ["ezas", -1, 1],
- ["idades", -1, 7],
- ["adores", -1, 1],
- ["antes", -1, 1],
- ["a\u00E7o~es", -1, 1],
- ["u\u00E7o~es", -1, 3],
- ["icos", -1, 1],
- ["ismos", -1, 1],
- ["osos", -1, 1],
- ["amentos", -1, 1],
- ["imentos", -1, 1],
- ["ivos", -1, 8]
- ];
-
- /** @const */ var a_6 = [
- ["ada", -1, 1],
- ["ida", -1, 1],
- ["ia", -1, 1],
- ["aria", 2, 1],
- ["eria", 2, 1],
- ["iria", 2, 1],
- ["ara", -1, 1],
- ["era", -1, 1],
- ["ira", -1, 1],
- ["ava", -1, 1],
- ["asse", -1, 1],
- ["esse", -1, 1],
- ["isse", -1, 1],
- ["aste", -1, 1],
- ["este", -1, 1],
- ["iste", -1, 1],
- ["ei", -1, 1],
- ["arei", 16, 1],
- ["erei", 16, 1],
- ["irei", 16, 1],
- ["am", -1, 1],
- ["iam", 20, 1],
- ["ariam", 21, 1],
- ["eriam", 21, 1],
- ["iriam", 21, 1],
- ["aram", 20, 1],
- ["eram", 20, 1],
- ["iram", 20, 1],
- ["avam", 20, 1],
- ["em", -1, 1],
- ["arem", 29, 1],
- ["erem", 29, 1],
- ["irem", 29, 1],
- ["assem", 29, 1],
- ["essem", 29, 1],
- ["issem", 29, 1],
- ["ado", -1, 1],
- ["ido", -1, 1],
- ["ando", -1, 1],
- ["endo", -1, 1],
- ["indo", -1, 1],
- ["ara~o", -1, 1],
- ["era~o", -1, 1],
- ["ira~o", -1, 1],
- ["ar", -1, 1],
- ["er", -1, 1],
- ["ir", -1, 1],
- ["as", -1, 1],
- ["adas", 47, 1],
- ["idas", 47, 1],
- ["ias", 47, 1],
- ["arias", 50, 1],
- ["erias", 50, 1],
- ["irias", 50, 1],
- ["aras", 47, 1],
- ["eras", 47, 1],
- ["iras", 47, 1],
- ["avas", 47, 1],
- ["es", -1, 1],
- ["ardes", 58, 1],
- ["erdes", 58, 1],
- ["irdes", 58, 1],
- ["ares", 58, 1],
- ["eres", 58, 1],
- ["ires", 58, 1],
- ["asses", 58, 1],
- ["esses", 58, 1],
- ["isses", 58, 1],
- ["astes", 58, 1],
- ["estes", 58, 1],
- ["istes", 58, 1],
- ["is", -1, 1],
- ["ais", 71, 1],
- ["eis", 71, 1],
- ["areis", 73, 1],
- ["ereis", 73, 1],
- ["ireis", 73, 1],
- ["\u00E1reis", 73, 1],
- ["\u00E9reis", 73, 1],
- ["\u00EDreis", 73, 1],
- ["\u00E1sseis", 73, 1],
- ["\u00E9sseis", 73, 1],
- ["\u00EDsseis", 73, 1],
- ["\u00E1veis", 73, 1],
- ["\u00EDeis", 73, 1],
- ["ar\u00EDeis", 84, 1],
- ["er\u00EDeis", 84, 1],
- ["ir\u00EDeis", 84, 1],
- ["ados", -1, 1],
- ["idos", -1, 1],
- ["amos", -1, 1],
- ["\u00E1ramos", 90, 1],
- ["\u00E9ramos", 90, 1],
- ["\u00EDramos", 90, 1],
- ["\u00E1vamos", 90, 1],
- ["\u00EDamos", 90, 1],
- ["ar\u00EDamos", 95, 1],
- ["er\u00EDamos", 95, 1],
- ["ir\u00EDamos", 95, 1],
- ["emos", -1, 1],
- ["aremos", 99, 1],
- ["eremos", 99, 1],
- ["iremos", 99, 1],
- ["\u00E1ssemos", 99, 1],
- ["\u00EAssemos", 99, 1],
- ["\u00EDssemos", 99, 1],
- ["imos", -1, 1],
- ["armos", -1, 1],
- ["ermos", -1, 1],
- ["irmos", -1, 1],
- ["\u00E1mos", -1, 1],
- ["ar\u00E1s", -1, 1],
- ["er\u00E1s", -1, 1],
- ["ir\u00E1s", -1, 1],
- ["eu", -1, 1],
- ["iu", -1, 1],
- ["ou", -1, 1],
- ["ar\u00E1", -1, 1],
- ["er\u00E1", -1, 1],
- ["ir\u00E1", -1, 1]
- ];
-
- /** @const */ var a_7 = [
- ["a", -1, 1],
- ["i", -1, 1],
- ["o", -1, 1],
- ["os", -1, 1],
- ["\u00E1", -1, 1],
- ["\u00ED", -1, 1],
- ["\u00F3", -1, 1]
- ];
-
- /** @const */ var a_8 = [
- ["e", -1, 1],
- ["\u00E7", -1, 2],
- ["\u00E9", -1, 1],
- ["\u00EA", -1, 1]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 19, 12, 2];
-
- var /** number */ I_p2 = 0;
- var /** number */ I_p1 = 0;
- var /** number */ I_pV = 0;
-
-
- /** @return {boolean} */
- function r_prelude() {
- var /** number */ among_var;
- while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- among_var = base.find_among(a_0);
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("a~"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("o~"))
- {
- return false;
- }
- break;
- case 3:
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- break;
- }
- continue;
- }
- base.cursor = v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_pV = base.limit;
- I_p1 = base.limit;
- I_p2 = base.limit;
- var /** number */ v_1 = base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_2 = base.cursor;
- lab2: {
- if (!(base.in_grouping(g_v, 97, 250)))
- {
- break lab2;
- }
- lab3: {
- var /** number */ v_3 = base.cursor;
- lab4: {
- if (!(base.out_grouping(g_v, 97, 250)))
- {
- break lab4;
- }
- golab5: while(true)
- {
- lab6: {
- if (!(base.in_grouping(g_v, 97, 250)))
- {
- break lab6;
- }
- break golab5;
- }
- if (base.cursor >= base.limit)
- {
- break lab4;
- }
- base.cursor++;
- }
- break lab3;
- }
- base.cursor = v_3;
- if (!(base.in_grouping(g_v, 97, 250)))
- {
- break lab2;
- }
- golab7: while(true)
- {
- lab8: {
- if (!(base.out_grouping(g_v, 97, 250)))
- {
- break lab8;
- }
- break golab7;
- }
- if (base.cursor >= base.limit)
- {
- break lab2;
- }
- base.cursor++;
- }
- }
- break lab1;
- }
- base.cursor = v_2;
- if (!(base.out_grouping(g_v, 97, 250)))
- {
- break lab0;
- }
- lab9: {
- var /** number */ v_6 = base.cursor;
- lab10: {
- if (!(base.out_grouping(g_v, 97, 250)))
- {
- break lab10;
- }
- golab11: while(true)
- {
- lab12: {
- if (!(base.in_grouping(g_v, 97, 250)))
- {
- break lab12;
- }
- break golab11;
- }
- if (base.cursor >= base.limit)
- {
- break lab10;
- }
- base.cursor++;
- }
- break lab9;
- }
- base.cursor = v_6;
- if (!(base.in_grouping(g_v, 97, 250)))
- {
- break lab0;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- }
- I_pV = base.cursor;
- }
- base.cursor = v_1;
- var /** number */ v_8 = base.cursor;
- lab13: {
- golab14: while(true)
- {
- lab15: {
- if (!(base.in_grouping(g_v, 97, 250)))
- {
- break lab15;
- }
- break golab14;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- golab16: while(true)
- {
- lab17: {
- if (!(base.out_grouping(g_v, 97, 250)))
- {
- break lab17;
- }
- break golab16;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- golab18: while(true)
- {
- lab19: {
- if (!(base.in_grouping(g_v, 97, 250)))
- {
- break lab19;
- }
- break golab18;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- golab20: while(true)
- {
- lab21: {
- if (!(base.out_grouping(g_v, 97, 250)))
- {
- break lab21;
- }
- break golab20;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- }
- base.cursor = v_8;
- return true;
- };
-
- /** @return {boolean} */
- function r_postlude() {
- var /** number */ among_var;
- while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- among_var = base.find_among(a_1);
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u00E3"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u00F5"))
- {
- return false;
- }
- break;
- case 3:
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- break;
- }
- continue;
- }
- base.cursor = v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_RV() {
- return I_pV <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_standard_suffix() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_5);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("log"))
- {
- return false;
- }
- break;
- case 3:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("u"))
- {
- return false;
- }
- break;
- case 4:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("ente"))
- {
- return false;
- }
- break;
- case 5:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- if (!base.slice_del())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- base.ket = base.cursor;
- if (!(base.eq_s_b("at")))
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- }
- break;
- case 6:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_2 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- if (base.find_among_b(a_3) == 0)
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- case 7:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_3 = base.limit - base.cursor;
- lab2: {
- base.ket = base.cursor;
- if (base.find_among_b(a_4) == 0)
- {
- base.cursor = base.limit - v_3;
- break lab2;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_3;
- break lab2;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- case 8:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_4 = base.limit - base.cursor;
- lab3: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("at")))
- {
- base.cursor = base.limit - v_4;
- break lab3;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_4;
- break lab3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- case 9:
- if (!r_RV())
- {
- return false;
- }
- if (!(base.eq_s_b("e")))
- {
- return false;
- }
- if (!base.slice_from("ir"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_verb_suffix() {
- if (base.cursor < I_pV)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_pV;
- base.ket = base.cursor;
- if (base.find_among_b(a_6) == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- base.limit_backward = v_2;
- return true;
- };
-
- /** @return {boolean} */
- function r_residual_suffix() {
- base.ket = base.cursor;
- if (base.find_among_b(a_7) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_residual_form() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_8);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- base.ket = base.cursor;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.eq_s_b("u")))
- {
- break lab1;
- }
- base.bra = base.cursor;
- var /** number */ v_2 = base.limit - base.cursor;
- if (!(base.eq_s_b("g")))
- {
- break lab1;
- }
- base.cursor = base.limit - v_2;
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!(base.eq_s_b("i")))
- {
- return false;
- }
- base.bra = base.cursor;
- var /** number */ v_3 = base.limit - base.cursor;
- if (!(base.eq_s_b("c")))
- {
- return false;
- }
- base.cursor = base.limit - v_3;
- }
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("c"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- var /** number */ v_1 = base.cursor;
- r_prelude();
- base.cursor = v_1;
- r_mark_regions();
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_3 = base.limit - base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_4 = base.limit - base.cursor;
- lab2: {
- var /** number */ v_5 = base.limit - base.cursor;
- lab3: {
- var /** number */ v_6 = base.limit - base.cursor;
- lab4: {
- if (!r_standard_suffix())
- {
- break lab4;
- }
- break lab3;
- }
- base.cursor = base.limit - v_6;
- if (!r_verb_suffix())
- {
- break lab2;
- }
- }
- base.cursor = base.limit - v_5;
- var /** number */ v_7 = base.limit - base.cursor;
- lab5: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("i")))
- {
- break lab5;
- }
- base.bra = base.cursor;
- var /** number */ v_8 = base.limit - base.cursor;
- if (!(base.eq_s_b("c")))
- {
- break lab5;
- }
- base.cursor = base.limit - v_8;
- if (!r_RV())
- {
- break lab5;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- base.cursor = base.limit - v_7;
- break lab1;
- }
- base.cursor = base.limit - v_4;
- if (!r_residual_suffix())
- {
- break lab0;
- }
- }
- }
- base.cursor = base.limit - v_3;
- var /** number */ v_9 = base.limit - base.cursor;
- r_residual_form();
- base.cursor = base.limit - v_9;
- base.cursor = base.limit_backward;
- var /** number */ v_10 = base.cursor;
- r_postlude();
- base.cursor = v_10;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['PortugueseStemmer'] = PortugueseStemmer;
diff --git a/js/romanian-stemmer.js b/js/romanian-stemmer.js
deleted file mode 100644
index 995e3b6..0000000
--- a/js/romanian-stemmer.js
+++ /dev/null
@@ -1,918 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var RomanianStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["\u015F", -1, 1],
- ["\u0163", -1, 2]
- ];
-
- /** @const */ var a_1 = [
- ["", -1, 3],
- ["I", 0, 1],
- ["U", 0, 2]
- ];
-
- /** @const */ var a_2 = [
- ["ea", -1, 3],
- ["a\u021Bia", -1, 7],
- ["aua", -1, 2],
- ["iua", -1, 4],
- ["a\u021Bie", -1, 7],
- ["ele", -1, 3],
- ["ile", -1, 5],
- ["iile", 6, 4],
- ["iei", -1, 4],
- ["atei", -1, 6],
- ["ii", -1, 4],
- ["ului", -1, 1],
- ["ul", -1, 1],
- ["elor", -1, 3],
- ["ilor", -1, 4],
- ["iilor", 14, 4]
- ];
-
- /** @const */ var a_3 = [
- ["icala", -1, 4],
- ["iciva", -1, 4],
- ["ativa", -1, 5],
- ["itiva", -1, 6],
- ["icale", -1, 4],
- ["a\u021Biune", -1, 5],
- ["i\u021Biune", -1, 6],
- ["atoare", -1, 5],
- ["itoare", -1, 6],
- ["\u0103toare", -1, 5],
- ["icitate", -1, 4],
- ["abilitate", -1, 1],
- ["ibilitate", -1, 2],
- ["ivitate", -1, 3],
- ["icive", -1, 4],
- ["ative", -1, 5],
- ["itive", -1, 6],
- ["icali", -1, 4],
- ["atori", -1, 5],
- ["icatori", 18, 4],
- ["itori", -1, 6],
- ["\u0103tori", -1, 5],
- ["icitati", -1, 4],
- ["abilitati", -1, 1],
- ["ivitati", -1, 3],
- ["icivi", -1, 4],
- ["ativi", -1, 5],
- ["itivi", -1, 6],
- ["icit\u0103i", -1, 4],
- ["abilit\u0103i", -1, 1],
- ["ivit\u0103i", -1, 3],
- ["icit\u0103\u021Bi", -1, 4],
- ["abilit\u0103\u021Bi", -1, 1],
- ["ivit\u0103\u021Bi", -1, 3],
- ["ical", -1, 4],
- ["ator", -1, 5],
- ["icator", 35, 4],
- ["itor", -1, 6],
- ["\u0103tor", -1, 5],
- ["iciv", -1, 4],
- ["ativ", -1, 5],
- ["itiv", -1, 6],
- ["ical\u0103", -1, 4],
- ["iciv\u0103", -1, 4],
- ["ativ\u0103", -1, 5],
- ["itiv\u0103", -1, 6]
- ];
-
- /** @const */ var a_4 = [
- ["ica", -1, 1],
- ["abila", -1, 1],
- ["ibila", -1, 1],
- ["oasa", -1, 1],
- ["ata", -1, 1],
- ["ita", -1, 1],
- ["anta", -1, 1],
- ["ista", -1, 3],
- ["uta", -1, 1],
- ["iva", -1, 1],
- ["ic", -1, 1],
- ["ice", -1, 1],
- ["abile", -1, 1],
- ["ibile", -1, 1],
- ["isme", -1, 3],
- ["iune", -1, 2],
- ["oase", -1, 1],
- ["ate", -1, 1],
- ["itate", 17, 1],
- ["ite", -1, 1],
- ["ante", -1, 1],
- ["iste", -1, 3],
- ["ute", -1, 1],
- ["ive", -1, 1],
- ["ici", -1, 1],
- ["abili", -1, 1],
- ["ibili", -1, 1],
- ["iuni", -1, 2],
- ["atori", -1, 1],
- ["osi", -1, 1],
- ["ati", -1, 1],
- ["itati", 30, 1],
- ["iti", -1, 1],
- ["anti", -1, 1],
- ["isti", -1, 3],
- ["uti", -1, 1],
- ["i\u0219ti", -1, 3],
- ["ivi", -1, 1],
- ["it\u0103i", -1, 1],
- ["o\u0219i", -1, 1],
- ["it\u0103\u021Bi", -1, 1],
- ["abil", -1, 1],
- ["ibil", -1, 1],
- ["ism", -1, 3],
- ["ator", -1, 1],
- ["os", -1, 1],
- ["at", -1, 1],
- ["it", -1, 1],
- ["ant", -1, 1],
- ["ist", -1, 3],
- ["ut", -1, 1],
- ["iv", -1, 1],
- ["ic\u0103", -1, 1],
- ["abil\u0103", -1, 1],
- ["ibil\u0103", -1, 1],
- ["oas\u0103", -1, 1],
- ["at\u0103", -1, 1],
- ["it\u0103", -1, 1],
- ["ant\u0103", -1, 1],
- ["ist\u0103", -1, 3],
- ["ut\u0103", -1, 1],
- ["iv\u0103", -1, 1]
- ];
-
- /** @const */ var a_5 = [
- ["ea", -1, 1],
- ["ia", -1, 1],
- ["esc", -1, 1],
- ["\u0103sc", -1, 1],
- ["ind", -1, 1],
- ["\u00E2nd", -1, 1],
- ["are", -1, 1],
- ["ere", -1, 1],
- ["ire", -1, 1],
- ["\u00E2re", -1, 1],
- ["se", -1, 2],
- ["ase", 10, 1],
- ["sese", 10, 2],
- ["ise", 10, 1],
- ["use", 10, 1],
- ["\u00E2se", 10, 1],
- ["e\u0219te", -1, 1],
- ["\u0103\u0219te", -1, 1],
- ["eze", -1, 1],
- ["ai", -1, 1],
- ["eai", 19, 1],
- ["iai", 19, 1],
- ["sei", -1, 2],
- ["e\u0219ti", -1, 1],
- ["\u0103\u0219ti", -1, 1],
- ["ui", -1, 1],
- ["ezi", -1, 1],
- ["\u00E2i", -1, 1],
- ["a\u0219i", -1, 1],
- ["se\u0219i", -1, 2],
- ["ase\u0219i", 29, 1],
- ["sese\u0219i", 29, 2],
- ["ise\u0219i", 29, 1],
- ["use\u0219i", 29, 1],
- ["\u00E2se\u0219i", 29, 1],
- ["i\u0219i", -1, 1],
- ["u\u0219i", -1, 1],
- ["\u00E2\u0219i", -1, 1],
- ["a\u021Bi", -1, 2],
- ["ea\u021Bi", 38, 1],
- ["ia\u021Bi", 38, 1],
- ["e\u021Bi", -1, 2],
- ["i\u021Bi", -1, 2],
- ["\u00E2\u021Bi", -1, 2],
- ["ar\u0103\u021Bi", -1, 1],
- ["ser\u0103\u021Bi", -1, 2],
- ["aser\u0103\u021Bi", 45, 1],
- ["seser\u0103\u021Bi", 45, 2],
- ["iser\u0103\u021Bi", 45, 1],
- ["user\u0103\u021Bi", 45, 1],
- ["\u00E2ser\u0103\u021Bi", 45, 1],
- ["ir\u0103\u021Bi", -1, 1],
- ["ur\u0103\u021Bi", -1, 1],
- ["\u00E2r\u0103\u021Bi", -1, 1],
- ["am", -1, 1],
- ["eam", 54, 1],
- ["iam", 54, 1],
- ["em", -1, 2],
- ["asem", 57, 1],
- ["sesem", 57, 2],
- ["isem", 57, 1],
- ["usem", 57, 1],
- ["\u00E2sem", 57, 1],
- ["im", -1, 2],
- ["\u00E2m", -1, 2],
- ["\u0103m", -1, 2],
- ["ar\u0103m", 65, 1],
- ["ser\u0103m", 65, 2],
- ["aser\u0103m", 67, 1],
- ["seser\u0103m", 67, 2],
- ["iser\u0103m", 67, 1],
- ["user\u0103m", 67, 1],
- ["\u00E2ser\u0103m", 67, 1],
- ["ir\u0103m", 65, 1],
- ["ur\u0103m", 65, 1],
- ["\u00E2r\u0103m", 65, 1],
- ["au", -1, 1],
- ["eau", 76, 1],
- ["iau", 76, 1],
- ["indu", -1, 1],
- ["\u00E2ndu", -1, 1],
- ["ez", -1, 1],
- ["easc\u0103", -1, 1],
- ["ar\u0103", -1, 1],
- ["ser\u0103", -1, 2],
- ["aser\u0103", 84, 1],
- ["seser\u0103", 84, 2],
- ["iser\u0103", 84, 1],
- ["user\u0103", 84, 1],
- ["\u00E2ser\u0103", 84, 1],
- ["ir\u0103", -1, 1],
- ["ur\u0103", -1, 1],
- ["\u00E2r\u0103", -1, 1],
- ["eaz\u0103", -1, 1]
- ];
-
- /** @const */ var a_6 = [
- ["a", -1, 1],
- ["e", -1, 1],
- ["ie", 1, 1],
- ["i", -1, 1],
- ["\u0103", -1, 1]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 32, 0, 0, 4];
-
- var /** boolean */ B_standard_suffix_removed = false;
- var /** number */ I_p2 = 0;
- var /** number */ I_p1 = 0;
- var /** number */ I_pV = 0;
-
-
- /** @return {boolean} */
- function r_norm() {
- var /** number */ among_var;
- var /** number */ v_1 = base.cursor;
- lab0: {
- while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab1: {
- golab2: while(true)
- {
- var /** number */ v_3 = base.cursor;
- lab3: {
- base.bra = base.cursor;
- among_var = base.find_among(a_0);
- if (among_var == 0)
- {
- break lab3;
- }
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u0219"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u021B"))
- {
- return false;
- }
- break;
- }
- base.cursor = v_3;
- break golab2;
- }
- base.cursor = v_3;
- if (base.cursor >= base.limit)
- {
- break lab1;
- }
- base.cursor++;
- }
- continue;
- }
- base.cursor = v_2;
- break;
- }
- }
- base.cursor = v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_prelude() {
- while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- golab1: while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab2: {
- if (!(base.in_grouping(g_v, 97, 259)))
- {
- break lab2;
- }
- base.bra = base.cursor;
- lab3: {
- var /** number */ v_3 = base.cursor;
- lab4: {
- if (!(base.eq_s("u")))
- {
- break lab4;
- }
- base.ket = base.cursor;
- if (!(base.in_grouping(g_v, 97, 259)))
- {
- break lab4;
- }
- if (!base.slice_from("U"))
- {
- return false;
- }
- break lab3;
- }
- base.cursor = v_3;
- if (!(base.eq_s("i")))
- {
- break lab2;
- }
- base.ket = base.cursor;
- if (!(base.in_grouping(g_v, 97, 259)))
- {
- break lab2;
- }
- if (!base.slice_from("I"))
- {
- return false;
- }
- }
- base.cursor = v_2;
- break golab1;
- }
- base.cursor = v_2;
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- continue;
- }
- base.cursor = v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_pV = base.limit;
- I_p1 = base.limit;
- I_p2 = base.limit;
- var /** number */ v_1 = base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_2 = base.cursor;
- lab2: {
- if (!(base.in_grouping(g_v, 97, 259)))
- {
- break lab2;
- }
- lab3: {
- var /** number */ v_3 = base.cursor;
- lab4: {
- if (!(base.out_grouping(g_v, 97, 259)))
- {
- break lab4;
- }
- golab5: while(true)
- {
- lab6: {
- if (!(base.in_grouping(g_v, 97, 259)))
- {
- break lab6;
- }
- break golab5;
- }
- if (base.cursor >= base.limit)
- {
- break lab4;
- }
- base.cursor++;
- }
- break lab3;
- }
- base.cursor = v_3;
- if (!(base.in_grouping(g_v, 97, 259)))
- {
- break lab2;
- }
- golab7: while(true)
- {
- lab8: {
- if (!(base.out_grouping(g_v, 97, 259)))
- {
- break lab8;
- }
- break golab7;
- }
- if (base.cursor >= base.limit)
- {
- break lab2;
- }
- base.cursor++;
- }
- }
- break lab1;
- }
- base.cursor = v_2;
- if (!(base.out_grouping(g_v, 97, 259)))
- {
- break lab0;
- }
- lab9: {
- var /** number */ v_6 = base.cursor;
- lab10: {
- if (!(base.out_grouping(g_v, 97, 259)))
- {
- break lab10;
- }
- golab11: while(true)
- {
- lab12: {
- if (!(base.in_grouping(g_v, 97, 259)))
- {
- break lab12;
- }
- break golab11;
- }
- if (base.cursor >= base.limit)
- {
- break lab10;
- }
- base.cursor++;
- }
- break lab9;
- }
- base.cursor = v_6;
- if (!(base.in_grouping(g_v, 97, 259)))
- {
- break lab0;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- }
- I_pV = base.cursor;
- }
- base.cursor = v_1;
- var /** number */ v_8 = base.cursor;
- lab13: {
- golab14: while(true)
- {
- lab15: {
- if (!(base.in_grouping(g_v, 97, 259)))
- {
- break lab15;
- }
- break golab14;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- golab16: while(true)
- {
- lab17: {
- if (!(base.out_grouping(g_v, 97, 259)))
- {
- break lab17;
- }
- break golab16;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- golab18: while(true)
- {
- lab19: {
- if (!(base.in_grouping(g_v, 97, 259)))
- {
- break lab19;
- }
- break golab18;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- golab20: while(true)
- {
- lab21: {
- if (!(base.out_grouping(g_v, 97, 259)))
- {
- break lab21;
- }
- break golab20;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- }
- base.cursor = v_8;
- return true;
- };
-
- /** @return {boolean} */
- function r_postlude() {
- var /** number */ among_var;
- while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- among_var = base.find_among(a_1);
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("u"))
- {
- return false;
- }
- break;
- case 3:
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- break;
- }
- continue;
- }
- base.cursor = v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_RV() {
- return I_pV <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_step_0() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 5:
- {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- if (!(base.eq_s_b("ab")))
- {
- break lab0;
- }
- return false;
- }
- base.cursor = base.limit - v_1;
- }
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("at"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("a\u021Bi"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_combo_suffix() {
- var /** number */ among_var;
- var /** number */ v_1 = base.limit - base.cursor;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_3);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("abil"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("ibil"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("iv"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("ic"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("at"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("it"))
- {
- return false;
- }
- break;
- }
- B_standard_suffix_removed = true;
- base.cursor = base.limit - v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_standard_suffix() {
- var /** number */ among_var;
- B_standard_suffix_removed = false;
- while(true)
- {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- if (!r_combo_suffix())
- {
- break lab0;
- }
- continue;
- }
- base.cursor = base.limit - v_1;
- break;
- }
- base.ket = base.cursor;
- among_var = base.find_among_b(a_4);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!(base.eq_s_b("\u021B")))
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_from("t"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("ist"))
- {
- return false;
- }
- break;
- }
- B_standard_suffix_removed = true;
- return true;
- };
-
- /** @return {boolean} */
- function r_verb_suffix() {
- var /** number */ among_var;
- if (base.cursor < I_pV)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_pV;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_5);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- lab0: {
- var /** number */ v_3 = base.limit - base.cursor;
- lab1: {
- if (!(base.out_grouping_b(g_v, 97, 259)))
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_3;
- if (!(base.eq_s_b("u")))
- {
- base.limit_backward = v_2;
- return false;
- }
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- base.limit_backward = v_2;
- return true;
- };
-
- /** @return {boolean} */
- function r_vowel_suffix() {
- base.ket = base.cursor;
- if (base.find_among_b(a_6) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- r_norm();
- var /** number */ v_2 = base.cursor;
- r_prelude();
- base.cursor = v_2;
- r_mark_regions();
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_4 = base.limit - base.cursor;
- r_step_0();
- base.cursor = base.limit - v_4;
- var /** number */ v_5 = base.limit - base.cursor;
- r_standard_suffix();
- base.cursor = base.limit - v_5;
- var /** number */ v_6 = base.limit - base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_7 = base.limit - base.cursor;
- lab2: {
- if (!B_standard_suffix_removed)
- {
- break lab2;
- }
- break lab1;
- }
- base.cursor = base.limit - v_7;
- if (!r_verb_suffix())
- {
- break lab0;
- }
- }
- }
- base.cursor = base.limit - v_6;
- var /** number */ v_8 = base.limit - base.cursor;
- r_vowel_suffix();
- base.cursor = base.limit - v_8;
- base.cursor = base.limit_backward;
- var /** number */ v_9 = base.cursor;
- r_postlude();
- base.cursor = v_9;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['RomanianStemmer'] = RomanianStemmer;
diff --git a/js/russian-stemmer.js b/js/russian-stemmer.js
deleted file mode 100644
index 1f2aafb..0000000
--- a/js/russian-stemmer.js
+++ /dev/null
@@ -1,622 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var RussianStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["\u0432", -1, 1],
- ["\u0438\u0432", 0, 2],
- ["\u044B\u0432", 0, 2],
- ["\u0432\u0448\u0438", -1, 1],
- ["\u0438\u0432\u0448\u0438", 3, 2],
- ["\u044B\u0432\u0448\u0438", 3, 2],
- ["\u0432\u0448\u0438\u0441\u044C", -1, 1],
- ["\u0438\u0432\u0448\u0438\u0441\u044C", 6, 2],
- ["\u044B\u0432\u0448\u0438\u0441\u044C", 6, 2]
- ];
-
- /** @const */ var a_1 = [
- ["\u0435\u0435", -1, 1],
- ["\u0438\u0435", -1, 1],
- ["\u043E\u0435", -1, 1],
- ["\u044B\u0435", -1, 1],
- ["\u0438\u043C\u0438", -1, 1],
- ["\u044B\u043C\u0438", -1, 1],
- ["\u0435\u0439", -1, 1],
- ["\u0438\u0439", -1, 1],
- ["\u043E\u0439", -1, 1],
- ["\u044B\u0439", -1, 1],
- ["\u0435\u043C", -1, 1],
- ["\u0438\u043C", -1, 1],
- ["\u043E\u043C", -1, 1],
- ["\u044B\u043C", -1, 1],
- ["\u0435\u0433\u043E", -1, 1],
- ["\u043E\u0433\u043E", -1, 1],
- ["\u0435\u043C\u0443", -1, 1],
- ["\u043E\u043C\u0443", -1, 1],
- ["\u0438\u0445", -1, 1],
- ["\u044B\u0445", -1, 1],
- ["\u0435\u044E", -1, 1],
- ["\u043E\u044E", -1, 1],
- ["\u0443\u044E", -1, 1],
- ["\u044E\u044E", -1, 1],
- ["\u0430\u044F", -1, 1],
- ["\u044F\u044F", -1, 1]
- ];
-
- /** @const */ var a_2 = [
- ["\u0435\u043C", -1, 1],
- ["\u043D\u043D", -1, 1],
- ["\u0432\u0448", -1, 1],
- ["\u0438\u0432\u0448", 2, 2],
- ["\u044B\u0432\u0448", 2, 2],
- ["\u0449", -1, 1],
- ["\u044E\u0449", 5, 1],
- ["\u0443\u044E\u0449", 6, 2]
- ];
-
- /** @const */ var a_3 = [
- ["\u0441\u044C", -1, 1],
- ["\u0441\u044F", -1, 1]
- ];
-
- /** @const */ var a_4 = [
- ["\u043B\u0430", -1, 1],
- ["\u0438\u043B\u0430", 0, 2],
- ["\u044B\u043B\u0430", 0, 2],
- ["\u043D\u0430", -1, 1],
- ["\u0435\u043D\u0430", 3, 2],
- ["\u0435\u0442\u0435", -1, 1],
- ["\u0438\u0442\u0435", -1, 2],
- ["\u0439\u0442\u0435", -1, 1],
- ["\u0435\u0439\u0442\u0435", 7, 2],
- ["\u0443\u0439\u0442\u0435", 7, 2],
- ["\u043B\u0438", -1, 1],
- ["\u0438\u043B\u0438", 10, 2],
- ["\u044B\u043B\u0438", 10, 2],
- ["\u0439", -1, 1],
- ["\u0435\u0439", 13, 2],
- ["\u0443\u0439", 13, 2],
- ["\u043B", -1, 1],
- ["\u0438\u043B", 16, 2],
- ["\u044B\u043B", 16, 2],
- ["\u0435\u043C", -1, 1],
- ["\u0438\u043C", -1, 2],
- ["\u044B\u043C", -1, 2],
- ["\u043D", -1, 1],
- ["\u0435\u043D", 22, 2],
- ["\u043B\u043E", -1, 1],
- ["\u0438\u043B\u043E", 24, 2],
- ["\u044B\u043B\u043E", 24, 2],
- ["\u043D\u043E", -1, 1],
- ["\u0435\u043D\u043E", 27, 2],
- ["\u043D\u043D\u043E", 27, 1],
- ["\u0435\u0442", -1, 1],
- ["\u0443\u0435\u0442", 30, 2],
- ["\u0438\u0442", -1, 2],
- ["\u044B\u0442", -1, 2],
- ["\u044E\u0442", -1, 1],
- ["\u0443\u044E\u0442", 34, 2],
- ["\u044F\u0442", -1, 2],
- ["\u043D\u044B", -1, 1],
- ["\u0435\u043D\u044B", 37, 2],
- ["\u0442\u044C", -1, 1],
- ["\u0438\u0442\u044C", 39, 2],
- ["\u044B\u0442\u044C", 39, 2],
- ["\u0435\u0448\u044C", -1, 1],
- ["\u0438\u0448\u044C", -1, 2],
- ["\u044E", -1, 2],
- ["\u0443\u044E", 44, 2]
- ];
-
- /** @const */ var a_5 = [
- ["\u0430", -1, 1],
- ["\u0435\u0432", -1, 1],
- ["\u043E\u0432", -1, 1],
- ["\u0435", -1, 1],
- ["\u0438\u0435", 3, 1],
- ["\u044C\u0435", 3, 1],
- ["\u0438", -1, 1],
- ["\u0435\u0438", 6, 1],
- ["\u0438\u0438", 6, 1],
- ["\u0430\u043C\u0438", 6, 1],
- ["\u044F\u043C\u0438", 6, 1],
- ["\u0438\u044F\u043C\u0438", 10, 1],
- ["\u0439", -1, 1],
- ["\u0435\u0439", 12, 1],
- ["\u0438\u0435\u0439", 13, 1],
- ["\u0438\u0439", 12, 1],
- ["\u043E\u0439", 12, 1],
- ["\u0430\u043C", -1, 1],
- ["\u0435\u043C", -1, 1],
- ["\u0438\u0435\u043C", 18, 1],
- ["\u043E\u043C", -1, 1],
- ["\u044F\u043C", -1, 1],
- ["\u0438\u044F\u043C", 21, 1],
- ["\u043E", -1, 1],
- ["\u0443", -1, 1],
- ["\u0430\u0445", -1, 1],
- ["\u044F\u0445", -1, 1],
- ["\u0438\u044F\u0445", 26, 1],
- ["\u044B", -1, 1],
- ["\u044C", -1, 1],
- ["\u044E", -1, 1],
- ["\u0438\u044E", 30, 1],
- ["\u044C\u044E", 30, 1],
- ["\u044F", -1, 1],
- ["\u0438\u044F", 33, 1],
- ["\u044C\u044F", 33, 1]
- ];
-
- /** @const */ var a_6 = [
- ["\u043E\u0441\u0442", -1, 1],
- ["\u043E\u0441\u0442\u044C", -1, 1]
- ];
-
- /** @const */ var a_7 = [
- ["\u0435\u0439\u0448\u0435", -1, 1],
- ["\u043D", -1, 2],
- ["\u0435\u0439\u0448", -1, 1],
- ["\u044C", -1, 3]
- ];
-
- /** @const */ var /** Array */ g_v = [33, 65, 8, 232];
-
- var /** number */ I_p2 = 0;
- var /** number */ I_pV = 0;
-
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_pV = base.limit;
- I_p2 = base.limit;
- var /** number */ v_1 = base.cursor;
- lab0: {
- golab1: while(true)
- {
- lab2: {
- if (!(base.in_grouping(g_v, 1072, 1103)))
- {
- break lab2;
- }
- break golab1;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- I_pV = base.cursor;
- golab3: while(true)
- {
- lab4: {
- if (!(base.out_grouping(g_v, 1072, 1103)))
- {
- break lab4;
- }
- break golab3;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- golab5: while(true)
- {
- lab6: {
- if (!(base.in_grouping(g_v, 1072, 1103)))
- {
- break lab6;
- }
- break golab5;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- golab7: while(true)
- {
- lab8: {
- if (!(base.out_grouping(g_v, 1072, 1103)))
- {
- break lab8;
- }
- break golab7;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- }
- base.cursor = v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_perfective_gerund() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_0);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.eq_s_b("\u0430")))
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!(base.eq_s_b("\u044F")))
- {
- return false;
- }
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_adjective() {
- base.ket = base.cursor;
- if (base.find_among_b(a_1) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_adjectival() {
- var /** number */ among_var;
- if (!r_adjective())
- {
- return false;
- }
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- lab1: {
- var /** number */ v_2 = base.limit - base.cursor;
- lab2: {
- if (!(base.eq_s_b("\u0430")))
- {
- break lab2;
- }
- break lab1;
- }
- base.cursor = base.limit - v_2;
- if (!(base.eq_s_b("\u044F")))
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_reflexive() {
- base.ket = base.cursor;
- if (base.find_among_b(a_3) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_verb() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_4);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.eq_s_b("\u0430")))
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!(base.eq_s_b("\u044F")))
- {
- return false;
- }
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_noun() {
- base.ket = base.cursor;
- if (base.find_among_b(a_5) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_derivational() {
- base.ket = base.cursor;
- if (base.find_among_b(a_6) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_tidy_up() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_7);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- base.ket = base.cursor;
- if (!(base.eq_s_b("\u043D")))
- {
- return false;
- }
- base.bra = base.cursor;
- if (!(base.eq_s_b("\u043D")))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!(base.eq_s_b("\u043D")))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- var /** number */ v_1 = base.cursor;
- lab0: {
- while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab1: {
- golab2: while(true)
- {
- var /** number */ v_3 = base.cursor;
- lab3: {
- base.bra = base.cursor;
- if (!(base.eq_s("\u0451")))
- {
- break lab3;
- }
- base.ket = base.cursor;
- base.cursor = v_3;
- break golab2;
- }
- base.cursor = v_3;
- if (base.cursor >= base.limit)
- {
- break lab1;
- }
- base.cursor++;
- }
- if (!base.slice_from("\u0435"))
- {
- return false;
- }
- continue;
- }
- base.cursor = v_2;
- break;
- }
- }
- base.cursor = v_1;
- r_mark_regions();
- base.limit_backward = base.cursor; base.cursor = base.limit;
- if (base.cursor < I_pV)
- {
- return false;
- }
- var /** number */ v_6 = base.limit_backward;
- base.limit_backward = I_pV;
- var /** number */ v_7 = base.limit - base.cursor;
- lab4: {
- lab5: {
- var /** number */ v_8 = base.limit - base.cursor;
- lab6: {
- if (!r_perfective_gerund())
- {
- break lab6;
- }
- break lab5;
- }
- base.cursor = base.limit - v_8;
- var /** number */ v_9 = base.limit - base.cursor;
- lab7: {
- if (!r_reflexive())
- {
- base.cursor = base.limit - v_9;
- break lab7;
- }
- }
- lab8: {
- var /** number */ v_10 = base.limit - base.cursor;
- lab9: {
- if (!r_adjectival())
- {
- break lab9;
- }
- break lab8;
- }
- base.cursor = base.limit - v_10;
- lab10: {
- if (!r_verb())
- {
- break lab10;
- }
- break lab8;
- }
- base.cursor = base.limit - v_10;
- if (!r_noun())
- {
- break lab4;
- }
- }
- }
- }
- base.cursor = base.limit - v_7;
- var /** number */ v_11 = base.limit - base.cursor;
- lab11: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("\u0438")))
- {
- base.cursor = base.limit - v_11;
- break lab11;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- }
- var /** number */ v_12 = base.limit - base.cursor;
- r_derivational();
- base.cursor = base.limit - v_12;
- var /** number */ v_13 = base.limit - base.cursor;
- r_tidy_up();
- base.cursor = base.limit - v_13;
- base.limit_backward = v_6;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['RussianStemmer'] = RussianStemmer;
diff --git a/js/serbian-stemmer.js b/js/serbian-stemmer.js
deleted file mode 100644
index f7c8c56..0000000
--- a/js/serbian-stemmer.js
+++ /dev/null
@@ -1,4557 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var SerbianStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["\u0430", -1, 1],
- ["\u0431", -1, 2],
- ["\u0432", -1, 3],
- ["\u0433", -1, 4],
- ["\u0434", -1, 5],
- ["\u0435", -1, 7],
- ["\u0436", -1, 8],
- ["\u0437", -1, 9],
- ["\u0438", -1, 10],
- ["\u043A", -1, 12],
- ["\u043B", -1, 13],
- ["\u043C", -1, 15],
- ["\u043D", -1, 16],
- ["\u043E", -1, 18],
- ["\u043F", -1, 19],
- ["\u0440", -1, 20],
- ["\u0441", -1, 21],
- ["\u0442", -1, 22],
- ["\u0443", -1, 24],
- ["\u0444", -1, 25],
- ["\u0445", -1, 26],
- ["\u0446", -1, 27],
- ["\u0447", -1, 28],
- ["\u0448", -1, 30],
- ["\u0452", -1, 6],
- ["\u0458", -1, 11],
- ["\u0459", -1, 14],
- ["\u045A", -1, 17],
- ["\u045B", -1, 23],
- ["\u045F", -1, 29]
- ];
-
- /** @const */ var a_1 = [
- ["daba", -1, 73],
- ["ajaca", -1, 12],
- ["ejaca", -1, 14],
- ["ljaca", -1, 13],
- ["njaca", -1, 85],
- ["ojaca", -1, 15],
- ["alaca", -1, 82],
- ["elaca", -1, 83],
- ["olaca", -1, 84],
- ["maca", -1, 75],
- ["naca", -1, 76],
- ["raca", -1, 81],
- ["saca", -1, 80],
- ["vaca", -1, 79],
- ["\u0161aca", -1, 18],
- ["aoca", -1, 82],
- ["acaka", -1, 55],
- ["ajaka", -1, 16],
- ["ojaka", -1, 17],
- ["anaka", -1, 78],
- ["ataka", -1, 58],
- ["etaka", -1, 59],
- ["itaka", -1, 60],
- ["otaka", -1, 61],
- ["utaka", -1, 62],
- ["a\u010Daka", -1, 54],
- ["esama", -1, 67],
- ["izama", -1, 87],
- ["jacima", -1, 5],
- ["nicima", -1, 23],
- ["ticima", -1, 24],
- ["teticima", 30, 21],
- ["zicima", -1, 25],
- ["atcima", -1, 58],
- ["utcima", -1, 62],
- ["\u010Dcima", -1, 74],
- ["pesima", -1, 2],
- ["inzima", -1, 19],
- ["lozima", -1, 1],
- ["metara", -1, 68],
- ["centara", -1, 69],
- ["istara", -1, 70],
- ["ekata", -1, 86],
- ["anata", -1, 53],
- ["nstava", -1, 22],
- ["kustava", -1, 29],
- ["ajac", -1, 12],
- ["ejac", -1, 14],
- ["ljac", -1, 13],
- ["njac", -1, 85],
- ["anjac", 49, 11],
- ["ojac", -1, 15],
- ["alac", -1, 82],
- ["elac", -1, 83],
- ["olac", -1, 84],
- ["mac", -1, 75],
- ["nac", -1, 76],
- ["rac", -1, 81],
- ["sac", -1, 80],
- ["vac", -1, 79],
- ["\u0161ac", -1, 18],
- ["jebe", -1, 88],
- ["olce", -1, 84],
- ["kuse", -1, 27],
- ["rave", -1, 42],
- ["save", -1, 52],
- ["\u0161ave", -1, 51],
- ["baci", -1, 89],
- ["jaci", -1, 5],
- ["tvenici", -1, 20],
- ["snici", -1, 26],
- ["tetici", -1, 21],
- ["bojci", -1, 4],
- ["vojci", -1, 3],
- ["ojsci", -1, 66],
- ["atci", -1, 58],
- ["itci", -1, 60],
- ["utci", -1, 62],
- ["\u010Dci", -1, 74],
- ["pesi", -1, 2],
- ["inzi", -1, 19],
- ["lozi", -1, 1],
- ["acak", -1, 55],
- ["usak", -1, 57],
- ["atak", -1, 58],
- ["etak", -1, 59],
- ["itak", -1, 60],
- ["otak", -1, 61],
- ["utak", -1, 62],
- ["a\u010Dak", -1, 54],
- ["u\u0161ak", -1, 56],
- ["izam", -1, 87],
- ["tican", -1, 65],
- ["cajan", -1, 7],
- ["\u010Dajan", -1, 6],
- ["voljan", -1, 77],
- ["eskan", -1, 63],
- ["alan", -1, 40],
- ["bilan", -1, 33],
- ["gilan", -1, 37],
- ["nilan", -1, 39],
- ["rilan", -1, 38],
- ["silan", -1, 36],
- ["tilan", -1, 34],
- ["avilan", -1, 35],
- ["laran", -1, 9],
- ["eran", -1, 8],
- ["asan", -1, 91],
- ["esan", -1, 10],
- ["dusan", -1, 31],
- ["kusan", -1, 28],
- ["atan", -1, 47],
- ["pletan", -1, 50],
- ["tetan", -1, 49],
- ["antan", -1, 32],
- ["pravan", -1, 44],
- ["stavan", -1, 43],
- ["sivan", -1, 46],
- ["tivan", -1, 45],
- ["ozan", -1, 41],
- ["ti\u010Dan", -1, 64],
- ["a\u0161an", -1, 90],
- ["du\u0161an", -1, 30],
- ["metar", -1, 68],
- ["centar", -1, 69],
- ["istar", -1, 70],
- ["ekat", -1, 86],
- ["enat", -1, 48],
- ["oscu", -1, 72],
- ["o\u0161\u0107u", -1, 71]
- ];
-
- /** @const */ var a_2 = [
- ["aca", -1, 124],
- ["eca", -1, 125],
- ["uca", -1, 126],
- ["ga", -1, 20],
- ["acega", 3, 124],
- ["ecega", 3, 125],
- ["ucega", 3, 126],
- ["anjijega", 3, 84],
- ["enjijega", 3, 85],
- ["snjijega", 3, 122],
- ["\u0161njijega", 3, 86],
- ["kijega", 3, 95],
- ["skijega", 11, 1],
- ["\u0161kijega", 11, 2],
- ["elijega", 3, 83],
- ["nijega", 3, 13],
- ["osijega", 3, 123],
- ["atijega", 3, 120],
- ["evitijega", 3, 92],
- ["ovitijega", 3, 93],
- ["astijega", 3, 94],
- ["avijega", 3, 77],
- ["evijega", 3, 78],
- ["ivijega", 3, 79],
- ["ovijega", 3, 80],
- ["o\u0161ijega", 3, 91],
- ["anjega", 3, 84],
- ["enjega", 3, 85],
- ["snjega", 3, 122],
- ["\u0161njega", 3, 86],
- ["kega", 3, 95],
- ["skega", 30, 1],
- ["\u0161kega", 30, 2],
- ["elega", 3, 83],
- ["nega", 3, 13],
- ["anega", 34, 10],
- ["enega", 34, 87],
- ["snega", 34, 159],
- ["\u0161nega", 34, 88],
- ["osega", 3, 123],
- ["atega", 3, 120],
- ["evitega", 3, 92],
- ["ovitega", 3, 93],
- ["astega", 3, 94],
- ["avega", 3, 77],
- ["evega", 3, 78],
- ["ivega", 3, 79],
- ["ovega", 3, 80],
- ["a\u0107ega", 3, 14],
- ["e\u0107ega", 3, 15],
- ["u\u0107ega", 3, 16],
- ["o\u0161ega", 3, 91],
- ["acoga", 3, 124],
- ["ecoga", 3, 125],
- ["ucoga", 3, 126],
- ["anjoga", 3, 84],
- ["enjoga", 3, 85],
- ["snjoga", 3, 122],
- ["\u0161njoga", 3, 86],
- ["koga", 3, 95],
- ["skoga", 59, 1],
- ["\u0161koga", 59, 2],
- ["loga", 3, 19],
- ["eloga", 62, 83],
- ["noga", 3, 13],
- ["cinoga", 64, 137],
- ["\u010Dinoga", 64, 89],
- ["osoga", 3, 123],
- ["atoga", 3, 120],
- ["evitoga", 3, 92],
- ["ovitoga", 3, 93],
- ["astoga", 3, 94],
- ["avoga", 3, 77],
- ["evoga", 3, 78],
- ["ivoga", 3, 79],
- ["ovoga", 3, 80],
- ["a\u0107oga", 3, 14],
- ["e\u0107oga", 3, 15],
- ["u\u0107oga", 3, 16],
- ["o\u0161oga", 3, 91],
- ["uga", 3, 18],
- ["aja", -1, 109],
- ["caja", 81, 26],
- ["laja", 81, 30],
- ["raja", 81, 31],
- ["\u0107aja", 81, 28],
- ["\u010Daja", 81, 27],
- ["\u0111aja", 81, 29],
- ["bija", -1, 32],
- ["cija", -1, 33],
- ["dija", -1, 34],
- ["fija", -1, 40],
- ["gija", -1, 39],
- ["anjija", -1, 84],
- ["enjija", -1, 85],
- ["snjija", -1, 122],
- ["\u0161njija", -1, 86],
- ["kija", -1, 95],
- ["skija", 97, 1],
- ["\u0161kija", 97, 2],
- ["lija", -1, 24],
- ["elija", 100, 83],
- ["mija", -1, 37],
- ["nija", -1, 13],
- ["ganija", 103, 9],
- ["manija", 103, 6],
- ["panija", 103, 7],
- ["ranija", 103, 8],
- ["tanija", 103, 5],
- ["pija", -1, 41],
- ["rija", -1, 42],
- ["rarija", 110, 21],
- ["sija", -1, 23],
- ["osija", 112, 123],
- ["tija", -1, 44],
- ["atija", 114, 120],
- ["evitija", 114, 92],
- ["ovitija", 114, 93],
- ["otija", 114, 22],
- ["astija", 114, 94],
- ["avija", -1, 77],
- ["evija", -1, 78],
- ["ivija", -1, 79],
- ["ovija", -1, 80],
- ["zija", -1, 45],
- ["o\u0161ija", -1, 91],
- ["\u017Eija", -1, 38],
- ["anja", -1, 84],
- ["enja", -1, 85],
- ["snja", -1, 122],
- ["\u0161nja", -1, 86],
- ["ka", -1, 95],
- ["ska", 131, 1],
- ["\u0161ka", 131, 2],
- ["ala", -1, 104],
- ["acala", 134, 128],
- ["astajala", 134, 106],
- ["istajala", 134, 107],
- ["ostajala", 134, 108],
- ["ijala", 134, 47],
- ["injala", 134, 114],
- ["nala", 134, 46],
- ["irala", 134, 100],
- ["urala", 134, 105],
- ["tala", 134, 113],
- ["astala", 144, 110],
- ["istala", 144, 111],
- ["ostala", 144, 112],
- ["avala", 134, 97],
- ["evala", 134, 96],
- ["ivala", 134, 98],
- ["ovala", 134, 76],
- ["uvala", 134, 99],
- ["a\u010Dala", 134, 102],
- ["ela", -1, 83],
- ["ila", -1, 116],
- ["acila", 155, 124],
- ["lucila", 155, 121],
- ["nila", 155, 103],
- ["astanila", 158, 110],
- ["istanila", 158, 111],
- ["ostanila", 158, 112],
- ["rosila", 155, 127],
- ["jetila", 155, 118],
- ["ozila", 155, 48],
- ["a\u010Dila", 155, 101],
- ["lu\u010Dila", 155, 117],
- ["ro\u0161ila", 155, 90],
- ["ola", -1, 50],
- ["asla", -1, 115],
- ["nula", -1, 13],
- ["gama", -1, 20],
- ["logama", 171, 19],
- ["ugama", 171, 18],
- ["ajama", -1, 109],
- ["cajama", 174, 26],
- ["lajama", 174, 30],
- ["rajama", 174, 31],
- ["\u0107ajama", 174, 28],
- ["\u010Dajama", 174, 27],
- ["\u0111ajama", 174, 29],
- ["bijama", -1, 32],
- ["cijama", -1, 33],
- ["dijama", -1, 34],
- ["fijama", -1, 40],
- ["gijama", -1, 39],
- ["lijama", -1, 35],
- ["mijama", -1, 37],
- ["nijama", -1, 36],
- ["ganijama", 188, 9],
- ["manijama", 188, 6],
- ["panijama", 188, 7],
- ["ranijama", 188, 8],
- ["tanijama", 188, 5],
- ["pijama", -1, 41],
- ["rijama", -1, 42],
- ["sijama", -1, 43],
- ["tijama", -1, 44],
- ["zijama", -1, 45],
- ["\u017Eijama", -1, 38],
- ["alama", -1, 104],
- ["ijalama", 200, 47],
- ["nalama", 200, 46],
- ["elama", -1, 119],
- ["ilama", -1, 116],
- ["ramama", -1, 52],
- ["lemama", -1, 51],
- ["inama", -1, 11],
- ["cinama", 207, 137],
- ["\u010Dinama", 207, 89],
- ["rama", -1, 52],
- ["arama", 210, 53],
- ["drama", 210, 54],
- ["erama", 210, 55],
- ["orama", 210, 56],
- ["basama", -1, 135],
- ["gasama", -1, 131],
- ["jasama", -1, 129],
- ["kasama", -1, 133],
- ["nasama", -1, 132],
- ["tasama", -1, 130],
- ["vasama", -1, 134],
- ["esama", -1, 152],
- ["isama", -1, 154],
- ["etama", -1, 70],
- ["estama", -1, 71],
- ["istama", -1, 72],
- ["kstama", -1, 73],
- ["ostama", -1, 74],
- ["avama", -1, 77],
- ["evama", -1, 78],
- ["ivama", -1, 79],
- ["ba\u0161ama", -1, 63],
- ["ga\u0161ama", -1, 64],
- ["ja\u0161ama", -1, 61],
- ["ka\u0161ama", -1, 62],
- ["na\u0161ama", -1, 60],
- ["ta\u0161ama", -1, 59],
- ["va\u0161ama", -1, 65],
- ["e\u0161ama", -1, 66],
- ["i\u0161ama", -1, 67],
- ["lema", -1, 51],
- ["acima", -1, 124],
- ["ecima", -1, 125],
- ["ucima", -1, 126],
- ["ajima", -1, 109],
- ["cajima", 245, 26],
- ["lajima", 245, 30],
- ["rajima", 245, 31],
- ["\u0107ajima", 245, 28],
- ["\u010Dajima", 245, 27],
- ["\u0111ajima", 245, 29],
- ["bijima", -1, 32],
- ["cijima", -1, 33],
- ["dijima", -1, 34],
- ["fijima", -1, 40],
- ["gijima", -1, 39],
- ["anjijima", -1, 84],
- ["enjijima", -1, 85],
- ["snjijima", -1, 122],
- ["\u0161njijima", -1, 86],
- ["kijima", -1, 95],
- ["skijima", 261, 1],
- ["\u0161kijima", 261, 2],
- ["lijima", -1, 35],
- ["elijima", 264, 83],
- ["mijima", -1, 37],
- ["nijima", -1, 13],
- ["ganijima", 267, 9],
- ["manijima", 267, 6],
- ["panijima", 267, 7],
- ["ranijima", 267, 8],
- ["tanijima", 267, 5],
- ["pijima", -1, 41],
- ["rijima", -1, 42],
- ["sijima", -1, 43],
- ["osijima", 275, 123],
- ["tijima", -1, 44],
- ["atijima", 277, 120],
- ["evitijima", 277, 92],
- ["ovitijima", 277, 93],
- ["astijima", 277, 94],
- ["avijima", -1, 77],
- ["evijima", -1, 78],
- ["ivijima", -1, 79],
- ["ovijima", -1, 80],
- ["zijima", -1, 45],
- ["o\u0161ijima", -1, 91],
- ["\u017Eijima", -1, 38],
- ["anjima", -1, 84],
- ["enjima", -1, 85],
- ["snjima", -1, 122],
- ["\u0161njima", -1, 86],
- ["kima", -1, 95],
- ["skima", 293, 1],
- ["\u0161kima", 293, 2],
- ["alima", -1, 104],
- ["ijalima", 296, 47],
- ["nalima", 296, 46],
- ["elima", -1, 83],
- ["ilima", -1, 116],
- ["ozilima", 300, 48],
- ["olima", -1, 50],
- ["lemima", -1, 51],
- ["nima", -1, 13],
- ["anima", 304, 10],
- ["inima", 304, 11],
- ["cinima", 306, 137],
- ["\u010Dinima", 306, 89],
- ["onima", 304, 12],
- ["arima", -1, 53],
- ["drima", -1, 54],
- ["erima", -1, 55],
- ["orima", -1, 56],
- ["basima", -1, 135],
- ["gasima", -1, 131],
- ["jasima", -1, 129],
- ["kasima", -1, 133],
- ["nasima", -1, 132],
- ["tasima", -1, 130],
- ["vasima", -1, 134],
- ["esima", -1, 57],
- ["isima", -1, 58],
- ["osima", -1, 123],
- ["atima", -1, 120],
- ["ikatima", 324, 68],
- ["latima", 324, 69],
- ["etima", -1, 70],
- ["evitima", -1, 92],
- ["ovitima", -1, 93],
- ["astima", -1, 94],
- ["estima", -1, 71],
- ["istima", -1, 72],
- ["kstima", -1, 73],
- ["ostima", -1, 74],
- ["i\u0161tima", -1, 75],
- ["avima", -1, 77],
- ["evima", -1, 78],
- ["ajevima", 337, 109],
- ["cajevima", 338, 26],
- ["lajevima", 338, 30],
- ["rajevima", 338, 31],
- ["\u0107ajevima", 338, 28],
- ["\u010Dajevima", 338, 27],
- ["\u0111ajevima", 338, 29],
- ["ivima", -1, 79],
- ["ovima", -1, 80],
- ["govima", 346, 20],
- ["ugovima", 347, 17],
- ["lovima", 346, 82],
- ["olovima", 349, 49],
- ["movima", 346, 81],
- ["onovima", 346, 12],
- ["stvima", -1, 3],
- ["\u0161tvima", -1, 4],
- ["a\u0107ima", -1, 14],
- ["e\u0107ima", -1, 15],
- ["u\u0107ima", -1, 16],
- ["ba\u0161ima", -1, 63],
- ["ga\u0161ima", -1, 64],
- ["ja\u0161ima", -1, 61],
- ["ka\u0161ima", -1, 62],
- ["na\u0161ima", -1, 60],
- ["ta\u0161ima", -1, 59],
- ["va\u0161ima", -1, 65],
- ["e\u0161ima", -1, 66],
- ["i\u0161ima", -1, 67],
- ["o\u0161ima", -1, 91],
- ["na", -1, 13],
- ["ana", 368, 10],
- ["acana", 369, 128],
- ["urana", 369, 105],
- ["tana", 369, 113],
- ["avana", 369, 97],
- ["evana", 369, 96],
- ["ivana", 369, 98],
- ["uvana", 369, 99],
- ["a\u010Dana", 369, 102],
- ["acena", 368, 124],
- ["lucena", 368, 121],
- ["a\u010Dena", 368, 101],
- ["lu\u010Dena", 368, 117],
- ["ina", 368, 11],
- ["cina", 382, 137],
- ["anina", 382, 10],
- ["\u010Dina", 382, 89],
- ["ona", 368, 12],
- ["ara", -1, 53],
- ["dra", -1, 54],
- ["era", -1, 55],
- ["ora", -1, 56],
- ["basa", -1, 135],
- ["gasa", -1, 131],
- ["jasa", -1, 129],
- ["kasa", -1, 133],
- ["nasa", -1, 132],
- ["tasa", -1, 130],
- ["vasa", -1, 134],
- ["esa", -1, 57],
- ["isa", -1, 58],
- ["osa", -1, 123],
- ["ata", -1, 120],
- ["ikata", 401, 68],
- ["lata", 401, 69],
- ["eta", -1, 70],
- ["evita", -1, 92],
- ["ovita", -1, 93],
- ["asta", -1, 94],
- ["esta", -1, 71],
- ["ista", -1, 72],
- ["ksta", -1, 73],
- ["osta", -1, 74],
- ["nuta", -1, 13],
- ["i\u0161ta", -1, 75],
- ["ava", -1, 77],
- ["eva", -1, 78],
- ["ajeva", 415, 109],
- ["cajeva", 416, 26],
- ["lajeva", 416, 30],
- ["rajeva", 416, 31],
- ["\u0107ajeva", 416, 28],
- ["\u010Dajeva", 416, 27],
- ["\u0111ajeva", 416, 29],
- ["iva", -1, 79],
- ["ova", -1, 80],
- ["gova", 424, 20],
- ["ugova", 425, 17],
- ["lova", 424, 82],
- ["olova", 427, 49],
- ["mova", 424, 81],
- ["onova", 424, 12],
- ["stva", -1, 3],
- ["\u0161tva", -1, 4],
- ["a\u0107a", -1, 14],
- ["e\u0107a", -1, 15],
- ["u\u0107a", -1, 16],
- ["ba\u0161a", -1, 63],
- ["ga\u0161a", -1, 64],
- ["ja\u0161a", -1, 61],
- ["ka\u0161a", -1, 62],
- ["na\u0161a", -1, 60],
- ["ta\u0161a", -1, 59],
- ["va\u0161a", -1, 65],
- ["e\u0161a", -1, 66],
- ["i\u0161a", -1, 67],
- ["o\u0161a", -1, 91],
- ["ace", -1, 124],
- ["ece", -1, 125],
- ["uce", -1, 126],
- ["luce", 448, 121],
- ["astade", -1, 110],
- ["istade", -1, 111],
- ["ostade", -1, 112],
- ["ge", -1, 20],
- ["loge", 453, 19],
- ["uge", 453, 18],
- ["aje", -1, 104],
- ["caje", 456, 26],
- ["laje", 456, 30],
- ["raje", 456, 31],
- ["astaje", 456, 106],
- ["istaje", 456, 107],
- ["ostaje", 456, 108],
- ["\u0107aje", 456, 28],
- ["\u010Daje", 456, 27],
- ["\u0111aje", 456, 29],
- ["ije", -1, 116],
- ["bije", 466, 32],
- ["cije", 466, 33],
- ["dije", 466, 34],
- ["fije", 466, 40],
- ["gije", 466, 39],
- ["anjije", 466, 84],
- ["enjije", 466, 85],
- ["snjije", 466, 122],
- ["\u0161njije", 466, 86],
- ["kije", 466, 95],
- ["skije", 476, 1],
- ["\u0161kije", 476, 2],
- ["lije", 466, 35],
- ["elije", 479, 83],
- ["mije", 466, 37],
- ["nije", 466, 13],
- ["ganije", 482, 9],
- ["manije", 482, 6],
- ["panije", 482, 7],
- ["ranije", 482, 8],
- ["tanije", 482, 5],
- ["pije", 466, 41],
- ["rije", 466, 42],
- ["sije", 466, 43],
- ["osije", 490, 123],
- ["tije", 466, 44],
- ["atije", 492, 120],
- ["evitije", 492, 92],
- ["ovitije", 492, 93],
- ["astije", 492, 94],
- ["avije", 466, 77],
- ["evije", 466, 78],
- ["ivije", 466, 79],
- ["ovije", 466, 80],
- ["zije", 466, 45],
- ["o\u0161ije", 466, 91],
- ["\u017Eije", 466, 38],
- ["anje", -1, 84],
- ["enje", -1, 85],
- ["snje", -1, 122],
- ["\u0161nje", -1, 86],
- ["uje", -1, 25],
- ["lucuje", 508, 121],
- ["iruje", 508, 100],
- ["lu\u010Duje", 508, 117],
- ["ke", -1, 95],
- ["ske", 512, 1],
- ["\u0161ke", 512, 2],
- ["ale", -1, 104],
- ["acale", 515, 128],
- ["astajale", 515, 106],
- ["istajale", 515, 107],
- ["ostajale", 515, 108],
- ["ijale", 515, 47],
- ["injale", 515, 114],
- ["nale", 515, 46],
- ["irale", 515, 100],
- ["urale", 515, 105],
- ["tale", 515, 113],
- ["astale", 525, 110],
- ["istale", 525, 111],
- ["ostale", 525, 112],
- ["avale", 515, 97],
- ["evale", 515, 96],
- ["ivale", 515, 98],
- ["ovale", 515, 76],
- ["uvale", 515, 99],
- ["a\u010Dale", 515, 102],
- ["ele", -1, 83],
- ["ile", -1, 116],
- ["acile", 536, 124],
- ["lucile", 536, 121],
- ["nile", 536, 103],
- ["rosile", 536, 127],
- ["jetile", 536, 118],
- ["ozile", 536, 48],
- ["a\u010Dile", 536, 101],
- ["lu\u010Dile", 536, 117],
- ["ro\u0161ile", 536, 90],
- ["ole", -1, 50],
- ["asle", -1, 115],
- ["nule", -1, 13],
- ["rame", -1, 52],
- ["leme", -1, 51],
- ["acome", -1, 124],
- ["ecome", -1, 125],
- ["ucome", -1, 126],
- ["anjome", -1, 84],
- ["enjome", -1, 85],
- ["snjome", -1, 122],
- ["\u0161njome", -1, 86],
- ["kome", -1, 95],
- ["skome", 558, 1],
- ["\u0161kome", 558, 2],
- ["elome", -1, 83],
- ["nome", -1, 13],
- ["cinome", 562, 137],
- ["\u010Dinome", 562, 89],
- ["osome", -1, 123],
- ["atome", -1, 120],
- ["evitome", -1, 92],
- ["ovitome", -1, 93],
- ["astome", -1, 94],
- ["avome", -1, 77],
- ["evome", -1, 78],
- ["ivome", -1, 79],
- ["ovome", -1, 80],
- ["a\u0107ome", -1, 14],
- ["e\u0107ome", -1, 15],
- ["u\u0107ome", -1, 16],
- ["o\u0161ome", -1, 91],
- ["ne", -1, 13],
- ["ane", 578, 10],
- ["acane", 579, 128],
- ["urane", 579, 105],
- ["tane", 579, 113],
- ["astane", 582, 110],
- ["istane", 582, 111],
- ["ostane", 582, 112],
- ["avane", 579, 97],
- ["evane", 579, 96],
- ["ivane", 579, 98],
- ["uvane", 579, 99],
- ["a\u010Dane", 579, 102],
- ["acene", 578, 124],
- ["lucene", 578, 121],
- ["a\u010Dene", 578, 101],
- ["lu\u010Dene", 578, 117],
- ["ine", 578, 11],
- ["cine", 595, 137],
- ["anine", 595, 10],
- ["\u010Dine", 595, 89],
- ["one", 578, 12],
- ["are", -1, 53],
- ["dre", -1, 54],
- ["ere", -1, 55],
- ["ore", -1, 56],
- ["ase", -1, 161],
- ["base", 604, 135],
- ["acase", 604, 128],
- ["gase", 604, 131],
- ["jase", 604, 129],
- ["astajase", 608, 138],
- ["istajase", 608, 139],
- ["ostajase", 608, 140],
- ["injase", 608, 150],
- ["kase", 604, 133],
- ["nase", 604, 132],
- ["irase", 604, 155],
- ["urase", 604, 156],
- ["tase", 604, 130],
- ["vase", 604, 134],
- ["avase", 618, 144],
- ["evase", 618, 145],
- ["ivase", 618, 146],
- ["ovase", 618, 148],
- ["uvase", 618, 147],
- ["ese", -1, 57],
- ["ise", -1, 58],
- ["acise", 625, 124],
- ["lucise", 625, 121],
- ["rosise", 625, 127],
- ["jetise", 625, 149],
- ["ose", -1, 123],
- ["astadose", 630, 141],
- ["istadose", 630, 142],
- ["ostadose", 630, 143],
- ["ate", -1, 104],
- ["acate", 634, 128],
- ["ikate", 634, 68],
- ["late", 634, 69],
- ["irate", 634, 100],
- ["urate", 634, 105],
- ["tate", 634, 113],
- ["avate", 634, 97],
- ["evate", 634, 96],
- ["ivate", 634, 98],
- ["uvate", 634, 99],
- ["a\u010Date", 634, 102],
- ["ete", -1, 70],
- ["astadete", 646, 110],
- ["istadete", 646, 111],
- ["ostadete", 646, 112],
- ["astajete", 646, 106],
- ["istajete", 646, 107],
- ["ostajete", 646, 108],
- ["ijete", 646, 116],
- ["injete", 646, 114],
- ["ujete", 646, 25],
- ["lucujete", 655, 121],
- ["irujete", 655, 100],
- ["lu\u010Dujete", 655, 117],
- ["nete", 646, 13],
- ["astanete", 659, 110],
- ["istanete", 659, 111],
- ["ostanete", 659, 112],
- ["astete", 646, 115],
- ["ite", -1, 116],
- ["acite", 664, 124],
- ["lucite", 664, 121],
- ["nite", 664, 13],
- ["astanite", 667, 110],
- ["istanite", 667, 111],
- ["ostanite", 667, 112],
- ["rosite", 664, 127],
- ["jetite", 664, 118],
- ["astite", 664, 115],
- ["evite", 664, 92],
- ["ovite", 664, 93],
- ["a\u010Dite", 664, 101],
- ["lu\u010Dite", 664, 117],
- ["ro\u0161ite", 664, 90],
- ["ajte", -1, 104],
- ["urajte", 679, 105],
- ["tajte", 679, 113],
- ["astajte", 681, 106],
- ["istajte", 681, 107],
- ["ostajte", 681, 108],
- ["avajte", 679, 97],
- ["evajte", 679, 96],
- ["ivajte", 679, 98],
- ["uvajte", 679, 99],
- ["ijte", -1, 116],
- ["lucujte", -1, 121],
- ["irujte", -1, 100],
- ["lu\u010Dujte", -1, 117],
- ["aste", -1, 94],
- ["acaste", 693, 128],
- ["astajaste", 693, 106],
- ["istajaste", 693, 107],
- ["ostajaste", 693, 108],
- ["injaste", 693, 114],
- ["iraste", 693, 100],
- ["uraste", 693, 105],
- ["taste", 693, 113],
- ["avaste", 693, 97],
- ["evaste", 693, 96],
- ["ivaste", 693, 98],
- ["ovaste", 693, 76],
- ["uvaste", 693, 99],
- ["a\u010Daste", 693, 102],
- ["este", -1, 71],
- ["iste", -1, 72],
- ["aciste", 709, 124],
- ["luciste", 709, 121],
- ["niste", 709, 103],
- ["rosiste", 709, 127],
- ["jetiste", 709, 118],
- ["a\u010Diste", 709, 101],
- ["lu\u010Diste", 709, 117],
- ["ro\u0161iste", 709, 90],
- ["kste", -1, 73],
- ["oste", -1, 74],
- ["astadoste", 719, 110],
- ["istadoste", 719, 111],
- ["ostadoste", 719, 112],
- ["nuste", -1, 13],
- ["i\u0161te", -1, 75],
- ["ave", -1, 77],
- ["eve", -1, 78],
- ["ajeve", 726, 109],
- ["cajeve", 727, 26],
- ["lajeve", 727, 30],
- ["rajeve", 727, 31],
- ["\u0107ajeve", 727, 28],
- ["\u010Dajeve", 727, 27],
- ["\u0111ajeve", 727, 29],
- ["ive", -1, 79],
- ["ove", -1, 80],
- ["gove", 735, 20],
- ["ugove", 736, 17],
- ["love", 735, 82],
- ["olove", 738, 49],
- ["move", 735, 81],
- ["onove", 735, 12],
- ["a\u0107e", -1, 14],
- ["e\u0107e", -1, 15],
- ["u\u0107e", -1, 16],
- ["a\u010De", -1, 101],
- ["lu\u010De", -1, 117],
- ["a\u0161e", -1, 104],
- ["ba\u0161e", 747, 63],
- ["ga\u0161e", 747, 64],
- ["ja\u0161e", 747, 61],
- ["astaja\u0161e", 750, 106],
- ["istaja\u0161e", 750, 107],
- ["ostaja\u0161e", 750, 108],
- ["inja\u0161e", 750, 114],
- ["ka\u0161e", 747, 62],
- ["na\u0161e", 747, 60],
- ["ira\u0161e", 747, 100],
- ["ura\u0161e", 747, 105],
- ["ta\u0161e", 747, 59],
- ["va\u0161e", 747, 65],
- ["ava\u0161e", 760, 97],
- ["eva\u0161e", 760, 96],
- ["iva\u0161e", 760, 98],
- ["ova\u0161e", 760, 76],
- ["uva\u0161e", 760, 99],
- ["a\u010Da\u0161e", 747, 102],
- ["e\u0161e", -1, 66],
- ["i\u0161e", -1, 67],
- ["jeti\u0161e", 768, 118],
- ["a\u010Di\u0161e", 768, 101],
- ["lu\u010Di\u0161e", 768, 117],
- ["ro\u0161i\u0161e", 768, 90],
- ["o\u0161e", -1, 91],
- ["astado\u0161e", 773, 110],
- ["istado\u0161e", 773, 111],
- ["ostado\u0161e", 773, 112],
- ["aceg", -1, 124],
- ["eceg", -1, 125],
- ["uceg", -1, 126],
- ["anjijeg", -1, 84],
- ["enjijeg", -1, 85],
- ["snjijeg", -1, 122],
- ["\u0161njijeg", -1, 86],
- ["kijeg", -1, 95],
- ["skijeg", 784, 1],
- ["\u0161kijeg", 784, 2],
- ["elijeg", -1, 83],
- ["nijeg", -1, 13],
- ["osijeg", -1, 123],
- ["atijeg", -1, 120],
- ["evitijeg", -1, 92],
- ["ovitijeg", -1, 93],
- ["astijeg", -1, 94],
- ["avijeg", -1, 77],
- ["evijeg", -1, 78],
- ["ivijeg", -1, 79],
- ["ovijeg", -1, 80],
- ["o\u0161ijeg", -1, 91],
- ["anjeg", -1, 84],
- ["enjeg", -1, 85],
- ["snjeg", -1, 122],
- ["\u0161njeg", -1, 86],
- ["keg", -1, 95],
- ["eleg", -1, 83],
- ["neg", -1, 13],
- ["aneg", 805, 10],
- ["eneg", 805, 87],
- ["sneg", 805, 159],
- ["\u0161neg", 805, 88],
- ["oseg", -1, 123],
- ["ateg", -1, 120],
- ["aveg", -1, 77],
- ["eveg", -1, 78],
- ["iveg", -1, 79],
- ["oveg", -1, 80],
- ["a\u0107eg", -1, 14],
- ["e\u0107eg", -1, 15],
- ["u\u0107eg", -1, 16],
- ["o\u0161eg", -1, 91],
- ["acog", -1, 124],
- ["ecog", -1, 125],
- ["ucog", -1, 126],
- ["anjog", -1, 84],
- ["enjog", -1, 85],
- ["snjog", -1, 122],
- ["\u0161njog", -1, 86],
- ["kog", -1, 95],
- ["skog", 827, 1],
- ["\u0161kog", 827, 2],
- ["elog", -1, 83],
- ["nog", -1, 13],
- ["cinog", 831, 137],
- ["\u010Dinog", 831, 89],
- ["osog", -1, 123],
- ["atog", -1, 120],
- ["evitog", -1, 92],
- ["ovitog", -1, 93],
- ["astog", -1, 94],
- ["avog", -1, 77],
- ["evog", -1, 78],
- ["ivog", -1, 79],
- ["ovog", -1, 80],
- ["a\u0107og", -1, 14],
- ["e\u0107og", -1, 15],
- ["u\u0107og", -1, 16],
- ["o\u0161og", -1, 91],
- ["ah", -1, 104],
- ["acah", 847, 128],
- ["astajah", 847, 106],
- ["istajah", 847, 107],
- ["ostajah", 847, 108],
- ["injah", 847, 114],
- ["irah", 847, 100],
- ["urah", 847, 105],
- ["tah", 847, 113],
- ["avah", 847, 97],
- ["evah", 847, 96],
- ["ivah", 847, 98],
- ["ovah", 847, 76],
- ["uvah", 847, 99],
- ["a\u010Dah", 847, 102],
- ["ih", -1, 116],
- ["acih", 862, 124],
- ["ecih", 862, 125],
- ["ucih", 862, 126],
- ["lucih", 865, 121],
- ["anjijih", 862, 84],
- ["enjijih", 862, 85],
- ["snjijih", 862, 122],
- ["\u0161njijih", 862, 86],
- ["kijih", 862, 95],
- ["skijih", 871, 1],
- ["\u0161kijih", 871, 2],
- ["elijih", 862, 83],
- ["nijih", 862, 13],
- ["osijih", 862, 123],
- ["atijih", 862, 120],
- ["evitijih", 862, 92],
- ["ovitijih", 862, 93],
- ["astijih", 862, 94],
- ["avijih", 862, 77],
- ["evijih", 862, 78],
- ["ivijih", 862, 79],
- ["ovijih", 862, 80],
- ["o\u0161ijih", 862, 91],
- ["anjih", 862, 84],
- ["enjih", 862, 85],
- ["snjih", 862, 122],
- ["\u0161njih", 862, 86],
- ["kih", 862, 95],
- ["skih", 890, 1],
- ["\u0161kih", 890, 2],
- ["elih", 862, 83],
- ["nih", 862, 13],
- ["cinih", 894, 137],
- ["\u010Dinih", 894, 89],
- ["osih", 862, 123],
- ["rosih", 897, 127],
- ["atih", 862, 120],
- ["jetih", 862, 118],
- ["evitih", 862, 92],
- ["ovitih", 862, 93],
- ["astih", 862, 94],
- ["avih", 862, 77],
- ["evih", 862, 78],
- ["ivih", 862, 79],
- ["ovih", 862, 80],
- ["a\u0107ih", 862, 14],
- ["e\u0107ih", 862, 15],
- ["u\u0107ih", 862, 16],
- ["a\u010Dih", 862, 101],
- ["lu\u010Dih", 862, 117],
- ["o\u0161ih", 862, 91],
- ["ro\u0161ih", 913, 90],
- ["astadoh", -1, 110],
- ["istadoh", -1, 111],
- ["ostadoh", -1, 112],
- ["acuh", -1, 124],
- ["ecuh", -1, 125],
- ["ucuh", -1, 126],
- ["a\u0107uh", -1, 14],
- ["e\u0107uh", -1, 15],
- ["u\u0107uh", -1, 16],
- ["aci", -1, 124],
- ["aceci", -1, 124],
- ["ieci", -1, 162],
- ["ajuci", -1, 161],
- ["irajuci", 927, 155],
- ["urajuci", 927, 156],
- ["astajuci", 927, 138],
- ["istajuci", 927, 139],
- ["ostajuci", 927, 140],
- ["avajuci", 927, 144],
- ["evajuci", 927, 145],
- ["ivajuci", 927, 146],
- ["uvajuci", 927, 147],
- ["ujuci", -1, 157],
- ["lucujuci", 937, 121],
- ["irujuci", 937, 155],
- ["luci", -1, 121],
- ["nuci", -1, 164],
- ["etuci", -1, 153],
- ["astuci", -1, 136],
- ["gi", -1, 20],
- ["ugi", 944, 18],
- ["aji", -1, 109],
- ["caji", 946, 26],
- ["laji", 946, 30],
- ["raji", 946, 31],
- ["\u0107aji", 946, 28],
- ["\u010Daji", 946, 27],
- ["\u0111aji", 946, 29],
- ["biji", -1, 32],
- ["ciji", -1, 33],
- ["diji", -1, 34],
- ["fiji", -1, 40],
- ["giji", -1, 39],
- ["anjiji", -1, 84],
- ["enjiji", -1, 85],
- ["snjiji", -1, 122],
- ["\u0161njiji", -1, 86],
- ["kiji", -1, 95],
- ["skiji", 962, 1],
- ["\u0161kiji", 962, 2],
- ["liji", -1, 35],
- ["eliji", 965, 83],
- ["miji", -1, 37],
- ["niji", -1, 13],
- ["ganiji", 968, 9],
- ["maniji", 968, 6],
- ["paniji", 968, 7],
- ["raniji", 968, 8],
- ["taniji", 968, 5],
- ["piji", -1, 41],
- ["riji", -1, 42],
- ["siji", -1, 43],
- ["osiji", 976, 123],
- ["tiji", -1, 44],
- ["atiji", 978, 120],
- ["evitiji", 978, 92],
- ["ovitiji", 978, 93],
- ["astiji", 978, 94],
- ["aviji", -1, 77],
- ["eviji", -1, 78],
- ["iviji", -1, 79],
- ["oviji", -1, 80],
- ["ziji", -1, 45],
- ["o\u0161iji", -1, 91],
- ["\u017Eiji", -1, 38],
- ["anji", -1, 84],
- ["enji", -1, 85],
- ["snji", -1, 122],
- ["\u0161nji", -1, 86],
- ["ki", -1, 95],
- ["ski", 994, 1],
- ["\u0161ki", 994, 2],
- ["ali", -1, 104],
- ["acali", 997, 128],
- ["astajali", 997, 106],
- ["istajali", 997, 107],
- ["ostajali", 997, 108],
- ["ijali", 997, 47],
- ["injali", 997, 114],
- ["nali", 997, 46],
- ["irali", 997, 100],
- ["urali", 997, 105],
- ["tali", 997, 113],
- ["astali", 1007, 110],
- ["istali", 1007, 111],
- ["ostali", 1007, 112],
- ["avali", 997, 97],
- ["evali", 997, 96],
- ["ivali", 997, 98],
- ["ovali", 997, 76],
- ["uvali", 997, 99],
- ["a\u010Dali", 997, 102],
- ["eli", -1, 83],
- ["ili", -1, 116],
- ["acili", 1018, 124],
- ["lucili", 1018, 121],
- ["nili", 1018, 103],
- ["rosili", 1018, 127],
- ["jetili", 1018, 118],
- ["ozili", 1018, 48],
- ["a\u010Dili", 1018, 101],
- ["lu\u010Dili", 1018, 117],
- ["ro\u0161ili", 1018, 90],
- ["oli", -1, 50],
- ["asli", -1, 115],
- ["nuli", -1, 13],
- ["rami", -1, 52],
- ["lemi", -1, 51],
- ["ni", -1, 13],
- ["ani", 1033, 10],
- ["acani", 1034, 128],
- ["urani", 1034, 105],
- ["tani", 1034, 113],
- ["avani", 1034, 97],
- ["evani", 1034, 96],
- ["ivani", 1034, 98],
- ["uvani", 1034, 99],
- ["a\u010Dani", 1034, 102],
- ["aceni", 1033, 124],
- ["luceni", 1033, 121],
- ["a\u010Deni", 1033, 101],
- ["lu\u010Deni", 1033, 117],
- ["ini", 1033, 11],
- ["cini", 1047, 137],
- ["\u010Dini", 1047, 89],
- ["oni", 1033, 12],
- ["ari", -1, 53],
- ["dri", -1, 54],
- ["eri", -1, 55],
- ["ori", -1, 56],
- ["basi", -1, 135],
- ["gasi", -1, 131],
- ["jasi", -1, 129],
- ["kasi", -1, 133],
- ["nasi", -1, 132],
- ["tasi", -1, 130],
- ["vasi", -1, 134],
- ["esi", -1, 152],
- ["isi", -1, 154],
- ["osi", -1, 123],
- ["avsi", -1, 161],
- ["acavsi", 1065, 128],
- ["iravsi", 1065, 155],
- ["tavsi", 1065, 160],
- ["etavsi", 1068, 153],
- ["astavsi", 1068, 141],
- ["istavsi", 1068, 142],
- ["ostavsi", 1068, 143],
- ["ivsi", -1, 162],
- ["nivsi", 1073, 158],
- ["rosivsi", 1073, 127],
- ["nuvsi", -1, 164],
- ["ati", -1, 104],
- ["acati", 1077, 128],
- ["astajati", 1077, 106],
- ["istajati", 1077, 107],
- ["ostajati", 1077, 108],
- ["injati", 1077, 114],
- ["ikati", 1077, 68],
- ["lati", 1077, 69],
- ["irati", 1077, 100],
- ["urati", 1077, 105],
- ["tati", 1077, 113],
- ["astati", 1087, 110],
- ["istati", 1087, 111],
- ["ostati", 1087, 112],
- ["avati", 1077, 97],
- ["evati", 1077, 96],
- ["ivati", 1077, 98],
- ["ovati", 1077, 76],
- ["uvati", 1077, 99],
- ["a\u010Dati", 1077, 102],
- ["eti", -1, 70],
- ["iti", -1, 116],
- ["aciti", 1098, 124],
- ["luciti", 1098, 121],
- ["niti", 1098, 103],
- ["rositi", 1098, 127],
- ["jetiti", 1098, 118],
- ["eviti", 1098, 92],
- ["oviti", 1098, 93],
- ["a\u010Diti", 1098, 101],
- ["lu\u010Diti", 1098, 117],
- ["ro\u0161iti", 1098, 90],
- ["asti", -1, 94],
- ["esti", -1, 71],
- ["isti", -1, 72],
- ["ksti", -1, 73],
- ["osti", -1, 74],
- ["nuti", -1, 13],
- ["avi", -1, 77],
- ["evi", -1, 78],
- ["ajevi", 1116, 109],
- ["cajevi", 1117, 26],
- ["lajevi", 1117, 30],
- ["rajevi", 1117, 31],
- ["\u0107ajevi", 1117, 28],
- ["\u010Dajevi", 1117, 27],
- ["\u0111ajevi", 1117, 29],
- ["ivi", -1, 79],
- ["ovi", -1, 80],
- ["govi", 1125, 20],
- ["ugovi", 1126, 17],
- ["lovi", 1125, 82],
- ["olovi", 1128, 49],
- ["movi", 1125, 81],
- ["onovi", 1125, 12],
- ["ie\u0107i", -1, 116],
- ["a\u010De\u0107i", -1, 101],
- ["aju\u0107i", -1, 104],
- ["iraju\u0107i", 1134, 100],
- ["uraju\u0107i", 1134, 105],
- ["astaju\u0107i", 1134, 106],
- ["istaju\u0107i", 1134, 107],
- ["ostaju\u0107i", 1134, 108],
- ["avaju\u0107i", 1134, 97],
- ["evaju\u0107i", 1134, 96],
- ["ivaju\u0107i", 1134, 98],
- ["uvaju\u0107i", 1134, 99],
- ["uju\u0107i", -1, 25],
- ["iruju\u0107i", 1144, 100],
- ["lu\u010Duju\u0107i", 1144, 117],
- ["nu\u0107i", -1, 13],
- ["etu\u0107i", -1, 70],
- ["astu\u0107i", -1, 115],
- ["a\u010Di", -1, 101],
- ["lu\u010Di", -1, 117],
- ["ba\u0161i", -1, 63],
- ["ga\u0161i", -1, 64],
- ["ja\u0161i", -1, 61],
- ["ka\u0161i", -1, 62],
- ["na\u0161i", -1, 60],
- ["ta\u0161i", -1, 59],
- ["va\u0161i", -1, 65],
- ["e\u0161i", -1, 66],
- ["i\u0161i", -1, 67],
- ["o\u0161i", -1, 91],
- ["av\u0161i", -1, 104],
- ["irav\u0161i", 1162, 100],
- ["tav\u0161i", 1162, 113],
- ["etav\u0161i", 1164, 70],
- ["astav\u0161i", 1164, 110],
- ["istav\u0161i", 1164, 111],
- ["ostav\u0161i", 1164, 112],
- ["a\u010Dav\u0161i", 1162, 102],
- ["iv\u0161i", -1, 116],
- ["niv\u0161i", 1170, 103],
- ["ro\u0161iv\u0161i", 1170, 90],
- ["nuv\u0161i", -1, 13],
- ["aj", -1, 104],
- ["uraj", 1174, 105],
- ["taj", 1174, 113],
- ["avaj", 1174, 97],
- ["evaj", 1174, 96],
- ["ivaj", 1174, 98],
- ["uvaj", 1174, 99],
- ["ij", -1, 116],
- ["acoj", -1, 124],
- ["ecoj", -1, 125],
- ["ucoj", -1, 126],
- ["anjijoj", -1, 84],
- ["enjijoj", -1, 85],
- ["snjijoj", -1, 122],
- ["\u0161njijoj", -1, 86],
- ["kijoj", -1, 95],
- ["skijoj", 1189, 1],
- ["\u0161kijoj", 1189, 2],
- ["elijoj", -1, 83],
- ["nijoj", -1, 13],
- ["osijoj", -1, 123],
- ["evitijoj", -1, 92],
- ["ovitijoj", -1, 93],
- ["astijoj", -1, 94],
- ["avijoj", -1, 77],
- ["evijoj", -1, 78],
- ["ivijoj", -1, 79],
- ["ovijoj", -1, 80],
- ["o\u0161ijoj", -1, 91],
- ["anjoj", -1, 84],
- ["enjoj", -1, 85],
- ["snjoj", -1, 122],
- ["\u0161njoj", -1, 86],
- ["koj", -1, 95],
- ["skoj", 1207, 1],
- ["\u0161koj", 1207, 2],
- ["aloj", -1, 104],
- ["eloj", -1, 83],
- ["noj", -1, 13],
- ["cinoj", 1212, 137],
- ["\u010Dinoj", 1212, 89],
- ["osoj", -1, 123],
- ["atoj", -1, 120],
- ["evitoj", -1, 92],
- ["ovitoj", -1, 93],
- ["astoj", -1, 94],
- ["avoj", -1, 77],
- ["evoj", -1, 78],
- ["ivoj", -1, 79],
- ["ovoj", -1, 80],
- ["a\u0107oj", -1, 14],
- ["e\u0107oj", -1, 15],
- ["u\u0107oj", -1, 16],
- ["o\u0161oj", -1, 91],
- ["lucuj", -1, 121],
- ["iruj", -1, 100],
- ["lu\u010Duj", -1, 117],
- ["al", -1, 104],
- ["iral", 1231, 100],
- ["ural", 1231, 105],
- ["el", -1, 119],
- ["il", -1, 116],
- ["am", -1, 104],
- ["acam", 1236, 128],
- ["iram", 1236, 100],
- ["uram", 1236, 105],
- ["tam", 1236, 113],
- ["avam", 1236, 97],
- ["evam", 1236, 96],
- ["ivam", 1236, 98],
- ["uvam", 1236, 99],
- ["a\u010Dam", 1236, 102],
- ["em", -1, 119],
- ["acem", 1246, 124],
- ["ecem", 1246, 125],
- ["ucem", 1246, 126],
- ["astadem", 1246, 110],
- ["istadem", 1246, 111],
- ["ostadem", 1246, 112],
- ["ajem", 1246, 104],
- ["cajem", 1253, 26],
- ["lajem", 1253, 30],
- ["rajem", 1253, 31],
- ["astajem", 1253, 106],
- ["istajem", 1253, 107],
- ["ostajem", 1253, 108],
- ["\u0107ajem", 1253, 28],
- ["\u010Dajem", 1253, 27],
- ["\u0111ajem", 1253, 29],
- ["ijem", 1246, 116],
- ["anjijem", 1263, 84],
- ["enjijem", 1263, 85],
- ["snjijem", 1263, 123],
- ["\u0161njijem", 1263, 86],
- ["kijem", 1263, 95],
- ["skijem", 1268, 1],
- ["\u0161kijem", 1268, 2],
- ["lijem", 1263, 24],
- ["elijem", 1271, 83],
- ["nijem", 1263, 13],
- ["rarijem", 1263, 21],
- ["sijem", 1263, 23],
- ["osijem", 1275, 123],
- ["atijem", 1263, 120],
- ["evitijem", 1263, 92],
- ["ovitijem", 1263, 93],
- ["otijem", 1263, 22],
- ["astijem", 1263, 94],
- ["avijem", 1263, 77],
- ["evijem", 1263, 78],
- ["ivijem", 1263, 79],
- ["ovijem", 1263, 80],
- ["o\u0161ijem", 1263, 91],
- ["anjem", 1246, 84],
- ["enjem", 1246, 85],
- ["injem", 1246, 114],
- ["snjem", 1246, 122],
- ["\u0161njem", 1246, 86],
- ["ujem", 1246, 25],
- ["lucujem", 1292, 121],
- ["irujem", 1292, 100],
- ["lu\u010Dujem", 1292, 117],
- ["kem", 1246, 95],
- ["skem", 1296, 1],
- ["\u0161kem", 1296, 2],
- ["elem", 1246, 83],
- ["nem", 1246, 13],
- ["anem", 1300, 10],
- ["astanem", 1301, 110],
- ["istanem", 1301, 111],
- ["ostanem", 1301, 112],
- ["enem", 1300, 87],
- ["snem", 1300, 159],
- ["\u0161nem", 1300, 88],
- ["basem", 1246, 135],
- ["gasem", 1246, 131],
- ["jasem", 1246, 129],
- ["kasem", 1246, 133],
- ["nasem", 1246, 132],
- ["tasem", 1246, 130],
- ["vasem", 1246, 134],
- ["esem", 1246, 152],
- ["isem", 1246, 154],
- ["osem", 1246, 123],
- ["atem", 1246, 120],
- ["etem", 1246, 70],
- ["evitem", 1246, 92],
- ["ovitem", 1246, 93],
- ["astem", 1246, 94],
- ["istem", 1246, 151],
- ["i\u0161tem", 1246, 75],
- ["avem", 1246, 77],
- ["evem", 1246, 78],
- ["ivem", 1246, 79],
- ["a\u0107em", 1246, 14],
- ["e\u0107em", 1246, 15],
- ["u\u0107em", 1246, 16],
- ["ba\u0161em", 1246, 63],
- ["ga\u0161em", 1246, 64],
- ["ja\u0161em", 1246, 61],
- ["ka\u0161em", 1246, 62],
- ["na\u0161em", 1246, 60],
- ["ta\u0161em", 1246, 59],
- ["va\u0161em", 1246, 65],
- ["e\u0161em", 1246, 66],
- ["i\u0161em", 1246, 67],
- ["o\u0161em", 1246, 91],
- ["im", -1, 116],
- ["acim", 1341, 124],
- ["ecim", 1341, 125],
- ["ucim", 1341, 126],
- ["lucim", 1344, 121],
- ["anjijim", 1341, 84],
- ["enjijim", 1341, 85],
- ["snjijim", 1341, 122],
- ["\u0161njijim", 1341, 86],
- ["kijim", 1341, 95],
- ["skijim", 1350, 1],
- ["\u0161kijim", 1350, 2],
- ["elijim", 1341, 83],
- ["nijim", 1341, 13],
- ["osijim", 1341, 123],
- ["atijim", 1341, 120],
- ["evitijim", 1341, 92],
- ["ovitijim", 1341, 93],
- ["astijim", 1341, 94],
- ["avijim", 1341, 77],
- ["evijim", 1341, 78],
- ["ivijim", 1341, 79],
- ["ovijim", 1341, 80],
- ["o\u0161ijim", 1341, 91],
- ["anjim", 1341, 84],
- ["enjim", 1341, 85],
- ["snjim", 1341, 122],
- ["\u0161njim", 1341, 86],
- ["kim", 1341, 95],
- ["skim", 1369, 1],
- ["\u0161kim", 1369, 2],
- ["elim", 1341, 83],
- ["nim", 1341, 13],
- ["cinim", 1373, 137],
- ["\u010Dinim", 1373, 89],
- ["osim", 1341, 123],
- ["rosim", 1376, 127],
- ["atim", 1341, 120],
- ["jetim", 1341, 118],
- ["evitim", 1341, 92],
- ["ovitim", 1341, 93],
- ["astim", 1341, 94],
- ["avim", 1341, 77],
- ["evim", 1341, 78],
- ["ivim", 1341, 79],
- ["ovim", 1341, 80],
- ["a\u0107im", 1341, 14],
- ["e\u0107im", 1341, 15],
- ["u\u0107im", 1341, 16],
- ["a\u010Dim", 1341, 101],
- ["lu\u010Dim", 1341, 117],
- ["o\u0161im", 1341, 91],
- ["ro\u0161im", 1392, 90],
- ["acom", -1, 124],
- ["ecom", -1, 125],
- ["ucom", -1, 126],
- ["gom", -1, 20],
- ["logom", 1397, 19],
- ["ugom", 1397, 18],
- ["bijom", -1, 32],
- ["cijom", -1, 33],
- ["dijom", -1, 34],
- ["fijom", -1, 40],
- ["gijom", -1, 39],
- ["lijom", -1, 35],
- ["mijom", -1, 37],
- ["nijom", -1, 36],
- ["ganijom", 1407, 9],
- ["manijom", 1407, 6],
- ["panijom", 1407, 7],
- ["ranijom", 1407, 8],
- ["tanijom", 1407, 5],
- ["pijom", -1, 41],
- ["rijom", -1, 42],
- ["sijom", -1, 43],
- ["tijom", -1, 44],
- ["zijom", -1, 45],
- ["\u017Eijom", -1, 38],
- ["anjom", -1, 84],
- ["enjom", -1, 85],
- ["snjom", -1, 122],
- ["\u0161njom", -1, 86],
- ["kom", -1, 95],
- ["skom", 1423, 1],
- ["\u0161kom", 1423, 2],
- ["alom", -1, 104],
- ["ijalom", 1426, 47],
- ["nalom", 1426, 46],
- ["elom", -1, 83],
- ["ilom", -1, 116],
- ["ozilom", 1430, 48],
- ["olom", -1, 50],
- ["ramom", -1, 52],
- ["lemom", -1, 51],
- ["nom", -1, 13],
- ["anom", 1435, 10],
- ["inom", 1435, 11],
- ["cinom", 1437, 137],
- ["aninom", 1437, 10],
- ["\u010Dinom", 1437, 89],
- ["onom", 1435, 12],
- ["arom", -1, 53],
- ["drom", -1, 54],
- ["erom", -1, 55],
- ["orom", -1, 56],
- ["basom", -1, 135],
- ["gasom", -1, 131],
- ["jasom", -1, 129],
- ["kasom", -1, 133],
- ["nasom", -1, 132],
- ["tasom", -1, 130],
- ["vasom", -1, 134],
- ["esom", -1, 57],
- ["isom", -1, 58],
- ["osom", -1, 123],
- ["atom", -1, 120],
- ["ikatom", 1456, 68],
- ["latom", 1456, 69],
- ["etom", -1, 70],
- ["evitom", -1, 92],
- ["ovitom", -1, 93],
- ["astom", -1, 94],
- ["estom", -1, 71],
- ["istom", -1, 72],
- ["kstom", -1, 73],
- ["ostom", -1, 74],
- ["avom", -1, 77],
- ["evom", -1, 78],
- ["ivom", -1, 79],
- ["ovom", -1, 80],
- ["lovom", 1470, 82],
- ["movom", 1470, 81],
- ["stvom", -1, 3],
- ["\u0161tvom", -1, 4],
- ["a\u0107om", -1, 14],
- ["e\u0107om", -1, 15],
- ["u\u0107om", -1, 16],
- ["ba\u0161om", -1, 63],
- ["ga\u0161om", -1, 64],
- ["ja\u0161om", -1, 61],
- ["ka\u0161om", -1, 62],
- ["na\u0161om", -1, 60],
- ["ta\u0161om", -1, 59],
- ["va\u0161om", -1, 65],
- ["e\u0161om", -1, 66],
- ["i\u0161om", -1, 67],
- ["o\u0161om", -1, 91],
- ["an", -1, 104],
- ["acan", 1488, 128],
- ["iran", 1488, 100],
- ["uran", 1488, 105],
- ["tan", 1488, 113],
- ["avan", 1488, 97],
- ["evan", 1488, 96],
- ["ivan", 1488, 98],
- ["uvan", 1488, 99],
- ["a\u010Dan", 1488, 102],
- ["acen", -1, 124],
- ["lucen", -1, 121],
- ["a\u010Den", -1, 101],
- ["lu\u010Den", -1, 117],
- ["anin", -1, 10],
- ["ao", -1, 104],
- ["acao", 1503, 128],
- ["astajao", 1503, 106],
- ["istajao", 1503, 107],
- ["ostajao", 1503, 108],
- ["injao", 1503, 114],
- ["irao", 1503, 100],
- ["urao", 1503, 105],
- ["tao", 1503, 113],
- ["astao", 1511, 110],
- ["istao", 1511, 111],
- ["ostao", 1511, 112],
- ["avao", 1503, 97],
- ["evao", 1503, 96],
- ["ivao", 1503, 98],
- ["ovao", 1503, 76],
- ["uvao", 1503, 99],
- ["a\u010Dao", 1503, 102],
- ["go", -1, 20],
- ["ugo", 1521, 18],
- ["io", -1, 116],
- ["acio", 1523, 124],
- ["lucio", 1523, 121],
- ["lio", 1523, 24],
- ["nio", 1523, 103],
- ["rario", 1523, 21],
- ["sio", 1523, 23],
- ["rosio", 1529, 127],
- ["jetio", 1523, 118],
- ["otio", 1523, 22],
- ["a\u010Dio", 1523, 101],
- ["lu\u010Dio", 1523, 117],
- ["ro\u0161io", 1523, 90],
- ["bijo", -1, 32],
- ["cijo", -1, 33],
- ["dijo", -1, 34],
- ["fijo", -1, 40],
- ["gijo", -1, 39],
- ["lijo", -1, 35],
- ["mijo", -1, 37],
- ["nijo", -1, 36],
- ["pijo", -1, 41],
- ["rijo", -1, 42],
- ["sijo", -1, 43],
- ["tijo", -1, 44],
- ["zijo", -1, 45],
- ["\u017Eijo", -1, 38],
- ["anjo", -1, 84],
- ["enjo", -1, 85],
- ["snjo", -1, 122],
- ["\u0161njo", -1, 86],
- ["ko", -1, 95],
- ["sko", 1554, 1],
- ["\u0161ko", 1554, 2],
- ["alo", -1, 104],
- ["acalo", 1557, 128],
- ["astajalo", 1557, 106],
- ["istajalo", 1557, 107],
- ["ostajalo", 1557, 108],
- ["ijalo", 1557, 47],
- ["injalo", 1557, 114],
- ["nalo", 1557, 46],
- ["iralo", 1557, 100],
- ["uralo", 1557, 105],
- ["talo", 1557, 113],
- ["astalo", 1567, 110],
- ["istalo", 1567, 111],
- ["ostalo", 1567, 112],
- ["avalo", 1557, 97],
- ["evalo", 1557, 96],
- ["ivalo", 1557, 98],
- ["ovalo", 1557, 76],
- ["uvalo", 1557, 99],
- ["a\u010Dalo", 1557, 102],
- ["elo", -1, 83],
- ["ilo", -1, 116],
- ["acilo", 1578, 124],
- ["lucilo", 1578, 121],
- ["nilo", 1578, 103],
- ["rosilo", 1578, 127],
- ["jetilo", 1578, 118],
- ["a\u010Dilo", 1578, 101],
- ["lu\u010Dilo", 1578, 117],
- ["ro\u0161ilo", 1578, 90],
- ["aslo", -1, 115],
- ["nulo", -1, 13],
- ["amo", -1, 104],
- ["acamo", 1589, 128],
- ["ramo", 1589, 52],
- ["iramo", 1591, 100],
- ["uramo", 1591, 105],
- ["tamo", 1589, 113],
- ["avamo", 1589, 97],
- ["evamo", 1589, 96],
- ["ivamo", 1589, 98],
- ["uvamo", 1589, 99],
- ["a\u010Damo", 1589, 102],
- ["emo", -1, 119],
- ["astademo", 1600, 110],
- ["istademo", 1600, 111],
- ["ostademo", 1600, 112],
- ["astajemo", 1600, 106],
- ["istajemo", 1600, 107],
- ["ostajemo", 1600, 108],
- ["ijemo", 1600, 116],
- ["injemo", 1600, 114],
- ["ujemo", 1600, 25],
- ["lucujemo", 1609, 121],
- ["irujemo", 1609, 100],
- ["lu\u010Dujemo", 1609, 117],
- ["lemo", 1600, 51],
- ["nemo", 1600, 13],
- ["astanemo", 1614, 110],
- ["istanemo", 1614, 111],
- ["ostanemo", 1614, 112],
- ["etemo", 1600, 70],
- ["astemo", 1600, 115],
- ["imo", -1, 116],
- ["acimo", 1620, 124],
- ["lucimo", 1620, 121],
- ["nimo", 1620, 13],
- ["astanimo", 1623, 110],
- ["istanimo", 1623, 111],
- ["ostanimo", 1623, 112],
- ["rosimo", 1620, 127],
- ["etimo", 1620, 70],
- ["jetimo", 1628, 118],
- ["astimo", 1620, 115],
- ["a\u010Dimo", 1620, 101],
- ["lu\u010Dimo", 1620, 117],
- ["ro\u0161imo", 1620, 90],
- ["ajmo", -1, 104],
- ["urajmo", 1634, 105],
- ["tajmo", 1634, 113],
- ["astajmo", 1636, 106],
- ["istajmo", 1636, 107],
- ["ostajmo", 1636, 108],
- ["avajmo", 1634, 97],
- ["evajmo", 1634, 96],
- ["ivajmo", 1634, 98],
- ["uvajmo", 1634, 99],
- ["ijmo", -1, 116],
- ["ujmo", -1, 25],
- ["lucujmo", 1645, 121],
- ["irujmo", 1645, 100],
- ["lu\u010Dujmo", 1645, 117],
- ["asmo", -1, 104],
- ["acasmo", 1649, 128],
- ["astajasmo", 1649, 106],
- ["istajasmo", 1649, 107],
- ["ostajasmo", 1649, 108],
- ["injasmo", 1649, 114],
- ["irasmo", 1649, 100],
- ["urasmo", 1649, 105],
- ["tasmo", 1649, 113],
- ["avasmo", 1649, 97],
- ["evasmo", 1649, 96],
- ["ivasmo", 1649, 98],
- ["ovasmo", 1649, 76],
- ["uvasmo", 1649, 99],
- ["a\u010Dasmo", 1649, 102],
- ["ismo", -1, 116],
- ["acismo", 1664, 124],
- ["lucismo", 1664, 121],
- ["nismo", 1664, 103],
- ["rosismo", 1664, 127],
- ["jetismo", 1664, 118],
- ["a\u010Dismo", 1664, 101],
- ["lu\u010Dismo", 1664, 117],
- ["ro\u0161ismo", 1664, 90],
- ["astadosmo", -1, 110],
- ["istadosmo", -1, 111],
- ["ostadosmo", -1, 112],
- ["nusmo", -1, 13],
- ["no", -1, 13],
- ["ano", 1677, 104],
- ["acano", 1678, 128],
- ["urano", 1678, 105],
- ["tano", 1678, 113],
- ["avano", 1678, 97],
- ["evano", 1678, 96],
- ["ivano", 1678, 98],
- ["uvano", 1678, 99],
- ["a\u010Dano", 1678, 102],
- ["aceno", 1677, 124],
- ["luceno", 1677, 121],
- ["a\u010Deno", 1677, 101],
- ["lu\u010Deno", 1677, 117],
- ["ino", 1677, 11],
- ["cino", 1691, 137],
- ["\u010Dino", 1691, 89],
- ["ato", -1, 120],
- ["ikato", 1694, 68],
- ["lato", 1694, 69],
- ["eto", -1, 70],
- ["evito", -1, 92],
- ["ovito", -1, 93],
- ["asto", -1, 94],
- ["esto", -1, 71],
- ["isto", -1, 72],
- ["ksto", -1, 73],
- ["osto", -1, 74],
- ["nuto", -1, 13],
- ["nuo", -1, 13],
- ["avo", -1, 77],
- ["evo", -1, 78],
- ["ivo", -1, 79],
- ["ovo", -1, 80],
- ["stvo", -1, 3],
- ["\u0161tvo", -1, 4],
- ["as", -1, 161],
- ["acas", 1713, 128],
- ["iras", 1713, 155],
- ["uras", 1713, 156],
- ["tas", 1713, 160],
- ["avas", 1713, 144],
- ["evas", 1713, 145],
- ["ivas", 1713, 146],
- ["uvas", 1713, 147],
- ["es", -1, 163],
- ["astades", 1722, 141],
- ["istades", 1722, 142],
- ["ostades", 1722, 143],
- ["astajes", 1722, 138],
- ["istajes", 1722, 139],
- ["ostajes", 1722, 140],
- ["ijes", 1722, 162],
- ["injes", 1722, 150],
- ["ujes", 1722, 157],
- ["lucujes", 1731, 121],
- ["irujes", 1731, 155],
- ["nes", 1722, 164],
- ["astanes", 1734, 141],
- ["istanes", 1734, 142],
- ["ostanes", 1734, 143],
- ["etes", 1722, 153],
- ["astes", 1722, 136],
- ["is", -1, 162],
- ["acis", 1740, 124],
- ["lucis", 1740, 121],
- ["nis", 1740, 158],
- ["rosis", 1740, 127],
- ["jetis", 1740, 149],
- ["at", -1, 104],
- ["acat", 1746, 128],
- ["astajat", 1746, 106],
- ["istajat", 1746, 107],
- ["ostajat", 1746, 108],
- ["injat", 1746, 114],
- ["irat", 1746, 100],
- ["urat", 1746, 105],
- ["tat", 1746, 113],
- ["astat", 1754, 110],
- ["istat", 1754, 111],
- ["ostat", 1754, 112],
- ["avat", 1746, 97],
- ["evat", 1746, 96],
- ["ivat", 1746, 98],
- ["irivat", 1760, 100],
- ["ovat", 1746, 76],
- ["uvat", 1746, 99],
- ["a\u010Dat", 1746, 102],
- ["it", -1, 116],
- ["acit", 1765, 124],
- ["lucit", 1765, 121],
- ["rosit", 1765, 127],
- ["jetit", 1765, 118],
- ["a\u010Dit", 1765, 101],
- ["lu\u010Dit", 1765, 117],
- ["ro\u0161it", 1765, 90],
- ["nut", -1, 13],
- ["astadu", -1, 110],
- ["istadu", -1, 111],
- ["ostadu", -1, 112],
- ["gu", -1, 20],
- ["logu", 1777, 19],
- ["ugu", 1777, 18],
- ["ahu", -1, 104],
- ["acahu", 1780, 128],
- ["astajahu", 1780, 106],
- ["istajahu", 1780, 107],
- ["ostajahu", 1780, 108],
- ["injahu", 1780, 114],
- ["irahu", 1780, 100],
- ["urahu", 1780, 105],
- ["avahu", 1780, 97],
- ["evahu", 1780, 96],
- ["ivahu", 1780, 98],
- ["ovahu", 1780, 76],
- ["uvahu", 1780, 99],
- ["a\u010Dahu", 1780, 102],
- ["aju", -1, 104],
- ["caju", 1794, 26],
- ["acaju", 1795, 128],
- ["laju", 1794, 30],
- ["raju", 1794, 31],
- ["iraju", 1798, 100],
- ["uraju", 1798, 105],
- ["taju", 1794, 113],
- ["astaju", 1801, 106],
- ["istaju", 1801, 107],
- ["ostaju", 1801, 108],
- ["avaju", 1794, 97],
- ["evaju", 1794, 96],
- ["ivaju", 1794, 98],
- ["uvaju", 1794, 99],
- ["\u0107aju", 1794, 28],
- ["\u010Daju", 1794, 27],
- ["a\u010Daju", 1810, 102],
- ["\u0111aju", 1794, 29],
- ["iju", -1, 116],
- ["biju", 1813, 32],
- ["ciju", 1813, 33],
- ["diju", 1813, 34],
- ["fiju", 1813, 40],
- ["giju", 1813, 39],
- ["anjiju", 1813, 84],
- ["enjiju", 1813, 85],
- ["snjiju", 1813, 122],
- ["\u0161njiju", 1813, 86],
- ["kiju", 1813, 95],
- ["liju", 1813, 24],
- ["eliju", 1824, 83],
- ["miju", 1813, 37],
- ["niju", 1813, 13],
- ["ganiju", 1827, 9],
- ["maniju", 1827, 6],
- ["paniju", 1827, 7],
- ["raniju", 1827, 8],
- ["taniju", 1827, 5],
- ["piju", 1813, 41],
- ["riju", 1813, 42],
- ["rariju", 1834, 21],
- ["siju", 1813, 23],
- ["osiju", 1836, 123],
- ["tiju", 1813, 44],
- ["atiju", 1838, 120],
- ["otiju", 1838, 22],
- ["aviju", 1813, 77],
- ["eviju", 1813, 78],
- ["iviju", 1813, 79],
- ["oviju", 1813, 80],
- ["ziju", 1813, 45],
- ["o\u0161iju", 1813, 91],
- ["\u017Eiju", 1813, 38],
- ["anju", -1, 84],
- ["enju", -1, 85],
- ["snju", -1, 122],
- ["\u0161nju", -1, 86],
- ["uju", -1, 25],
- ["lucuju", 1852, 121],
- ["iruju", 1852, 100],
- ["lu\u010Duju", 1852, 117],
- ["ku", -1, 95],
- ["sku", 1856, 1],
- ["\u0161ku", 1856, 2],
- ["alu", -1, 104],
- ["ijalu", 1859, 47],
- ["nalu", 1859, 46],
- ["elu", -1, 83],
- ["ilu", -1, 116],
- ["ozilu", 1863, 48],
- ["olu", -1, 50],
- ["ramu", -1, 52],
- ["acemu", -1, 124],
- ["ecemu", -1, 125],
- ["ucemu", -1, 126],
- ["anjijemu", -1, 84],
- ["enjijemu", -1, 85],
- ["snjijemu", -1, 122],
- ["\u0161njijemu", -1, 86],
- ["kijemu", -1, 95],
- ["skijemu", 1874, 1],
- ["\u0161kijemu", 1874, 2],
- ["elijemu", -1, 83],
- ["nijemu", -1, 13],
- ["osijemu", -1, 123],
- ["atijemu", -1, 120],
- ["evitijemu", -1, 92],
- ["ovitijemu", -1, 93],
- ["astijemu", -1, 94],
- ["avijemu", -1, 77],
- ["evijemu", -1, 78],
- ["ivijemu", -1, 79],
- ["ovijemu", -1, 80],
- ["o\u0161ijemu", -1, 91],
- ["anjemu", -1, 84],
- ["enjemu", -1, 85],
- ["snjemu", -1, 122],
- ["\u0161njemu", -1, 86],
- ["kemu", -1, 95],
- ["skemu", 1893, 1],
- ["\u0161kemu", 1893, 2],
- ["lemu", -1, 51],
- ["elemu", 1896, 83],
- ["nemu", -1, 13],
- ["anemu", 1898, 10],
- ["enemu", 1898, 87],
- ["snemu", 1898, 159],
- ["\u0161nemu", 1898, 88],
- ["osemu", -1, 123],
- ["atemu", -1, 120],
- ["evitemu", -1, 92],
- ["ovitemu", -1, 93],
- ["astemu", -1, 94],
- ["avemu", -1, 77],
- ["evemu", -1, 78],
- ["ivemu", -1, 79],
- ["ovemu", -1, 80],
- ["a\u0107emu", -1, 14],
- ["e\u0107emu", -1, 15],
- ["u\u0107emu", -1, 16],
- ["o\u0161emu", -1, 91],
- ["acomu", -1, 124],
- ["ecomu", -1, 125],
- ["ucomu", -1, 126],
- ["anjomu", -1, 84],
- ["enjomu", -1, 85],
- ["snjomu", -1, 122],
- ["\u0161njomu", -1, 86],
- ["komu", -1, 95],
- ["skomu", 1923, 1],
- ["\u0161komu", 1923, 2],
- ["elomu", -1, 83],
- ["nomu", -1, 13],
- ["cinomu", 1927, 137],
- ["\u010Dinomu", 1927, 89],
- ["osomu", -1, 123],
- ["atomu", -1, 120],
- ["evitomu", -1, 92],
- ["ovitomu", -1, 93],
- ["astomu", -1, 94],
- ["avomu", -1, 77],
- ["evomu", -1, 78],
- ["ivomu", -1, 79],
- ["ovomu", -1, 80],
- ["a\u0107omu", -1, 14],
- ["e\u0107omu", -1, 15],
- ["u\u0107omu", -1, 16],
- ["o\u0161omu", -1, 91],
- ["nu", -1, 13],
- ["anu", 1943, 10],
- ["astanu", 1944, 110],
- ["istanu", 1944, 111],
- ["ostanu", 1944, 112],
- ["inu", 1943, 11],
- ["cinu", 1948, 137],
- ["aninu", 1948, 10],
- ["\u010Dinu", 1948, 89],
- ["onu", 1943, 12],
- ["aru", -1, 53],
- ["dru", -1, 54],
- ["eru", -1, 55],
- ["oru", -1, 56],
- ["basu", -1, 135],
- ["gasu", -1, 131],
- ["jasu", -1, 129],
- ["kasu", -1, 133],
- ["nasu", -1, 132],
- ["tasu", -1, 130],
- ["vasu", -1, 134],
- ["esu", -1, 57],
- ["isu", -1, 58],
- ["osu", -1, 123],
- ["atu", -1, 120],
- ["ikatu", 1967, 68],
- ["latu", 1967, 69],
- ["etu", -1, 70],
- ["evitu", -1, 92],
- ["ovitu", -1, 93],
- ["astu", -1, 94],
- ["estu", -1, 71],
- ["istu", -1, 72],
- ["kstu", -1, 73],
- ["ostu", -1, 74],
- ["i\u0161tu", -1, 75],
- ["avu", -1, 77],
- ["evu", -1, 78],
- ["ivu", -1, 79],
- ["ovu", -1, 80],
- ["lovu", 1982, 82],
- ["movu", 1982, 81],
- ["stvu", -1, 3],
- ["\u0161tvu", -1, 4],
- ["ba\u0161u", -1, 63],
- ["ga\u0161u", -1, 64],
- ["ja\u0161u", -1, 61],
- ["ka\u0161u", -1, 62],
- ["na\u0161u", -1, 60],
- ["ta\u0161u", -1, 59],
- ["va\u0161u", -1, 65],
- ["e\u0161u", -1, 66],
- ["i\u0161u", -1, 67],
- ["o\u0161u", -1, 91],
- ["avav", -1, 97],
- ["evav", -1, 96],
- ["ivav", -1, 98],
- ["uvav", -1, 99],
- ["kov", -1, 95],
- ["a\u0161", -1, 104],
- ["ira\u0161", 2002, 100],
- ["ura\u0161", 2002, 105],
- ["ta\u0161", 2002, 113],
- ["ava\u0161", 2002, 97],
- ["eva\u0161", 2002, 96],
- ["iva\u0161", 2002, 98],
- ["uva\u0161", 2002, 99],
- ["a\u010Da\u0161", 2002, 102],
- ["e\u0161", -1, 119],
- ["astade\u0161", 2011, 110],
- ["istade\u0161", 2011, 111],
- ["ostade\u0161", 2011, 112],
- ["astaje\u0161", 2011, 106],
- ["istaje\u0161", 2011, 107],
- ["ostaje\u0161", 2011, 108],
- ["ije\u0161", 2011, 116],
- ["inje\u0161", 2011, 114],
- ["uje\u0161", 2011, 25],
- ["iruje\u0161", 2020, 100],
- ["lu\u010Duje\u0161", 2020, 117],
- ["ne\u0161", 2011, 13],
- ["astane\u0161", 2023, 110],
- ["istane\u0161", 2023, 111],
- ["ostane\u0161", 2023, 112],
- ["ete\u0161", 2011, 70],
- ["aste\u0161", 2011, 115],
- ["i\u0161", -1, 116],
- ["ni\u0161", 2029, 103],
- ["jeti\u0161", 2029, 118],
- ["a\u010Di\u0161", 2029, 101],
- ["lu\u010Di\u0161", 2029, 117],
- ["ro\u0161i\u0161", 2029, 90]
- ];
-
- /** @const */ var a_3 = [
- ["a", -1, 1],
- ["oga", 0, 1],
- ["ama", 0, 1],
- ["ima", 0, 1],
- ["ena", 0, 1],
- ["e", -1, 1],
- ["og", -1, 1],
- ["anog", 6, 1],
- ["enog", 6, 1],
- ["anih", -1, 1],
- ["enih", -1, 1],
- ["i", -1, 1],
- ["ani", 11, 1],
- ["eni", 11, 1],
- ["anoj", -1, 1],
- ["enoj", -1, 1],
- ["anim", -1, 1],
- ["enim", -1, 1],
- ["om", -1, 1],
- ["enom", 18, 1],
- ["o", -1, 1],
- ["ano", 20, 1],
- ["eno", 20, 1],
- ["ost", -1, 1],
- ["u", -1, 1],
- ["enu", 24, 1]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16];
-
- /** @const */ var /** Array */ g_sa = [65, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 128];
-
- /** @const */ var /** Array */ g_ca = [119, 95, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 136, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 0, 0, 16];
-
- /** @const */ var /** Array */ g_rg = [1];
-
- var /** number */ I_p1 = 0;
- var /** boolean */ B_no_diacritics = false;
-
-
- /** @return {boolean} */
- function r_cyr_to_lat() {
- var /** number */ among_var;
- var /** number */ v_1 = base.cursor;
- lab0: {
- while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab1: {
- golab2: while(true)
- {
- var /** number */ v_3 = base.cursor;
- lab3: {
- base.bra = base.cursor;
- among_var = base.find_among(a_0);
- if (among_var == 0)
- {
- break lab3;
- }
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("b"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("v"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("g"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("d"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("\u0111"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("\u017E"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("z"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 11:
- if (!base.slice_from("j"))
- {
- return false;
- }
- break;
- case 12:
- if (!base.slice_from("k"))
- {
- return false;
- }
- break;
- case 13:
- if (!base.slice_from("l"))
- {
- return false;
- }
- break;
- case 14:
- if (!base.slice_from("lj"))
- {
- return false;
- }
- break;
- case 15:
- if (!base.slice_from("m"))
- {
- return false;
- }
- break;
- case 16:
- if (!base.slice_from("n"))
- {
- return false;
- }
- break;
- case 17:
- if (!base.slice_from("nj"))
- {
- return false;
- }
- break;
- case 18:
- if (!base.slice_from("o"))
- {
- return false;
- }
- break;
- case 19:
- if (!base.slice_from("p"))
- {
- return false;
- }
- break;
- case 20:
- if (!base.slice_from("r"))
- {
- return false;
- }
- break;
- case 21:
- if (!base.slice_from("s"))
- {
- return false;
- }
- break;
- case 22:
- if (!base.slice_from("t"))
- {
- return false;
- }
- break;
- case 23:
- if (!base.slice_from("\u0107"))
- {
- return false;
- }
- break;
- case 24:
- if (!base.slice_from("u"))
- {
- return false;
- }
- break;
- case 25:
- if (!base.slice_from("f"))
- {
- return false;
- }
- break;
- case 26:
- if (!base.slice_from("h"))
- {
- return false;
- }
- break;
- case 27:
- if (!base.slice_from("c"))
- {
- return false;
- }
- break;
- case 28:
- if (!base.slice_from("\u010D"))
- {
- return false;
- }
- break;
- case 29:
- if (!base.slice_from("d\u017E"))
- {
- return false;
- }
- break;
- case 30:
- if (!base.slice_from("\u0161"))
- {
- return false;
- }
- break;
- }
- base.cursor = v_3;
- break golab2;
- }
- base.cursor = v_3;
- if (base.cursor >= base.limit)
- {
- break lab1;
- }
- base.cursor++;
- }
- continue;
- }
- base.cursor = v_2;
- break;
- }
- }
- base.cursor = v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_prelude() {
- var /** number */ v_1 = base.cursor;
- lab0: {
- while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab1: {
- golab2: while(true)
- {
- var /** number */ v_3 = base.cursor;
- lab3: {
- if (!(base.in_grouping(g_ca, 98, 382)))
- {
- break lab3;
- }
- base.bra = base.cursor;
- if (!(base.eq_s("ije")))
- {
- break lab3;
- }
- base.ket = base.cursor;
- if (!(base.in_grouping(g_ca, 98, 382)))
- {
- break lab3;
- }
- if (!base.slice_from("e"))
- {
- return false;
- }
- base.cursor = v_3;
- break golab2;
- }
- base.cursor = v_3;
- if (base.cursor >= base.limit)
- {
- break lab1;
- }
- base.cursor++;
- }
- continue;
- }
- base.cursor = v_2;
- break;
- }
- }
- base.cursor = v_1;
- var /** number */ v_4 = base.cursor;
- lab4: {
- while(true)
- {
- var /** number */ v_5 = base.cursor;
- lab5: {
- golab6: while(true)
- {
- var /** number */ v_6 = base.cursor;
- lab7: {
- if (!(base.in_grouping(g_ca, 98, 382)))
- {
- break lab7;
- }
- base.bra = base.cursor;
- if (!(base.eq_s("je")))
- {
- break lab7;
- }
- base.ket = base.cursor;
- if (!(base.in_grouping(g_ca, 98, 382)))
- {
- break lab7;
- }
- if (!base.slice_from("e"))
- {
- return false;
- }
- base.cursor = v_6;
- break golab6;
- }
- base.cursor = v_6;
- if (base.cursor >= base.limit)
- {
- break lab5;
- }
- base.cursor++;
- }
- continue;
- }
- base.cursor = v_5;
- break;
- }
- }
- base.cursor = v_4;
- var /** number */ v_7 = base.cursor;
- lab8: {
- while(true)
- {
- var /** number */ v_8 = base.cursor;
- lab9: {
- golab10: while(true)
- {
- var /** number */ v_9 = base.cursor;
- lab11: {
- base.bra = base.cursor;
- if (!(base.eq_s("dj")))
- {
- break lab11;
- }
- base.ket = base.cursor;
- if (!base.slice_from("\u0111"))
- {
- return false;
- }
- base.cursor = v_9;
- break golab10;
- }
- base.cursor = v_9;
- if (base.cursor >= base.limit)
- {
- break lab9;
- }
- base.cursor++;
- }
- continue;
- }
- base.cursor = v_8;
- break;
- }
- }
- base.cursor = v_7;
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_regions() {
- B_no_diacritics = true;
- var /** number */ v_1 = base.cursor;
- lab0: {
- golab1: while(true)
- {
- lab2: {
- if (!(base.in_grouping(g_sa, 263, 382)))
- {
- break lab2;
- }
- break golab1;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- B_no_diacritics = false;
- }
- base.cursor = v_1;
- I_p1 = base.limit;
- var /** number */ v_3 = base.cursor;
- lab3: {
- golab4: while(true)
- {
- lab5: {
- if (!(base.in_grouping(g_v, 97, 117)))
- {
- break lab5;
- }
- break golab4;
- }
- if (base.cursor >= base.limit)
- {
- break lab3;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- if (I_p1 >= 2)
- {
- break lab3;
- }
- golab6: while(true)
- {
- lab7: {
- if (!(base.out_grouping(g_v, 97, 117)))
- {
- break lab7;
- }
- break golab6;
- }
- if (base.cursor >= base.limit)
- {
- break lab3;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- }
- base.cursor = v_3;
- var /** number */ v_6 = base.cursor;
- lab8: {
- golab9: while(true)
- {
- lab10: {
- if (!(base.eq_s("r")))
- {
- break lab10;
- }
- break golab9;
- }
- if (base.cursor >= base.limit)
- {
- break lab8;
- }
- base.cursor++;
- }
- lab11: {
- var /** number */ v_8 = base.cursor;
- lab12: {
- if (base.cursor < 2)
- {
- break lab12;
- }
- break lab11;
- }
- base.cursor = v_8;
- golab13: while(true)
- {
- lab14: {
- if (!(base.out_grouping(g_rg, 114, 114)))
- {
- break lab14;
- }
- break golab13;
- }
- if (base.cursor >= base.limit)
- {
- break lab8;
- }
- base.cursor++;
- }
- }
- if ((I_p1 - base.cursor) <= 1)
- {
- break lab8;
- }
- I_p1 = base.cursor;
- }
- base.cursor = v_6;
- return true;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_Step_1() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_1);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("loga"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("peh"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("vojka"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("bojka"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("jak"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("\u010Dajni"))
- {
- return false;
- }
- break;
- case 7:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("cajni"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("erni"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("larni"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("esni"))
- {
- return false;
- }
- break;
- case 11:
- if (!base.slice_from("anjca"))
- {
- return false;
- }
- break;
- case 12:
- if (!base.slice_from("ajca"))
- {
- return false;
- }
- break;
- case 13:
- if (!base.slice_from("ljca"))
- {
- return false;
- }
- break;
- case 14:
- if (!base.slice_from("ejca"))
- {
- return false;
- }
- break;
- case 15:
- if (!base.slice_from("ojca"))
- {
- return false;
- }
- break;
- case 16:
- if (!base.slice_from("ajka"))
- {
- return false;
- }
- break;
- case 17:
- if (!base.slice_from("ojka"))
- {
- return false;
- }
- break;
- case 18:
- if (!base.slice_from("\u0161ca"))
- {
- return false;
- }
- break;
- case 19:
- if (!base.slice_from("ing"))
- {
- return false;
- }
- break;
- case 20:
- if (!base.slice_from("tvenik"))
- {
- return false;
- }
- break;
- case 21:
- if (!base.slice_from("tetika"))
- {
- return false;
- }
- break;
- case 22:
- if (!base.slice_from("nstva"))
- {
- return false;
- }
- break;
- case 23:
- if (!base.slice_from("nik"))
- {
- return false;
- }
- break;
- case 24:
- if (!base.slice_from("tik"))
- {
- return false;
- }
- break;
- case 25:
- if (!base.slice_from("zik"))
- {
- return false;
- }
- break;
- case 26:
- if (!base.slice_from("snik"))
- {
- return false;
- }
- break;
- case 27:
- if (!base.slice_from("kusi"))
- {
- return false;
- }
- break;
- case 28:
- if (!base.slice_from("kusni"))
- {
- return false;
- }
- break;
- case 29:
- if (!base.slice_from("kustva"))
- {
- return false;
- }
- break;
- case 30:
- if (!base.slice_from("du\u0161ni"))
- {
- return false;
- }
- break;
- case 31:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("dusni"))
- {
- return false;
- }
- break;
- case 32:
- if (!base.slice_from("antni"))
- {
- return false;
- }
- break;
- case 33:
- if (!base.slice_from("bilni"))
- {
- return false;
- }
- break;
- case 34:
- if (!base.slice_from("tilni"))
- {
- return false;
- }
- break;
- case 35:
- if (!base.slice_from("avilni"))
- {
- return false;
- }
- break;
- case 36:
- if (!base.slice_from("silni"))
- {
- return false;
- }
- break;
- case 37:
- if (!base.slice_from("gilni"))
- {
- return false;
- }
- break;
- case 38:
- if (!base.slice_from("rilni"))
- {
- return false;
- }
- break;
- case 39:
- if (!base.slice_from("nilni"))
- {
- return false;
- }
- break;
- case 40:
- if (!base.slice_from("alni"))
- {
- return false;
- }
- break;
- case 41:
- if (!base.slice_from("ozni"))
- {
- return false;
- }
- break;
- case 42:
- if (!base.slice_from("ravi"))
- {
- return false;
- }
- break;
- case 43:
- if (!base.slice_from("stavni"))
- {
- return false;
- }
- break;
- case 44:
- if (!base.slice_from("pravni"))
- {
- return false;
- }
- break;
- case 45:
- if (!base.slice_from("tivni"))
- {
- return false;
- }
- break;
- case 46:
- if (!base.slice_from("sivni"))
- {
- return false;
- }
- break;
- case 47:
- if (!base.slice_from("atni"))
- {
- return false;
- }
- break;
- case 48:
- if (!base.slice_from("enta"))
- {
- return false;
- }
- break;
- case 49:
- if (!base.slice_from("tetni"))
- {
- return false;
- }
- break;
- case 50:
- if (!base.slice_from("pletni"))
- {
- return false;
- }
- break;
- case 51:
- if (!base.slice_from("\u0161avi"))
- {
- return false;
- }
- break;
- case 52:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("savi"))
- {
- return false;
- }
- break;
- case 53:
- if (!base.slice_from("anta"))
- {
- return false;
- }
- break;
- case 54:
- if (!base.slice_from("a\u010Dka"))
- {
- return false;
- }
- break;
- case 55:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("acka"))
- {
- return false;
- }
- break;
- case 56:
- if (!base.slice_from("u\u0161ka"))
- {
- return false;
- }
- break;
- case 57:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("uska"))
- {
- return false;
- }
- break;
- case 58:
- if (!base.slice_from("atka"))
- {
- return false;
- }
- break;
- case 59:
- if (!base.slice_from("etka"))
- {
- return false;
- }
- break;
- case 60:
- if (!base.slice_from("itka"))
- {
- return false;
- }
- break;
- case 61:
- if (!base.slice_from("otka"))
- {
- return false;
- }
- break;
- case 62:
- if (!base.slice_from("utka"))
- {
- return false;
- }
- break;
- case 63:
- if (!base.slice_from("eskna"))
- {
- return false;
- }
- break;
- case 64:
- if (!base.slice_from("ti\u010Dni"))
- {
- return false;
- }
- break;
- case 65:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("ticni"))
- {
- return false;
- }
- break;
- case 66:
- if (!base.slice_from("ojska"))
- {
- return false;
- }
- break;
- case 67:
- if (!base.slice_from("esma"))
- {
- return false;
- }
- break;
- case 68:
- if (!base.slice_from("metra"))
- {
- return false;
- }
- break;
- case 69:
- if (!base.slice_from("centra"))
- {
- return false;
- }
- break;
- case 70:
- if (!base.slice_from("istra"))
- {
- return false;
- }
- break;
- case 71:
- if (!base.slice_from("osti"))
- {
- return false;
- }
- break;
- case 72:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("osti"))
- {
- return false;
- }
- break;
- case 73:
- if (!base.slice_from("dba"))
- {
- return false;
- }
- break;
- case 74:
- if (!base.slice_from("\u010Dka"))
- {
- return false;
- }
- break;
- case 75:
- if (!base.slice_from("mca"))
- {
- return false;
- }
- break;
- case 76:
- if (!base.slice_from("nca"))
- {
- return false;
- }
- break;
- case 77:
- if (!base.slice_from("voljni"))
- {
- return false;
- }
- break;
- case 78:
- if (!base.slice_from("anki"))
- {
- return false;
- }
- break;
- case 79:
- if (!base.slice_from("vca"))
- {
- return false;
- }
- break;
- case 80:
- if (!base.slice_from("sca"))
- {
- return false;
- }
- break;
- case 81:
- if (!base.slice_from("rca"))
- {
- return false;
- }
- break;
- case 82:
- if (!base.slice_from("alca"))
- {
- return false;
- }
- break;
- case 83:
- if (!base.slice_from("elca"))
- {
- return false;
- }
- break;
- case 84:
- if (!base.slice_from("olca"))
- {
- return false;
- }
- break;
- case 85:
- if (!base.slice_from("njca"))
- {
- return false;
- }
- break;
- case 86:
- if (!base.slice_from("ekta"))
- {
- return false;
- }
- break;
- case 87:
- if (!base.slice_from("izma"))
- {
- return false;
- }
- break;
- case 88:
- if (!base.slice_from("jebi"))
- {
- return false;
- }
- break;
- case 89:
- if (!base.slice_from("baci"))
- {
- return false;
- }
- break;
- case 90:
- if (!base.slice_from("a\u0161ni"))
- {
- return false;
- }
- break;
- case 91:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("asni"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_2() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- if (!base.slice_from("sk"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u0161k"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("stv"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("\u0161tv"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("tanij"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("manij"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("panij"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("ranij"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("ganij"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("an"))
- {
- return false;
- }
- break;
- case 11:
- if (!base.slice_from("in"))
- {
- return false;
- }
- break;
- case 12:
- if (!base.slice_from("on"))
- {
- return false;
- }
- break;
- case 13:
- if (!base.slice_from("n"))
- {
- return false;
- }
- break;
- case 14:
- if (!base.slice_from("a\u0107"))
- {
- return false;
- }
- break;
- case 15:
- if (!base.slice_from("e\u0107"))
- {
- return false;
- }
- break;
- case 16:
- if (!base.slice_from("u\u0107"))
- {
- return false;
- }
- break;
- case 17:
- if (!base.slice_from("ugov"))
- {
- return false;
- }
- break;
- case 18:
- if (!base.slice_from("ug"))
- {
- return false;
- }
- break;
- case 19:
- if (!base.slice_from("log"))
- {
- return false;
- }
- break;
- case 20:
- if (!base.slice_from("g"))
- {
- return false;
- }
- break;
- case 21:
- if (!base.slice_from("rari"))
- {
- return false;
- }
- break;
- case 22:
- if (!base.slice_from("oti"))
- {
- return false;
- }
- break;
- case 23:
- if (!base.slice_from("si"))
- {
- return false;
- }
- break;
- case 24:
- if (!base.slice_from("li"))
- {
- return false;
- }
- break;
- case 25:
- if (!base.slice_from("uj"))
- {
- return false;
- }
- break;
- case 26:
- if (!base.slice_from("caj"))
- {
- return false;
- }
- break;
- case 27:
- if (!base.slice_from("\u010Daj"))
- {
- return false;
- }
- break;
- case 28:
- if (!base.slice_from("\u0107aj"))
- {
- return false;
- }
- break;
- case 29:
- if (!base.slice_from("\u0111aj"))
- {
- return false;
- }
- break;
- case 30:
- if (!base.slice_from("laj"))
- {
- return false;
- }
- break;
- case 31:
- if (!base.slice_from("raj"))
- {
- return false;
- }
- break;
- case 32:
- if (!base.slice_from("bij"))
- {
- return false;
- }
- break;
- case 33:
- if (!base.slice_from("cij"))
- {
- return false;
- }
- break;
- case 34:
- if (!base.slice_from("dij"))
- {
- return false;
- }
- break;
- case 35:
- if (!base.slice_from("lij"))
- {
- return false;
- }
- break;
- case 36:
- if (!base.slice_from("nij"))
- {
- return false;
- }
- break;
- case 37:
- if (!base.slice_from("mij"))
- {
- return false;
- }
- break;
- case 38:
- if (!base.slice_from("\u017Eij"))
- {
- return false;
- }
- break;
- case 39:
- if (!base.slice_from("gij"))
- {
- return false;
- }
- break;
- case 40:
- if (!base.slice_from("fij"))
- {
- return false;
- }
- break;
- case 41:
- if (!base.slice_from("pij"))
- {
- return false;
- }
- break;
- case 42:
- if (!base.slice_from("rij"))
- {
- return false;
- }
- break;
- case 43:
- if (!base.slice_from("sij"))
- {
- return false;
- }
- break;
- case 44:
- if (!base.slice_from("tij"))
- {
- return false;
- }
- break;
- case 45:
- if (!base.slice_from("zij"))
- {
- return false;
- }
- break;
- case 46:
- if (!base.slice_from("nal"))
- {
- return false;
- }
- break;
- case 47:
- if (!base.slice_from("ijal"))
- {
- return false;
- }
- break;
- case 48:
- if (!base.slice_from("ozil"))
- {
- return false;
- }
- break;
- case 49:
- if (!base.slice_from("olov"))
- {
- return false;
- }
- break;
- case 50:
- if (!base.slice_from("ol"))
- {
- return false;
- }
- break;
- case 51:
- if (!base.slice_from("lem"))
- {
- return false;
- }
- break;
- case 52:
- if (!base.slice_from("ram"))
- {
- return false;
- }
- break;
- case 53:
- if (!base.slice_from("ar"))
- {
- return false;
- }
- break;
- case 54:
- if (!base.slice_from("dr"))
- {
- return false;
- }
- break;
- case 55:
- if (!base.slice_from("er"))
- {
- return false;
- }
- break;
- case 56:
- if (!base.slice_from("or"))
- {
- return false;
- }
- break;
- case 57:
- if (!base.slice_from("es"))
- {
- return false;
- }
- break;
- case 58:
- if (!base.slice_from("is"))
- {
- return false;
- }
- break;
- case 59:
- if (!base.slice_from("ta\u0161"))
- {
- return false;
- }
- break;
- case 60:
- if (!base.slice_from("na\u0161"))
- {
- return false;
- }
- break;
- case 61:
- if (!base.slice_from("ja\u0161"))
- {
- return false;
- }
- break;
- case 62:
- if (!base.slice_from("ka\u0161"))
- {
- return false;
- }
- break;
- case 63:
- if (!base.slice_from("ba\u0161"))
- {
- return false;
- }
- break;
- case 64:
- if (!base.slice_from("ga\u0161"))
- {
- return false;
- }
- break;
- case 65:
- if (!base.slice_from("va\u0161"))
- {
- return false;
- }
- break;
- case 66:
- if (!base.slice_from("e\u0161"))
- {
- return false;
- }
- break;
- case 67:
- if (!base.slice_from("i\u0161"))
- {
- return false;
- }
- break;
- case 68:
- if (!base.slice_from("ikat"))
- {
- return false;
- }
- break;
- case 69:
- if (!base.slice_from("lat"))
- {
- return false;
- }
- break;
- case 70:
- if (!base.slice_from("et"))
- {
- return false;
- }
- break;
- case 71:
- if (!base.slice_from("est"))
- {
- return false;
- }
- break;
- case 72:
- if (!base.slice_from("ist"))
- {
- return false;
- }
- break;
- case 73:
- if (!base.slice_from("kst"))
- {
- return false;
- }
- break;
- case 74:
- if (!base.slice_from("ost"))
- {
- return false;
- }
- break;
- case 75:
- if (!base.slice_from("i\u0161t"))
- {
- return false;
- }
- break;
- case 76:
- if (!base.slice_from("ova"))
- {
- return false;
- }
- break;
- case 77:
- if (!base.slice_from("av"))
- {
- return false;
- }
- break;
- case 78:
- if (!base.slice_from("ev"))
- {
- return false;
- }
- break;
- case 79:
- if (!base.slice_from("iv"))
- {
- return false;
- }
- break;
- case 80:
- if (!base.slice_from("ov"))
- {
- return false;
- }
- break;
- case 81:
- if (!base.slice_from("mov"))
- {
- return false;
- }
- break;
- case 82:
- if (!base.slice_from("lov"))
- {
- return false;
- }
- break;
- case 83:
- if (!base.slice_from("el"))
- {
- return false;
- }
- break;
- case 84:
- if (!base.slice_from("anj"))
- {
- return false;
- }
- break;
- case 85:
- if (!base.slice_from("enj"))
- {
- return false;
- }
- break;
- case 86:
- if (!base.slice_from("\u0161nj"))
- {
- return false;
- }
- break;
- case 87:
- if (!base.slice_from("en"))
- {
- return false;
- }
- break;
- case 88:
- if (!base.slice_from("\u0161n"))
- {
- return false;
- }
- break;
- case 89:
- if (!base.slice_from("\u010Din"))
- {
- return false;
- }
- break;
- case 90:
- if (!base.slice_from("ro\u0161i"))
- {
- return false;
- }
- break;
- case 91:
- if (!base.slice_from("o\u0161"))
- {
- return false;
- }
- break;
- case 92:
- if (!base.slice_from("evit"))
- {
- return false;
- }
- break;
- case 93:
- if (!base.slice_from("ovit"))
- {
- return false;
- }
- break;
- case 94:
- if (!base.slice_from("ast"))
- {
- return false;
- }
- break;
- case 95:
- if (!base.slice_from("k"))
- {
- return false;
- }
- break;
- case 96:
- if (!base.slice_from("eva"))
- {
- return false;
- }
- break;
- case 97:
- if (!base.slice_from("ava"))
- {
- return false;
- }
- break;
- case 98:
- if (!base.slice_from("iva"))
- {
- return false;
- }
- break;
- case 99:
- if (!base.slice_from("uva"))
- {
- return false;
- }
- break;
- case 100:
- if (!base.slice_from("ir"))
- {
- return false;
- }
- break;
- case 101:
- if (!base.slice_from("a\u010D"))
- {
- return false;
- }
- break;
- case 102:
- if (!base.slice_from("a\u010Da"))
- {
- return false;
- }
- break;
- case 103:
- if (!base.slice_from("ni"))
- {
- return false;
- }
- break;
- case 104:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 105:
- if (!base.slice_from("ur"))
- {
- return false;
- }
- break;
- case 106:
- if (!base.slice_from("astaj"))
- {
- return false;
- }
- break;
- case 107:
- if (!base.slice_from("istaj"))
- {
- return false;
- }
- break;
- case 108:
- if (!base.slice_from("ostaj"))
- {
- return false;
- }
- break;
- case 109:
- if (!base.slice_from("aj"))
- {
- return false;
- }
- break;
- case 110:
- if (!base.slice_from("asta"))
- {
- return false;
- }
- break;
- case 111:
- if (!base.slice_from("ista"))
- {
- return false;
- }
- break;
- case 112:
- if (!base.slice_from("osta"))
- {
- return false;
- }
- break;
- case 113:
- if (!base.slice_from("ta"))
- {
- return false;
- }
- break;
- case 114:
- if (!base.slice_from("inj"))
- {
- return false;
- }
- break;
- case 115:
- if (!base.slice_from("as"))
- {
- return false;
- }
- break;
- case 116:
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 117:
- if (!base.slice_from("lu\u010D"))
- {
- return false;
- }
- break;
- case 118:
- if (!base.slice_from("jeti"))
- {
- return false;
- }
- break;
- case 119:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- case 120:
- if (!base.slice_from("at"))
- {
- return false;
- }
- break;
- case 121:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("luc"))
- {
- return false;
- }
- break;
- case 122:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("snj"))
- {
- return false;
- }
- break;
- case 123:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("os"))
- {
- return false;
- }
- break;
- case 124:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("ac"))
- {
- return false;
- }
- break;
- case 125:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("ec"))
- {
- return false;
- }
- break;
- case 126:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("uc"))
- {
- return false;
- }
- break;
- case 127:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("rosi"))
- {
- return false;
- }
- break;
- case 128:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("aca"))
- {
- return false;
- }
- break;
- case 129:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("jas"))
- {
- return false;
- }
- break;
- case 130:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("tas"))
- {
- return false;
- }
- break;
- case 131:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("gas"))
- {
- return false;
- }
- break;
- case 132:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("nas"))
- {
- return false;
- }
- break;
- case 133:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("kas"))
- {
- return false;
- }
- break;
- case 134:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("vas"))
- {
- return false;
- }
- break;
- case 135:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("bas"))
- {
- return false;
- }
- break;
- case 136:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("as"))
- {
- return false;
- }
- break;
- case 137:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("cin"))
- {
- return false;
- }
- break;
- case 138:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("astaj"))
- {
- return false;
- }
- break;
- case 139:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("istaj"))
- {
- return false;
- }
- break;
- case 140:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("ostaj"))
- {
- return false;
- }
- break;
- case 141:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("asta"))
- {
- return false;
- }
- break;
- case 142:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("ista"))
- {
- return false;
- }
- break;
- case 143:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("osta"))
- {
- return false;
- }
- break;
- case 144:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("ava"))
- {
- return false;
- }
- break;
- case 145:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("eva"))
- {
- return false;
- }
- break;
- case 146:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("iva"))
- {
- return false;
- }
- break;
- case 147:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("uva"))
- {
- return false;
- }
- break;
- case 148:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("ova"))
- {
- return false;
- }
- break;
- case 149:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("jeti"))
- {
- return false;
- }
- break;
- case 150:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("inj"))
- {
- return false;
- }
- break;
- case 151:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("ist"))
- {
- return false;
- }
- break;
- case 152:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("es"))
- {
- return false;
- }
- break;
- case 153:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("et"))
- {
- return false;
- }
- break;
- case 154:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("is"))
- {
- return false;
- }
- break;
- case 155:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("ir"))
- {
- return false;
- }
- break;
- case 156:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("ur"))
- {
- return false;
- }
- break;
- case 157:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("uj"))
- {
- return false;
- }
- break;
- case 158:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("ni"))
- {
- return false;
- }
- break;
- case 159:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("sn"))
- {
- return false;
- }
- break;
- case 160:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("ta"))
- {
- return false;
- }
- break;
- case 161:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 162:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 163:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- case 164:
- if (!B_no_diacritics)
- {
- return false;
- }
- if (!base.slice_from("n"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_Step_3() {
- base.ket = base.cursor;
- if (base.find_among_b(a_3) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_from(""))
- {
- return false;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- r_cyr_to_lat();
- r_prelude();
- r_mark_regions();
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_4 = base.limit - base.cursor;
- r_Step_1();
- base.cursor = base.limit - v_4;
- var /** number */ v_5 = base.limit - base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_6 = base.limit - base.cursor;
- lab2: {
- if (!r_Step_2())
- {
- break lab2;
- }
- break lab1;
- }
- base.cursor = base.limit - v_6;
- if (!r_Step_3())
- {
- break lab0;
- }
- }
- }
- base.cursor = base.limit - v_5;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['SerbianStemmer'] = SerbianStemmer;
diff --git a/js/spanish-stemmer.js b/js/spanish-stemmer.js
deleted file mode 100644
index 9137989..0000000
--- a/js/spanish-stemmer.js
+++ /dev/null
@@ -1,976 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var SpanishStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["", -1, 6],
- ["\u00E1", 0, 1],
- ["\u00E9", 0, 2],
- ["\u00ED", 0, 3],
- ["\u00F3", 0, 4],
- ["\u00FA", 0, 5]
- ];
-
- /** @const */ var a_1 = [
- ["la", -1, -1],
- ["sela", 0, -1],
- ["le", -1, -1],
- ["me", -1, -1],
- ["se", -1, -1],
- ["lo", -1, -1],
- ["selo", 5, -1],
- ["las", -1, -1],
- ["selas", 7, -1],
- ["les", -1, -1],
- ["los", -1, -1],
- ["selos", 10, -1],
- ["nos", -1, -1]
- ];
-
- /** @const */ var a_2 = [
- ["ando", -1, 6],
- ["iendo", -1, 6],
- ["yendo", -1, 7],
- ["\u00E1ndo", -1, 2],
- ["i\u00E9ndo", -1, 1],
- ["ar", -1, 6],
- ["er", -1, 6],
- ["ir", -1, 6],
- ["\u00E1r", -1, 3],
- ["\u00E9r", -1, 4],
- ["\u00EDr", -1, 5]
- ];
-
- /** @const */ var a_3 = [
- ["ic", -1, -1],
- ["ad", -1, -1],
- ["os", -1, -1],
- ["iv", -1, 1]
- ];
-
- /** @const */ var a_4 = [
- ["able", -1, 1],
- ["ible", -1, 1],
- ["ante", -1, 1]
- ];
-
- /** @const */ var a_5 = [
- ["ic", -1, 1],
- ["abil", -1, 1],
- ["iv", -1, 1]
- ];
-
- /** @const */ var a_6 = [
- ["ica", -1, 1],
- ["ancia", -1, 2],
- ["encia", -1, 5],
- ["adora", -1, 2],
- ["osa", -1, 1],
- ["ista", -1, 1],
- ["iva", -1, 9],
- ["anza", -1, 1],
- ["log\u00EDa", -1, 3],
- ["idad", -1, 8],
- ["able", -1, 1],
- ["ible", -1, 1],
- ["ante", -1, 2],
- ["mente", -1, 7],
- ["amente", 13, 6],
- ["aci\u00F3n", -1, 2],
- ["uci\u00F3n", -1, 4],
- ["ico", -1, 1],
- ["ismo", -1, 1],
- ["oso", -1, 1],
- ["amiento", -1, 1],
- ["imiento", -1, 1],
- ["ivo", -1, 9],
- ["ador", -1, 2],
- ["icas", -1, 1],
- ["ancias", -1, 2],
- ["encias", -1, 5],
- ["adoras", -1, 2],
- ["osas", -1, 1],
- ["istas", -1, 1],
- ["ivas", -1, 9],
- ["anzas", -1, 1],
- ["log\u00EDas", -1, 3],
- ["idades", -1, 8],
- ["ables", -1, 1],
- ["ibles", -1, 1],
- ["aciones", -1, 2],
- ["uciones", -1, 4],
- ["adores", -1, 2],
- ["antes", -1, 2],
- ["icos", -1, 1],
- ["ismos", -1, 1],
- ["osos", -1, 1],
- ["amientos", -1, 1],
- ["imientos", -1, 1],
- ["ivos", -1, 9]
- ];
-
- /** @const */ var a_7 = [
- ["ya", -1, 1],
- ["ye", -1, 1],
- ["yan", -1, 1],
- ["yen", -1, 1],
- ["yeron", -1, 1],
- ["yendo", -1, 1],
- ["yo", -1, 1],
- ["yas", -1, 1],
- ["yes", -1, 1],
- ["yais", -1, 1],
- ["yamos", -1, 1],
- ["y\u00F3", -1, 1]
- ];
-
- /** @const */ var a_8 = [
- ["aba", -1, 2],
- ["ada", -1, 2],
- ["ida", -1, 2],
- ["ara", -1, 2],
- ["iera", -1, 2],
- ["\u00EDa", -1, 2],
- ["ar\u00EDa", 5, 2],
- ["er\u00EDa", 5, 2],
- ["ir\u00EDa", 5, 2],
- ["ad", -1, 2],
- ["ed", -1, 2],
- ["id", -1, 2],
- ["ase", -1, 2],
- ["iese", -1, 2],
- ["aste", -1, 2],
- ["iste", -1, 2],
- ["an", -1, 2],
- ["aban", 16, 2],
- ["aran", 16, 2],
- ["ieran", 16, 2],
- ["\u00EDan", 16, 2],
- ["ar\u00EDan", 20, 2],
- ["er\u00EDan", 20, 2],
- ["ir\u00EDan", 20, 2],
- ["en", -1, 1],
- ["asen", 24, 2],
- ["iesen", 24, 2],
- ["aron", -1, 2],
- ["ieron", -1, 2],
- ["ar\u00E1n", -1, 2],
- ["er\u00E1n", -1, 2],
- ["ir\u00E1n", -1, 2],
- ["ado", -1, 2],
- ["ido", -1, 2],
- ["ando", -1, 2],
- ["iendo", -1, 2],
- ["ar", -1, 2],
- ["er", -1, 2],
- ["ir", -1, 2],
- ["as", -1, 2],
- ["abas", 39, 2],
- ["adas", 39, 2],
- ["idas", 39, 2],
- ["aras", 39, 2],
- ["ieras", 39, 2],
- ["\u00EDas", 39, 2],
- ["ar\u00EDas", 45, 2],
- ["er\u00EDas", 45, 2],
- ["ir\u00EDas", 45, 2],
- ["es", -1, 1],
- ["ases", 49, 2],
- ["ieses", 49, 2],
- ["abais", -1, 2],
- ["arais", -1, 2],
- ["ierais", -1, 2],
- ["\u00EDais", -1, 2],
- ["ar\u00EDais", 55, 2],
- ["er\u00EDais", 55, 2],
- ["ir\u00EDais", 55, 2],
- ["aseis", -1, 2],
- ["ieseis", -1, 2],
- ["asteis", -1, 2],
- ["isteis", -1, 2],
- ["\u00E1is", -1, 2],
- ["\u00E9is", -1, 1],
- ["ar\u00E9is", 64, 2],
- ["er\u00E9is", 64, 2],
- ["ir\u00E9is", 64, 2],
- ["ados", -1, 2],
- ["idos", -1, 2],
- ["amos", -1, 2],
- ["\u00E1bamos", 70, 2],
- ["\u00E1ramos", 70, 2],
- ["i\u00E9ramos", 70, 2],
- ["\u00EDamos", 70, 2],
- ["ar\u00EDamos", 74, 2],
- ["er\u00EDamos", 74, 2],
- ["ir\u00EDamos", 74, 2],
- ["emos", -1, 1],
- ["aremos", 78, 2],
- ["eremos", 78, 2],
- ["iremos", 78, 2],
- ["\u00E1semos", 78, 2],
- ["i\u00E9semos", 78, 2],
- ["imos", -1, 2],
- ["ar\u00E1s", -1, 2],
- ["er\u00E1s", -1, 2],
- ["ir\u00E1s", -1, 2],
- ["\u00EDs", -1, 2],
- ["ar\u00E1", -1, 2],
- ["er\u00E1", -1, 2],
- ["ir\u00E1", -1, 2],
- ["ar\u00E9", -1, 2],
- ["er\u00E9", -1, 2],
- ["ir\u00E9", -1, 2],
- ["i\u00F3", -1, 2]
- ];
-
- /** @const */ var a_9 = [
- ["a", -1, 1],
- ["e", -1, 2],
- ["o", -1, 1],
- ["os", -1, 1],
- ["\u00E1", -1, 1],
- ["\u00E9", -1, 2],
- ["\u00ED", -1, 1],
- ["\u00F3", -1, 1]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 10];
-
- var /** number */ I_p2 = 0;
- var /** number */ I_p1 = 0;
- var /** number */ I_pV = 0;
-
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_pV = base.limit;
- I_p1 = base.limit;
- I_p2 = base.limit;
- var /** number */ v_1 = base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_2 = base.cursor;
- lab2: {
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab2;
- }
- lab3: {
- var /** number */ v_3 = base.cursor;
- lab4: {
- if (!(base.out_grouping(g_v, 97, 252)))
- {
- break lab4;
- }
- golab5: while(true)
- {
- lab6: {
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab6;
- }
- break golab5;
- }
- if (base.cursor >= base.limit)
- {
- break lab4;
- }
- base.cursor++;
- }
- break lab3;
- }
- base.cursor = v_3;
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab2;
- }
- golab7: while(true)
- {
- lab8: {
- if (!(base.out_grouping(g_v, 97, 252)))
- {
- break lab8;
- }
- break golab7;
- }
- if (base.cursor >= base.limit)
- {
- break lab2;
- }
- base.cursor++;
- }
- }
- break lab1;
- }
- base.cursor = v_2;
- if (!(base.out_grouping(g_v, 97, 252)))
- {
- break lab0;
- }
- lab9: {
- var /** number */ v_6 = base.cursor;
- lab10: {
- if (!(base.out_grouping(g_v, 97, 252)))
- {
- break lab10;
- }
- golab11: while(true)
- {
- lab12: {
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab12;
- }
- break golab11;
- }
- if (base.cursor >= base.limit)
- {
- break lab10;
- }
- base.cursor++;
- }
- break lab9;
- }
- base.cursor = v_6;
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab0;
- }
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- }
- }
- I_pV = base.cursor;
- }
- base.cursor = v_1;
- var /** number */ v_8 = base.cursor;
- lab13: {
- golab14: while(true)
- {
- lab15: {
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab15;
- }
- break golab14;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- golab16: while(true)
- {
- lab17: {
- if (!(base.out_grouping(g_v, 97, 252)))
- {
- break lab17;
- }
- break golab16;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- golab18: while(true)
- {
- lab19: {
- if (!(base.in_grouping(g_v, 97, 252)))
- {
- break lab19;
- }
- break golab18;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- golab20: while(true)
- {
- lab21: {
- if (!(base.out_grouping(g_v, 97, 252)))
- {
- break lab21;
- }
- break golab20;
- }
- if (base.cursor >= base.limit)
- {
- break lab13;
- }
- base.cursor++;
- }
- I_p2 = base.cursor;
- }
- base.cursor = v_8;
- return true;
- };
-
- /** @return {boolean} */
- function r_postlude() {
- var /** number */ among_var;
- while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- among_var = base.find_among(a_0);
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("a"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("e"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("i"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("o"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("u"))
- {
- return false;
- }
- break;
- case 6:
- if (base.cursor >= base.limit)
- {
- break lab0;
- }
- base.cursor++;
- break;
- }
- continue;
- }
- base.cursor = v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_RV() {
- return I_pV <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R2() {
- return I_p2 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_attached_pronoun() {
- var /** number */ among_var;
- base.ket = base.cursor;
- if (base.find_among_b(a_1) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- return false;
- }
- if (!r_RV())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- base.bra = base.cursor;
- if (!base.slice_from("iendo"))
- {
- return false;
- }
- break;
- case 2:
- base.bra = base.cursor;
- if (!base.slice_from("ando"))
- {
- return false;
- }
- break;
- case 3:
- base.bra = base.cursor;
- if (!base.slice_from("ar"))
- {
- return false;
- }
- break;
- case 4:
- base.bra = base.cursor;
- if (!base.slice_from("er"))
- {
- return false;
- }
- break;
- case 5:
- base.bra = base.cursor;
- if (!base.slice_from("ir"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 7:
- if (!(base.eq_s_b("u")))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_standard_suffix() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_6);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("ic")))
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- case 3:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("log"))
- {
- return false;
- }
- break;
- case 4:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("u"))
- {
- return false;
- }
- break;
- case 5:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_from("ente"))
- {
- return false;
- }
- break;
- case 6:
- if (!r_R1())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_2 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_3);
- if (among_var == 0)
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- if (!base.slice_del())
- {
- return false;
- }
- switch (among_var) {
- case 1:
- base.ket = base.cursor;
- if (!(base.eq_s_b("at")))
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_2;
- break lab1;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- }
- break;
- case 7:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_3 = base.limit - base.cursor;
- lab2: {
- base.ket = base.cursor;
- if (base.find_among_b(a_4) == 0)
- {
- base.cursor = base.limit - v_3;
- break lab2;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_3;
- break lab2;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- case 8:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_4 = base.limit - base.cursor;
- lab3: {
- base.ket = base.cursor;
- if (base.find_among_b(a_5) == 0)
- {
- base.cursor = base.limit - v_4;
- break lab3;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_4;
- break lab3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- case 9:
- if (!r_R2())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_5 = base.limit - base.cursor;
- lab4: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("at")))
- {
- base.cursor = base.limit - v_5;
- break lab4;
- }
- base.bra = base.cursor;
- if (!r_R2())
- {
- base.cursor = base.limit - v_5;
- break lab4;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_y_verb_suffix() {
- if (base.cursor < I_pV)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_pV;
- base.ket = base.cursor;
- if (base.find_among_b(a_7) == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- if (!(base.eq_s_b("u")))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_verb_suffix() {
- var /** number */ among_var;
- if (base.cursor < I_pV)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_pV;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_8);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- switch (among_var) {
- case 1:
- var /** number */ v_3 = base.limit - base.cursor;
- lab0: {
- if (!(base.eq_s_b("u")))
- {
- base.cursor = base.limit - v_3;
- break lab0;
- }
- var /** number */ v_4 = base.limit - base.cursor;
- if (!(base.eq_s_b("g")))
- {
- base.cursor = base.limit - v_3;
- break lab0;
- }
- base.cursor = base.limit - v_4;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_residual_suffix() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_9);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_RV())
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("u")))
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- base.bra = base.cursor;
- var /** number */ v_2 = base.limit - base.cursor;
- if (!(base.eq_s_b("g")))
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- base.cursor = base.limit - v_2;
- if (!r_RV())
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- }
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- r_mark_regions();
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_2 = base.limit - base.cursor;
- r_attached_pronoun();
- base.cursor = base.limit - v_2;
- var /** number */ v_3 = base.limit - base.cursor;
- lab0: {
- lab1: {
- var /** number */ v_4 = base.limit - base.cursor;
- lab2: {
- if (!r_standard_suffix())
- {
- break lab2;
- }
- break lab1;
- }
- base.cursor = base.limit - v_4;
- lab3: {
- if (!r_y_verb_suffix())
- {
- break lab3;
- }
- break lab1;
- }
- base.cursor = base.limit - v_4;
- if (!r_verb_suffix())
- {
- break lab0;
- }
- }
- }
- base.cursor = base.limit - v_3;
- var /** number */ v_5 = base.limit - base.cursor;
- r_residual_suffix();
- base.cursor = base.limit - v_5;
- base.cursor = base.limit_backward;
- var /** number */ v_6 = base.cursor;
- r_postlude();
- base.cursor = v_6;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['SpanishStemmer'] = SpanishStemmer;
diff --git a/js/swedish-stemmer.js b/js/swedish-stemmer.js
deleted file mode 100644
index 1e636f7..0000000
--- a/js/swedish-stemmer.js
+++ /dev/null
@@ -1,267 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var SwedishStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["a", -1, 1],
- ["arna", 0, 1],
- ["erna", 0, 1],
- ["heterna", 2, 1],
- ["orna", 0, 1],
- ["ad", -1, 1],
- ["e", -1, 1],
- ["ade", 6, 1],
- ["ande", 6, 1],
- ["arne", 6, 1],
- ["are", 6, 1],
- ["aste", 6, 1],
- ["en", -1, 1],
- ["anden", 12, 1],
- ["aren", 12, 1],
- ["heten", 12, 1],
- ["ern", -1, 1],
- ["ar", -1, 1],
- ["er", -1, 1],
- ["heter", 18, 1],
- ["or", -1, 1],
- ["s", -1, 2],
- ["as", 21, 1],
- ["arnas", 22, 1],
- ["ernas", 22, 1],
- ["ornas", 22, 1],
- ["es", 21, 1],
- ["ades", 26, 1],
- ["andes", 26, 1],
- ["ens", 21, 1],
- ["arens", 29, 1],
- ["hetens", 29, 1],
- ["erns", 21, 1],
- ["at", -1, 1],
- ["andet", -1, 1],
- ["het", -1, 1],
- ["ast", -1, 1]
- ];
-
- /** @const */ var a_1 = [
- ["dd", -1, -1],
- ["gd", -1, -1],
- ["nn", -1, -1],
- ["dt", -1, -1],
- ["gt", -1, -1],
- ["kt", -1, -1],
- ["tt", -1, -1]
- ];
-
- /** @const */ var a_2 = [
- ["ig", -1, 1],
- ["lig", 0, 1],
- ["els", -1, 1],
- ["fullt", -1, 3],
- ["l\u00F6st", -1, 2]
- ];
-
- /** @const */ var /** Array */ g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32];
-
- /** @const */ var /** Array */ g_s_ending = [119, 127, 149];
-
- var /** number */ I_x = 0;
- var /** number */ I_p1 = 0;
-
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_p1 = base.limit;
- var /** number */ v_1 = base.cursor;
- {
- var /** number */ c1 = base.cursor + 3;
- if (c1 > base.limit)
- {
- return false;
- }
- base.cursor = c1;
- }
- I_x = base.cursor;
- base.cursor = v_1;
- golab0: while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab1: {
- if (!(base.in_grouping(g_v, 97, 246)))
- {
- break lab1;
- }
- base.cursor = v_2;
- break golab0;
- }
- base.cursor = v_2;
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- golab2: while(true)
- {
- lab3: {
- if (!(base.out_grouping(g_v, 97, 246)))
- {
- break lab3;
- }
- break golab2;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- I_p1 = base.cursor;
- lab4: {
- if (I_p1 >= I_x)
- {
- break lab4;
- }
- I_p1 = I_x;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_main_suffix() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_0);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- base.limit_backward = v_2;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!(base.in_grouping_b(g_s_ending, 98, 121)))
- {
- return false;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_consonant_pair() {
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- var /** number */ v_3 = base.limit - base.cursor;
- if (base.find_among_b(a_1) == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.cursor = base.limit - v_3;
- base.ket = base.cursor;
- if (base.cursor <= base.limit_backward)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.cursor--;
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- base.limit_backward = v_2;
- return true;
- };
-
- /** @return {boolean} */
- function r_other_suffix() {
- var /** number */ among_var;
- if (base.cursor < I_p1)
- {
- return false;
- }
- var /** number */ v_2 = base.limit_backward;
- base.limit_backward = I_p1;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_2);
- if (among_var == 0)
- {
- base.limit_backward = v_2;
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("l\u00F6s"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("full"))
- {
- return false;
- }
- break;
- }
- base.limit_backward = v_2;
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- var /** number */ v_1 = base.cursor;
- r_mark_regions();
- base.cursor = v_1;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_2 = base.limit - base.cursor;
- r_main_suffix();
- base.cursor = base.limit - v_2;
- var /** number */ v_3 = base.limit - base.cursor;
- r_consonant_pair();
- base.cursor = base.limit - v_3;
- var /** number */ v_4 = base.limit - base.cursor;
- r_other_suffix();
- base.cursor = base.limit - v_4;
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['SwedishStemmer'] = SwedishStemmer;
diff --git a/js/tamil-stemmer.js b/js/tamil-stemmer.js
deleted file mode 100644
index 2cc4b12..0000000
--- a/js/tamil-stemmer.js
+++ /dev/null
@@ -1,1190 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var TamilStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["\u0BB5\u0BC1", -1, 3],
- ["\u0BB5\u0BC2", -1, 4],
- ["\u0BB5\u0BCA", -1, 2],
- ["\u0BB5\u0BCB", -1, 1]
- ];
-
- /** @const */ var a_1 = [
- ["\u0B95", -1, -1],
- ["\u0B99", -1, -1],
- ["\u0B9A", -1, -1],
- ["\u0B9E", -1, -1],
- ["\u0BA4", -1, -1],
- ["\u0BA8", -1, -1],
- ["\u0BAA", -1, -1],
- ["\u0BAE", -1, -1],
- ["\u0BAF", -1, -1],
- ["\u0BB5", -1, -1]
- ];
-
- /** @const */ var a_2 = [
- ["\u0BBF", -1, -1],
- ["\u0BC0", -1, -1],
- ["\u0BC8", -1, -1]
- ];
-
- /** @const */ var a_3 = [
- ["\u0BBE", -1, -1],
- ["\u0BBF", -1, -1],
- ["\u0BC0", -1, -1],
- ["\u0BC1", -1, -1],
- ["\u0BC2", -1, -1],
- ["\u0BC6", -1, -1],
- ["\u0BC7", -1, -1],
- ["\u0BC8", -1, -1]
- ];
-
- /** @const */ var a_4 = [
- ["", -1, 2],
- ["\u0BC8", 0, 1],
- ["\u0BCD", 0, 1]
- ];
-
- /** @const */ var a_5 = [
- ["\u0BA8\u0BCD\u0BA4", -1, 1],
- ["\u0BAF", -1, 1],
- ["\u0BB5", -1, 1],
- ["\u0BA9\u0BC1", -1, 8],
- ["\u0BC1\u0B95\u0BCD", -1, 7],
- ["\u0BC1\u0B95\u0BCD\u0B95\u0BCD", -1, 7],
- ["\u0B9F\u0BCD\u0B95\u0BCD", -1, 3],
- ["\u0BB1\u0BCD\u0B95\u0BCD", -1, 4],
- ["\u0B99\u0BCD", -1, 9],
- ["\u0B9F\u0BCD\u0B9F\u0BCD", -1, 5],
- ["\u0BA4\u0BCD\u0BA4\u0BCD", -1, 6],
- ["\u0BA8\u0BCD\u0BA4\u0BCD", -1, 1],
- ["\u0BA8\u0BCD", -1, 1],
- ["\u0B9F\u0BCD\u0BAA\u0BCD", -1, 3],
- ["\u0BAF\u0BCD", -1, 2],
- ["\u0BA9\u0BCD\u0BB1\u0BCD", -1, 4],
- ["\u0BB5\u0BCD", -1, 1]
- ];
-
- /** @const */ var a_6 = [
- ["\u0B95", -1, -1],
- ["\u0B9A", -1, -1],
- ["\u0B9F", -1, -1],
- ["\u0BA4", -1, -1],
- ["\u0BAA", -1, -1],
- ["\u0BB1", -1, -1]
- ];
-
- /** @const */ var a_7 = [
- ["\u0B95", -1, -1],
- ["\u0B9A", -1, -1],
- ["\u0B9F", -1, -1],
- ["\u0BA4", -1, -1],
- ["\u0BAA", -1, -1],
- ["\u0BB1", -1, -1]
- ];
-
- /** @const */ var a_8 = [
- ["\u0B9E", -1, -1],
- ["\u0BA3", -1, -1],
- ["\u0BA8", -1, -1],
- ["\u0BA9", -1, -1],
- ["\u0BAE", -1, -1],
- ["\u0BAF", -1, -1],
- ["\u0BB0", -1, -1],
- ["\u0BB2", -1, -1],
- ["\u0BB3", -1, -1],
- ["\u0BB4", -1, -1],
- ["\u0BB5", -1, -1]
- ];
-
- /** @const */ var a_9 = [
- ["\u0BBE", -1, -1],
- ["\u0BBF", -1, -1],
- ["\u0BC0", -1, -1],
- ["\u0BC1", -1, -1],
- ["\u0BC2", -1, -1],
- ["\u0BC6", -1, -1],
- ["\u0BC7", -1, -1],
- ["\u0BC8", -1, -1],
- ["\u0BCD", -1, -1]
- ];
-
- /** @const */ var a_10 = [
- ["\u0B85", -1, -1],
- ["\u0B87", -1, -1],
- ["\u0B89", -1, -1]
- ];
-
- /** @const */ var a_11 = [
- ["\u0B95", -1, -1],
- ["\u0B99", -1, -1],
- ["\u0B9A", -1, -1],
- ["\u0B9E", -1, -1],
- ["\u0BA4", -1, -1],
- ["\u0BA8", -1, -1],
- ["\u0BAA", -1, -1],
- ["\u0BAE", -1, -1],
- ["\u0BAF", -1, -1],
- ["\u0BB5", -1, -1]
- ];
-
- /** @const */ var a_12 = [
- ["\u0B95", -1, -1],
- ["\u0B9A", -1, -1],
- ["\u0B9F", -1, -1],
- ["\u0BA4", -1, -1],
- ["\u0BAA", -1, -1],
- ["\u0BB1", -1, -1]
- ];
-
- /** @const */ var a_13 = [
- ["\u0B95\u0BB3\u0BCD", -1, 4],
- ["\u0BC1\u0B99\u0BCD\u0B95\u0BB3\u0BCD", 0, 1],
- ["\u0B9F\u0BCD\u0B95\u0BB3\u0BCD", 0, 3],
- ["\u0BB1\u0BCD\u0B95\u0BB3\u0BCD", 0, 2]
- ];
-
- /** @const */ var a_14 = [
- ["\u0BBE", -1, -1],
- ["\u0BC7", -1, -1],
- ["\u0BCB", -1, -1]
- ];
-
- /** @const */ var a_15 = [
- ["\u0BAA\u0BBF", -1, -1],
- ["\u0BB5\u0BBF", -1, -1]
- ];
-
- /** @const */ var a_16 = [
- ["\u0BBE", -1, -1],
- ["\u0BBF", -1, -1],
- ["\u0BC0", -1, -1],
- ["\u0BC1", -1, -1],
- ["\u0BC2", -1, -1],
- ["\u0BC6", -1, -1],
- ["\u0BC7", -1, -1],
- ["\u0BC8", -1, -1]
- ];
-
- /** @const */ var a_17 = [
- ["\u0BAA\u0B9F\u0BCD\u0B9F", -1, 3],
- ["\u0BAA\u0B9F\u0BCD\u0B9F\u0BA3", -1, 3],
- ["\u0BA4\u0BBE\u0BA9", -1, 3],
- ["\u0BAA\u0B9F\u0BBF\u0BA4\u0BBE\u0BA9", 2, 3],
- ["\u0BC6\u0BA9", -1, 1],
- ["\u0BBE\u0B95\u0BBF\u0BAF", -1, 1],
- ["\u0B95\u0BC1\u0BB0\u0BBF\u0BAF", -1, 3],
- ["\u0BC1\u0B9F\u0BC8\u0BAF", -1, 1],
- ["\u0BB2\u0BCD\u0BB2", -1, 2],
- ["\u0BC1\u0BB3\u0BCD\u0BB3", -1, 1],
- ["\u0BBE\u0B95\u0BBF", -1, 1],
- ["\u0BAA\u0B9F\u0BBF", -1, 3],
- ["\u0BBF\u0BA9\u0BCD\u0BB1\u0BBF", -1, 1],
- ["\u0BAA\u0BB1\u0BCD\u0BB1\u0BBF", -1, 3],
- ["\u0BAA\u0B9F\u0BC1", -1, 3],
- ["\u0BB5\u0BBF\u0B9F\u0BC1", -1, 3],
- ["\u0BAA\u0B9F\u0BCD\u0B9F\u0BC1", -1, 3],
- ["\u0BB5\u0BBF\u0B9F\u0BCD\u0B9F\u0BC1", -1, 3],
- ["\u0BAA\u0B9F\u0BCD\u0B9F\u0BA4\u0BC1", -1, 3],
- ["\u0BC6\u0BA9\u0BCD\u0BB1\u0BC1", -1, 1],
- ["\u0BC1\u0B9F\u0BC8", -1, 1],
- ["\u0BBF\u0BB2\u0BCD\u0BB2\u0BC8", -1, 1],
- ["\u0BC1\u0B9F\u0BA9\u0BCD", -1, 1],
- ["\u0BBF\u0B9F\u0BAE\u0BCD", -1, 1],
- ["\u0BC6\u0BB2\u0BCD\u0BB2\u0BBE\u0BAE\u0BCD", -1, 3],
- ["\u0BC6\u0BA9\u0BC1\u0BAE\u0BCD", -1, 1]
- ];
-
- /** @const */ var a_18 = [
- ["\u0BBE", -1, -1],
- ["\u0BBF", -1, -1],
- ["\u0BC0", -1, -1],
- ["\u0BC1", -1, -1],
- ["\u0BC2", -1, -1],
- ["\u0BC6", -1, -1],
- ["\u0BC7", -1, -1],
- ["\u0BC8", -1, -1]
- ];
-
- /** @const */ var a_19 = [
- ["\u0BBE", -1, -1],
- ["\u0BBF", -1, -1],
- ["\u0BC0", -1, -1],
- ["\u0BC1", -1, -1],
- ["\u0BC2", -1, -1],
- ["\u0BC6", -1, -1],
- ["\u0BC7", -1, -1],
- ["\u0BC8", -1, -1]
- ];
-
- /** @const */ var a_20 = [
- ["\u0BB5\u0BBF\u0B9F", -1, 2],
- ["\u0BC0", -1, 7],
- ["\u0BCA\u0B9F\u0BC1", -1, 2],
- ["\u0BCB\u0B9F\u0BC1", -1, 2],
- ["\u0BA4\u0BC1", -1, 6],
- ["\u0BBF\u0BB0\u0BC1\u0BA8\u0BCD\u0BA4\u0BC1", 4, 2],
- ["\u0BBF\u0BA9\u0BCD\u0BB1\u0BC1", -1, 2],
- ["\u0BC1\u0B9F\u0BC8", -1, 2],
- ["\u0BA9\u0BC8", -1, 1],
- ["\u0B95\u0BA3\u0BCD", -1, 1],
- ["\u0BBF\u0BA9\u0BCD", -1, 3],
- ["\u0BAE\u0BC1\u0BA9\u0BCD", -1, 1],
- ["\u0BBF\u0B9F\u0BAE\u0BCD", -1, 4],
- ["\u0BBF\u0BB1\u0BCD", -1, 2],
- ["\u0BAE\u0BC7\u0BB1\u0BCD", -1, 1],
- ["\u0BB2\u0BCD", -1, 5],
- ["\u0BBE\u0BAE\u0BB2\u0BCD", 15, 2],
- ["\u0BBE\u0BB2\u0BCD", 15, 2],
- ["\u0BBF\u0BB2\u0BCD", 15, 2],
- ["\u0BAE\u0BC7\u0BB2\u0BCD", 15, 1],
- ["\u0BC1\u0BB3\u0BCD", -1, 2],
- ["\u0B95\u0BC0\u0BB4\u0BCD", -1, 1]
- ];
-
- /** @const */ var a_21 = [
- ["\u0B95", -1, -1],
- ["\u0B9A", -1, -1],
- ["\u0B9F", -1, -1],
- ["\u0BA4", -1, -1],
- ["\u0BAA", -1, -1],
- ["\u0BB1", -1, -1]
- ];
-
- /** @const */ var a_22 = [
- ["\u0B95", -1, -1],
- ["\u0B9A", -1, -1],
- ["\u0B9F", -1, -1],
- ["\u0BA4", -1, -1],
- ["\u0BAA", -1, -1],
- ["\u0BB1", -1, -1]
- ];
-
- /** @const */ var a_23 = [
- ["\u0B85", -1, -1],
- ["\u0B86", -1, -1],
- ["\u0B87", -1, -1],
- ["\u0B88", -1, -1],
- ["\u0B89", -1, -1],
- ["\u0B8A", -1, -1],
- ["\u0B8E", -1, -1],
- ["\u0B8F", -1, -1],
- ["\u0B90", -1, -1],
- ["\u0B92", -1, -1],
- ["\u0B93", -1, -1],
- ["\u0B94", -1, -1]
- ];
-
- /** @const */ var a_24 = [
- ["\u0BBE", -1, -1],
- ["\u0BBF", -1, -1],
- ["\u0BC0", -1, -1],
- ["\u0BC1", -1, -1],
- ["\u0BC2", -1, -1],
- ["\u0BC6", -1, -1],
- ["\u0BC7", -1, -1],
- ["\u0BC8", -1, -1]
- ];
-
- /** @const */ var a_25 = [
- ["\u0B95", -1, 1],
- ["\u0BA4", -1, 1],
- ["\u0BA9", -1, 1],
- ["\u0BAA", -1, 1],
- ["\u0BAF", -1, 1],
- ["\u0BBE", -1, 5],
- ["\u0B95\u0BC1", -1, 6],
- ["\u0BAA\u0B9F\u0BC1", -1, 1],
- ["\u0BA4\u0BC1", -1, 3],
- ["\u0BBF\u0BB1\u0BCD\u0BB1\u0BC1", -1, 1],
- ["\u0BA9\u0BC8", -1, 1],
- ["\u0BB5\u0BC8", -1, 1],
- ["\u0BA9\u0BA9\u0BCD", -1, 1],
- ["\u0BAA\u0BA9\u0BCD", -1, 1],
- ["\u0BB5\u0BA9\u0BCD", -1, 2],
- ["\u0BBE\u0BA9\u0BCD", -1, 4],
- ["\u0BA9\u0BBE\u0BA9\u0BCD", 15, 1],
- ["\u0BAE\u0BBF\u0BA9\u0BCD", -1, 1],
- ["\u0BA9\u0BC6\u0BA9\u0BCD", -1, 1],
- ["\u0BC7\u0BA9\u0BCD", -1, 5],
- ["\u0BA9\u0BAE\u0BCD", -1, 1],
- ["\u0BAA\u0BAE\u0BCD", -1, 1],
- ["\u0BBE\u0BAE\u0BCD", -1, 5],
- ["\u0B95\u0BC1\u0BAE\u0BCD", -1, 1],
- ["\u0B9F\u0BC1\u0BAE\u0BCD", -1, 5],
- ["\u0BA4\u0BC1\u0BAE\u0BCD", -1, 1],
- ["\u0BB1\u0BC1\u0BAE\u0BCD", -1, 1],
- ["\u0BC6\u0BAE\u0BCD", -1, 5],
- ["\u0BC7\u0BAE\u0BCD", -1, 5],
- ["\u0BCB\u0BAE\u0BCD", -1, 5],
- ["\u0BBE\u0BAF\u0BCD", -1, 5],
- ["\u0BA9\u0BB0\u0BCD", -1, 1],
- ["\u0BAA\u0BB0\u0BCD", -1, 1],
- ["\u0BC0\u0BAF\u0BB0\u0BCD", -1, 5],
- ["\u0BB5\u0BB0\u0BCD", -1, 1],
- ["\u0BBE\u0BB0\u0BCD", -1, 5],
- ["\u0BA9\u0BBE\u0BB0\u0BCD", 35, 1],
- ["\u0BAE\u0BBE\u0BB0\u0BCD", 35, 1],
- ["\u0B95\u0BCA\u0BA3\u0BCD\u0B9F\u0BBF\u0BB0\u0BCD", -1, 1],
- ["\u0BA9\u0BBF\u0BB0\u0BCD", -1, 5],
- ["\u0BC0\u0BB0\u0BCD", -1, 5],
- ["\u0BA9\u0BB3\u0BCD", -1, 1],
- ["\u0BAA\u0BB3\u0BCD", -1, 1],
- ["\u0BB5\u0BB3\u0BCD", -1, 1],
- ["\u0BBE\u0BB3\u0BCD", -1, 5],
- ["\u0BA9\u0BBE\u0BB3\u0BCD", 44, 1]
- ];
-
- /** @const */ var a_26 = [
- ["\u0B95\u0BBF\u0BB1", -1, -1],
- ["\u0B95\u0BBF\u0BA9\u0BCD\u0BB1", -1, -1],
- ["\u0BBE\u0BA8\u0BBF\u0BA9\u0BCD\u0BB1", -1, -1],
- ["\u0B95\u0BBF\u0BB1\u0BCD", -1, -1],
- ["\u0B95\u0BBF\u0BA9\u0BCD\u0BB1\u0BCD", -1, -1],
- ["\u0BBE\u0BA8\u0BBF\u0BA9\u0BCD\u0BB1\u0BCD", -1, -1]
- ];
-
- var /** boolean */ B_found_vetrumai_urupu = false;
- var /** boolean */ B_found_a_match = false;
-
-
- /** @return {boolean} */
- function r_has_min_length() {
- return base.current.length > 4;
- };
-
- /** @return {boolean} */
- function r_fix_va_start() {
- var /** number */ among_var;
- base.bra = base.cursor;
- among_var = base.find_among(a_0);
- if (among_var == 0)
- {
- return false;
- }
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u0B93"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u0B92"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("\u0B89"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("\u0B8A"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_fix_endings() {
- var /** number */ v_1 = base.cursor;
- lab0: {
- while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab1: {
- if (!r_fix_ending())
- {
- break lab1;
- }
- continue;
- }
- base.cursor = v_2;
- break;
- }
- }
- base.cursor = v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_question_prefixes() {
- base.bra = base.cursor;
- if (!(base.eq_s("\u0B8E")))
- {
- return false;
- }
- if (base.find_among(a_1) == 0)
- {
- return false;
- }
- if (!(base.eq_s("\u0BCD")))
- {
- return false;
- }
- base.ket = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_1 = base.cursor;
- r_fix_va_start();
- base.cursor = v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_fix_ending() {
- var /** number */ among_var;
- if (base.current.length <= 3)
- {
- return false;
- }
- base.limit_backward = base.cursor; base.cursor = base.limit;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_5);
- if (among_var == 0)
- {
- break lab1;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- var /** number */ v_2 = base.limit - base.cursor;
- if (base.find_among_b(a_2) == 0)
- {
- break lab1;
- }
- base.cursor = base.limit - v_2;
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("\u0BB3\u0BCD"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("\u0BB2\u0BCD"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("\u0B9F\u0BC1"))
- {
- return false;
- }
- break;
- case 6:
- if (!B_found_vetrumai_urupu)
- {
- break lab1;
- }
- {
- var /** number */ v_3 = base.limit - base.cursor;
- lab2: {
- if (!(base.eq_s_b("\u0BC8")))
- {
- break lab2;
- }
- break lab1;
- }
- base.cursor = base.limit - v_3;
- }
- if (!base.slice_from("\u0BAE\u0BCD"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- break;
- case 8:
- {
- var /** number */ v_4 = base.limit - base.cursor;
- lab3: {
- if (base.find_among_b(a_3) == 0)
- {
- break lab3;
- }
- break lab1;
- }
- base.cursor = base.limit - v_4;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 9:
- among_var = base.find_among_b(a_4);
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u0BAE\u0BCD"))
- {
- return false;
- }
- break;
- }
- break;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- if (!(base.eq_s_b("\u0BCD")))
- {
- return false;
- }
- lab4: {
- var /** number */ v_5 = base.limit - base.cursor;
- lab5: {
- if (base.find_among_b(a_6) == 0)
- {
- break lab5;
- }
- var /** number */ v_6 = base.limit - base.cursor;
- lab6: {
- if (!(base.eq_s_b("\u0BCD")))
- {
- base.cursor = base.limit - v_6;
- break lab6;
- }
- if (base.find_among_b(a_7) == 0)
- {
- base.cursor = base.limit - v_6;
- break lab6;
- }
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- break lab4;
- }
- base.cursor = base.limit - v_5;
- lab7: {
- if (base.find_among_b(a_8) == 0)
- {
- break lab7;
- }
- base.bra = base.cursor;
- if (!(base.eq_s_b("\u0BCD")))
- {
- break lab7;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break lab4;
- }
- base.cursor = base.limit - v_5;
- var /** number */ v_7 = base.limit - base.cursor;
- if (base.find_among_b(a_9) == 0)
- {
- return false;
- }
- base.cursor = base.limit - v_7;
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- }
- }
- base.cursor = base.limit_backward;
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_pronoun_prefixes() {
- base.bra = base.cursor;
- if (base.find_among(a_10) == 0)
- {
- return false;
- }
- if (base.find_among(a_11) == 0)
- {
- return false;
- }
- if (!(base.eq_s("\u0BCD")))
- {
- return false;
- }
- base.ket = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_1 = base.cursor;
- r_fix_va_start();
- base.cursor = v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_plural_suffix() {
- var /** number */ among_var;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_13);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (base.find_among_b(a_12) == 0)
- {
- break lab1;
- }
- if (!base.slice_from("\u0BC1\u0B99\u0BCD"))
- {
- return false;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- }
- break;
- case 2:
- if (!base.slice_from("\u0BB2\u0BCD"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("\u0BB3\u0BCD"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- base.cursor = base.limit_backward;
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_question_suffixes() {
- if (!r_has_min_length())
- {
- return false;
- }
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- if (base.find_among_b(a_14) == 0)
- {
- break lab0;
- }
- base.bra = base.cursor;
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- }
- base.cursor = base.limit - v_1;
- base.cursor = base.limit_backward;
- r_fix_endings();
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_command_suffixes() {
- if (!r_has_min_length())
- {
- return false;
- }
- base.limit_backward = base.cursor; base.cursor = base.limit;
- base.ket = base.cursor;
- if (base.find_among_b(a_15) == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- base.cursor = base.limit_backward;
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_um() {
- if (!r_has_min_length())
- {
- return false;
- }
- base.limit_backward = base.cursor; base.cursor = base.limit;
- base.ket = base.cursor;
- if (!(base.eq_s_b("\u0BC1\u0BAE\u0BCD")))
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- base.cursor = base.limit_backward;
- var /** number */ v_1 = base.cursor;
- r_fix_ending();
- base.cursor = v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_common_word_endings() {
- var /** number */ among_var;
- if (!r_has_min_length())
- {
- return false;
- }
- base.limit_backward = base.cursor; base.cursor = base.limit;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_17);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- break;
- case 2:
- {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- if (base.find_among_b(a_16) == 0)
- {
- break lab0;
- }
- return false;
- }
- base.cursor = base.limit - v_1;
- }
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- base.cursor = base.limit_backward;
- r_fix_endings();
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_vetrumai_urupukal() {
- var /** number */ among_var;
- B_found_vetrumai_urupu = false;
- if (!r_has_min_length())
- {
- return false;
- }
- base.limit_backward = base.cursor; base.cursor = base.limit;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- var /** number */ v_2 = base.limit - base.cursor;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_20);
- if (among_var == 0)
- {
- break lab1;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- break;
- case 3:
- {
- var /** number */ v_3 = base.limit - base.cursor;
- lab2: {
- if (!(base.eq_s_b("\u0BAE")))
- {
- break lab2;
- }
- break lab1;
- }
- base.cursor = base.limit - v_3;
- }
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- break;
- case 4:
- if (base.current.length < 7)
- {
- break lab1;
- }
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- break;
- case 5:
- {
- var /** number */ v_4 = base.limit - base.cursor;
- lab3: {
- if (base.find_among_b(a_18) == 0)
- {
- break lab3;
- }
- break lab1;
- }
- base.cursor = base.limit - v_4;
- }
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- break;
- case 6:
- {
- var /** number */ v_5 = base.limit - base.cursor;
- lab4: {
- if (base.find_among_b(a_19) == 0)
- {
- break lab4;
- }
- break lab1;
- }
- base.cursor = base.limit - v_5;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("\u0BBF"))
- {
- return false;
- }
- break;
- }
- base.cursor = base.limit - v_2;
- break lab0;
- }
- base.cursor = base.limit - v_1;
- var /** number */ v_6 = base.limit - base.cursor;
- base.ket = base.cursor;
- if (!(base.eq_s_b("\u0BC8")))
- {
- return false;
- }
- lab5: {
- var /** number */ v_7 = base.limit - base.cursor;
- lab6: {
- {
- var /** number */ v_8 = base.limit - base.cursor;
- lab7: {
- if (base.find_among_b(a_21) == 0)
- {
- break lab7;
- }
- break lab6;
- }
- base.cursor = base.limit - v_8;
- }
- break lab5;
- }
- base.cursor = base.limit - v_7;
- var /** number */ v_9 = base.limit - base.cursor;
- if (base.find_among_b(a_22) == 0)
- {
- return false;
- }
- if (!(base.eq_s_b("\u0BCD")))
- {
- return false;
- }
- base.cursor = base.limit - v_9;
- }
- base.bra = base.cursor;
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- base.cursor = base.limit - v_6;
- }
- B_found_vetrumai_urupu = true;
- var /** number */ v_10 = base.limit - base.cursor;
- lab8: {
- base.ket = base.cursor;
- if (!(base.eq_s_b("\u0BBF\u0BA9\u0BCD")))
- {
- break lab8;
- }
- base.bra = base.cursor;
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- }
- base.cursor = base.limit - v_10;
- base.cursor = base.limit_backward;
- r_fix_endings();
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_tense_suffixes() {
- B_found_a_match = true;
- while(true)
- {
- var /** number */ v_1 = base.cursor;
- lab0: {
- if (!B_found_a_match)
- {
- break lab0;
- }
- var /** number */ v_2 = base.cursor;
- r_remove_tense_suffix();
- base.cursor = v_2;
- continue;
- }
- base.cursor = v_1;
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_remove_tense_suffix() {
- var /** number */ among_var;
- B_found_a_match = false;
- if (!r_has_min_length())
- {
- return false;
- }
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- var /** number */ v_2 = base.limit - base.cursor;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_25);
- if (among_var == 0)
- {
- break lab0;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- {
- var /** number */ v_3 = base.limit - base.cursor;
- lab1: {
- if (base.find_among_b(a_23) == 0)
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_3;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 3:
- {
- var /** number */ v_4 = base.limit - base.cursor;
- lab2: {
- if (base.find_among_b(a_24) == 0)
- {
- break lab2;
- }
- break lab0;
- }
- base.cursor = base.limit - v_4;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 4:
- {
- var /** number */ v_5 = base.limit - base.cursor;
- lab3: {
- if (!(base.eq_s_b("\u0B9A")))
- {
- break lab3;
- }
- break lab0;
- }
- base.cursor = base.limit - v_5;
- }
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("\u0BCD"))
- {
- return false;
- }
- break;
- case 6:
- var /** number */ v_6 = base.limit - base.cursor;
- if (!(base.eq_s_b("\u0BCD")))
- {
- break lab0;
- }
- base.cursor = base.limit - v_6;
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- B_found_a_match = true;
- base.cursor = base.limit - v_2;
- }
- base.cursor = base.limit - v_1;
- var /** number */ v_7 = base.limit - base.cursor;
- lab4: {
- base.ket = base.cursor;
- if (base.find_among_b(a_26) == 0)
- {
- break lab4;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- B_found_a_match = true;
- }
- base.cursor = base.limit - v_7;
- base.cursor = base.limit_backward;
- r_fix_endings();
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- B_found_vetrumai_urupu = false;
- var /** number */ v_1 = base.cursor;
- r_fix_ending();
- base.cursor = v_1;
- if (!r_has_min_length())
- {
- return false;
- }
- var /** number */ v_2 = base.cursor;
- r_remove_question_prefixes();
- base.cursor = v_2;
- var /** number */ v_3 = base.cursor;
- r_remove_pronoun_prefixes();
- base.cursor = v_3;
- var /** number */ v_4 = base.cursor;
- r_remove_question_suffixes();
- base.cursor = v_4;
- var /** number */ v_5 = base.cursor;
- r_remove_um();
- base.cursor = v_5;
- var /** number */ v_6 = base.cursor;
- r_remove_common_word_endings();
- base.cursor = v_6;
- var /** number */ v_7 = base.cursor;
- r_remove_vetrumai_urupukal();
- base.cursor = v_7;
- var /** number */ v_8 = base.cursor;
- r_remove_plural_suffix();
- base.cursor = v_8;
- var /** number */ v_9 = base.cursor;
- r_remove_command_suffixes();
- base.cursor = v_9;
- var /** number */ v_10 = base.cursor;
- r_remove_tense_suffixes();
- base.cursor = v_10;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['TamilStemmer'] = TamilStemmer;
diff --git a/js/turkish-stemmer.js b/js/turkish-stemmer.js
deleted file mode 100644
index 84f6255..0000000
--- a/js/turkish-stemmer.js
+++ /dev/null
@@ -1,2361 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var TurkishStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["m", -1, -1],
- ["n", -1, -1],
- ["miz", -1, -1],
- ["niz", -1, -1],
- ["muz", -1, -1],
- ["nuz", -1, -1],
- ["m\u00FCz", -1, -1],
- ["n\u00FCz", -1, -1],
- ["m\u0131z", -1, -1],
- ["n\u0131z", -1, -1]
- ];
-
- /** @const */ var a_1 = [
- ["leri", -1, -1],
- ["lar\u0131", -1, -1]
- ];
-
- /** @const */ var a_2 = [
- ["ni", -1, -1],
- ["nu", -1, -1],
- ["n\u00FC", -1, -1],
- ["n\u0131", -1, -1]
- ];
-
- /** @const */ var a_3 = [
- ["in", -1, -1],
- ["un", -1, -1],
- ["\u00FCn", -1, -1],
- ["\u0131n", -1, -1]
- ];
-
- /** @const */ var a_4 = [
- ["a", -1, -1],
- ["e", -1, -1]
- ];
-
- /** @const */ var a_5 = [
- ["na", -1, -1],
- ["ne", -1, -1]
- ];
-
- /** @const */ var a_6 = [
- ["da", -1, -1],
- ["ta", -1, -1],
- ["de", -1, -1],
- ["te", -1, -1]
- ];
-
- /** @const */ var a_7 = [
- ["nda", -1, -1],
- ["nde", -1, -1]
- ];
-
- /** @const */ var a_8 = [
- ["dan", -1, -1],
- ["tan", -1, -1],
- ["den", -1, -1],
- ["ten", -1, -1]
- ];
-
- /** @const */ var a_9 = [
- ["ndan", -1, -1],
- ["nden", -1, -1]
- ];
-
- /** @const */ var a_10 = [
- ["la", -1, -1],
- ["le", -1, -1]
- ];
-
- /** @const */ var a_11 = [
- ["ca", -1, -1],
- ["ce", -1, -1]
- ];
-
- /** @const */ var a_12 = [
- ["im", -1, -1],
- ["um", -1, -1],
- ["\u00FCm", -1, -1],
- ["\u0131m", -1, -1]
- ];
-
- /** @const */ var a_13 = [
- ["sin", -1, -1],
- ["sun", -1, -1],
- ["s\u00FCn", -1, -1],
- ["s\u0131n", -1, -1]
- ];
-
- /** @const */ var a_14 = [
- ["iz", -1, -1],
- ["uz", -1, -1],
- ["\u00FCz", -1, -1],
- ["\u0131z", -1, -1]
- ];
-
- /** @const */ var a_15 = [
- ["siniz", -1, -1],
- ["sunuz", -1, -1],
- ["s\u00FCn\u00FCz", -1, -1],
- ["s\u0131n\u0131z", -1, -1]
- ];
-
- /** @const */ var a_16 = [
- ["lar", -1, -1],
- ["ler", -1, -1]
- ];
-
- /** @const */ var a_17 = [
- ["niz", -1, -1],
- ["nuz", -1, -1],
- ["n\u00FCz", -1, -1],
- ["n\u0131z", -1, -1]
- ];
-
- /** @const */ var a_18 = [
- ["dir", -1, -1],
- ["tir", -1, -1],
- ["dur", -1, -1],
- ["tur", -1, -1],
- ["d\u00FCr", -1, -1],
- ["t\u00FCr", -1, -1],
- ["d\u0131r", -1, -1],
- ["t\u0131r", -1, -1]
- ];
-
- /** @const */ var a_19 = [
- ["cas\u0131na", -1, -1],
- ["cesine", -1, -1]
- ];
-
- /** @const */ var a_20 = [
- ["di", -1, -1],
- ["ti", -1, -1],
- ["dik", -1, -1],
- ["tik", -1, -1],
- ["duk", -1, -1],
- ["tuk", -1, -1],
- ["d\u00FCk", -1, -1],
- ["t\u00FCk", -1, -1],
- ["d\u0131k", -1, -1],
- ["t\u0131k", -1, -1],
- ["dim", -1, -1],
- ["tim", -1, -1],
- ["dum", -1, -1],
- ["tum", -1, -1],
- ["d\u00FCm", -1, -1],
- ["t\u00FCm", -1, -1],
- ["d\u0131m", -1, -1],
- ["t\u0131m", -1, -1],
- ["din", -1, -1],
- ["tin", -1, -1],
- ["dun", -1, -1],
- ["tun", -1, -1],
- ["d\u00FCn", -1, -1],
- ["t\u00FCn", -1, -1],
- ["d\u0131n", -1, -1],
- ["t\u0131n", -1, -1],
- ["du", -1, -1],
- ["tu", -1, -1],
- ["d\u00FC", -1, -1],
- ["t\u00FC", -1, -1],
- ["d\u0131", -1, -1],
- ["t\u0131", -1, -1]
- ];
-
- /** @const */ var a_21 = [
- ["sa", -1, -1],
- ["se", -1, -1],
- ["sak", -1, -1],
- ["sek", -1, -1],
- ["sam", -1, -1],
- ["sem", -1, -1],
- ["san", -1, -1],
- ["sen", -1, -1]
- ];
-
- /** @const */ var a_22 = [
- ["mi\u015F", -1, -1],
- ["mu\u015F", -1, -1],
- ["m\u00FC\u015F", -1, -1],
- ["m\u0131\u015F", -1, -1]
- ];
-
- /** @const */ var a_23 = [
- ["b", -1, 1],
- ["c", -1, 2],
- ["d", -1, 3],
- ["\u011F", -1, 4]
- ];
-
- /** @const */ var /** Array */ g_vowel = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 8, 0, 0, 0, 0, 0, 0, 1];
-
- /** @const */ var /** Array */ g_U = [1, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 1];
-
- /** @const */ var /** Array */ g_vowel1 = [1, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1];
-
- /** @const */ var /** Array */ g_vowel2 = [17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130];
-
- /** @const */ var /** Array */ g_vowel3 = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1];
-
- /** @const */ var /** Array */ g_vowel4 = [17];
-
- /** @const */ var /** Array */ g_vowel5 = [65];
-
- /** @const */ var /** Array */ g_vowel6 = [65];
-
- var /** boolean */ B_continue_stemming_noun_suffixes = false;
-
-
- /** @return {boolean} */
- function r_check_vowel_harmony() {
- var /** number */ v_1 = base.limit - base.cursor;
- golab0: while(true)
- {
- var /** number */ v_2 = base.limit - base.cursor;
- lab1: {
- if (!(base.in_grouping_b(g_vowel, 97, 305)))
- {
- break lab1;
- }
- base.cursor = base.limit - v_2;
- break golab0;
- }
- base.cursor = base.limit - v_2;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- }
- lab2: {
- var /** number */ v_3 = base.limit - base.cursor;
- lab3: {
- if (!(base.eq_s_b("a")))
- {
- break lab3;
- }
- golab4: while(true)
- {
- var /** number */ v_4 = base.limit - base.cursor;
- lab5: {
- if (!(base.in_grouping_b(g_vowel1, 97, 305)))
- {
- break lab5;
- }
- base.cursor = base.limit - v_4;
- break golab4;
- }
- base.cursor = base.limit - v_4;
- if (base.cursor <= base.limit_backward)
- {
- break lab3;
- }
- base.cursor--;
- }
- break lab2;
- }
- base.cursor = base.limit - v_3;
- lab6: {
- if (!(base.eq_s_b("e")))
- {
- break lab6;
- }
- golab7: while(true)
- {
- var /** number */ v_5 = base.limit - base.cursor;
- lab8: {
- if (!(base.in_grouping_b(g_vowel2, 101, 252)))
- {
- break lab8;
- }
- base.cursor = base.limit - v_5;
- break golab7;
- }
- base.cursor = base.limit - v_5;
- if (base.cursor <= base.limit_backward)
- {
- break lab6;
- }
- base.cursor--;
- }
- break lab2;
- }
- base.cursor = base.limit - v_3;
- lab9: {
- if (!(base.eq_s_b("\u0131")))
- {
- break lab9;
- }
- golab10: while(true)
- {
- var /** number */ v_6 = base.limit - base.cursor;
- lab11: {
- if (!(base.in_grouping_b(g_vowel3, 97, 305)))
- {
- break lab11;
- }
- base.cursor = base.limit - v_6;
- break golab10;
- }
- base.cursor = base.limit - v_6;
- if (base.cursor <= base.limit_backward)
- {
- break lab9;
- }
- base.cursor--;
- }
- break lab2;
- }
- base.cursor = base.limit - v_3;
- lab12: {
- if (!(base.eq_s_b("i")))
- {
- break lab12;
- }
- golab13: while(true)
- {
- var /** number */ v_7 = base.limit - base.cursor;
- lab14: {
- if (!(base.in_grouping_b(g_vowel4, 101, 105)))
- {
- break lab14;
- }
- base.cursor = base.limit - v_7;
- break golab13;
- }
- base.cursor = base.limit - v_7;
- if (base.cursor <= base.limit_backward)
- {
- break lab12;
- }
- base.cursor--;
- }
- break lab2;
- }
- base.cursor = base.limit - v_3;
- lab15: {
- if (!(base.eq_s_b("o")))
- {
- break lab15;
- }
- golab16: while(true)
- {
- var /** number */ v_8 = base.limit - base.cursor;
- lab17: {
- if (!(base.in_grouping_b(g_vowel5, 111, 117)))
- {
- break lab17;
- }
- base.cursor = base.limit - v_8;
- break golab16;
- }
- base.cursor = base.limit - v_8;
- if (base.cursor <= base.limit_backward)
- {
- break lab15;
- }
- base.cursor--;
- }
- break lab2;
- }
- base.cursor = base.limit - v_3;
- lab18: {
- if (!(base.eq_s_b("\u00F6")))
- {
- break lab18;
- }
- golab19: while(true)
- {
- var /** number */ v_9 = base.limit - base.cursor;
- lab20: {
- if (!(base.in_grouping_b(g_vowel6, 246, 252)))
- {
- break lab20;
- }
- base.cursor = base.limit - v_9;
- break golab19;
- }
- base.cursor = base.limit - v_9;
- if (base.cursor <= base.limit_backward)
- {
- break lab18;
- }
- base.cursor--;
- }
- break lab2;
- }
- base.cursor = base.limit - v_3;
- lab21: {
- if (!(base.eq_s_b("u")))
- {
- break lab21;
- }
- golab22: while(true)
- {
- var /** number */ v_10 = base.limit - base.cursor;
- lab23: {
- if (!(base.in_grouping_b(g_vowel5, 111, 117)))
- {
- break lab23;
- }
- base.cursor = base.limit - v_10;
- break golab22;
- }
- base.cursor = base.limit - v_10;
- if (base.cursor <= base.limit_backward)
- {
- break lab21;
- }
- base.cursor--;
- }
- break lab2;
- }
- base.cursor = base.limit - v_3;
- if (!(base.eq_s_b("\u00FC")))
- {
- return false;
- }
- golab24: while(true)
- {
- var /** number */ v_11 = base.limit - base.cursor;
- lab25: {
- if (!(base.in_grouping_b(g_vowel6, 246, 252)))
- {
- break lab25;
- }
- base.cursor = base.limit - v_11;
- break golab24;
- }
- base.cursor = base.limit - v_11;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- }
- }
- base.cursor = base.limit - v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_suffix_with_optional_n_consonant() {
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.eq_s_b("n")))
- {
- break lab1;
- }
- var /** number */ v_2 = base.limit - base.cursor;
- if (!(base.in_grouping_b(g_vowel, 97, 305)))
- {
- break lab1;
- }
- base.cursor = base.limit - v_2;
- break lab0;
- }
- base.cursor = base.limit - v_1;
- {
- var /** number */ v_3 = base.limit - base.cursor;
- lab2: {
- var /** number */ v_4 = base.limit - base.cursor;
- if (!(base.eq_s_b("n")))
- {
- break lab2;
- }
- base.cursor = base.limit - v_4;
- return false;
- }
- base.cursor = base.limit - v_3;
- }
- var /** number */ v_5 = base.limit - base.cursor;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- if (!(base.in_grouping_b(g_vowel, 97, 305)))
- {
- return false;
- }
- base.cursor = base.limit - v_5;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_suffix_with_optional_s_consonant() {
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.eq_s_b("s")))
- {
- break lab1;
- }
- var /** number */ v_2 = base.limit - base.cursor;
- if (!(base.in_grouping_b(g_vowel, 97, 305)))
- {
- break lab1;
- }
- base.cursor = base.limit - v_2;
- break lab0;
- }
- base.cursor = base.limit - v_1;
- {
- var /** number */ v_3 = base.limit - base.cursor;
- lab2: {
- var /** number */ v_4 = base.limit - base.cursor;
- if (!(base.eq_s_b("s")))
- {
- break lab2;
- }
- base.cursor = base.limit - v_4;
- return false;
- }
- base.cursor = base.limit - v_3;
- }
- var /** number */ v_5 = base.limit - base.cursor;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- if (!(base.in_grouping_b(g_vowel, 97, 305)))
- {
- return false;
- }
- base.cursor = base.limit - v_5;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_suffix_with_optional_y_consonant() {
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.eq_s_b("y")))
- {
- break lab1;
- }
- var /** number */ v_2 = base.limit - base.cursor;
- if (!(base.in_grouping_b(g_vowel, 97, 305)))
- {
- break lab1;
- }
- base.cursor = base.limit - v_2;
- break lab0;
- }
- base.cursor = base.limit - v_1;
- {
- var /** number */ v_3 = base.limit - base.cursor;
- lab2: {
- var /** number */ v_4 = base.limit - base.cursor;
- if (!(base.eq_s_b("y")))
- {
- break lab2;
- }
- base.cursor = base.limit - v_4;
- return false;
- }
- base.cursor = base.limit - v_3;
- }
- var /** number */ v_5 = base.limit - base.cursor;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- if (!(base.in_grouping_b(g_vowel, 97, 305)))
- {
- return false;
- }
- base.cursor = base.limit - v_5;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_suffix_with_optional_U_vowel() {
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.in_grouping_b(g_U, 105, 305)))
- {
- break lab1;
- }
- var /** number */ v_2 = base.limit - base.cursor;
- if (!(base.out_grouping_b(g_vowel, 97, 305)))
- {
- break lab1;
- }
- base.cursor = base.limit - v_2;
- break lab0;
- }
- base.cursor = base.limit - v_1;
- {
- var /** number */ v_3 = base.limit - base.cursor;
- lab2: {
- var /** number */ v_4 = base.limit - base.cursor;
- if (!(base.in_grouping_b(g_U, 105, 305)))
- {
- break lab2;
- }
- base.cursor = base.limit - v_4;
- return false;
- }
- base.cursor = base.limit - v_3;
- }
- var /** number */ v_5 = base.limit - base.cursor;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- if (!(base.out_grouping_b(g_vowel, 97, 305)))
- {
- return false;
- }
- base.cursor = base.limit - v_5;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_possessives() {
- if (base.find_among_b(a_0) == 0)
- {
- return false;
- }
- if (!r_mark_suffix_with_optional_U_vowel())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_sU() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (!(base.in_grouping_b(g_U, 105, 305)))
- {
- return false;
- }
- if (!r_mark_suffix_with_optional_s_consonant())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_lArI() {
- if (base.find_among_b(a_1) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_yU() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (!(base.in_grouping_b(g_U, 105, 305)))
- {
- return false;
- }
- if (!r_mark_suffix_with_optional_y_consonant())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_nU() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_2) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_nUn() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_3) == 0)
- {
- return false;
- }
- if (!r_mark_suffix_with_optional_n_consonant())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_yA() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_4) == 0)
- {
- return false;
- }
- if (!r_mark_suffix_with_optional_y_consonant())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_nA() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_5) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_DA() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_6) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_ndA() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_7) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_DAn() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_8) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_ndAn() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_9) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_ylA() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_10) == 0)
- {
- return false;
- }
- if (!r_mark_suffix_with_optional_y_consonant())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_ki() {
- if (!(base.eq_s_b("ki")))
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_ncA() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_11) == 0)
- {
- return false;
- }
- if (!r_mark_suffix_with_optional_n_consonant())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_yUm() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_12) == 0)
- {
- return false;
- }
- if (!r_mark_suffix_with_optional_y_consonant())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_sUn() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_13) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_yUz() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_14) == 0)
- {
- return false;
- }
- if (!r_mark_suffix_with_optional_y_consonant())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_sUnUz() {
- if (base.find_among_b(a_15) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_lAr() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_16) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_nUz() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_17) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_DUr() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_18) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_cAsInA() {
- if (base.find_among_b(a_19) == 0)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_yDU() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_20) == 0)
- {
- return false;
- }
- if (!r_mark_suffix_with_optional_y_consonant())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_ysA() {
- if (base.find_among_b(a_21) == 0)
- {
- return false;
- }
- if (!r_mark_suffix_with_optional_y_consonant())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_ymUs_() {
- if (!r_check_vowel_harmony())
- {
- return false;
- }
- if (base.find_among_b(a_22) == 0)
- {
- return false;
- }
- if (!r_mark_suffix_with_optional_y_consonant())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_yken() {
- if (!(base.eq_s_b("ken")))
- {
- return false;
- }
- if (!r_mark_suffix_with_optional_y_consonant())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_stem_nominal_verb_suffixes() {
- base.ket = base.cursor;
- B_continue_stemming_noun_suffixes = true;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- lab2: {
- var /** number */ v_2 = base.limit - base.cursor;
- lab3: {
- if (!r_mark_ymUs_())
- {
- break lab3;
- }
- break lab2;
- }
- base.cursor = base.limit - v_2;
- lab4: {
- if (!r_mark_yDU())
- {
- break lab4;
- }
- break lab2;
- }
- base.cursor = base.limit - v_2;
- lab5: {
- if (!r_mark_ysA())
- {
- break lab5;
- }
- break lab2;
- }
- base.cursor = base.limit - v_2;
- if (!r_mark_yken())
- {
- break lab1;
- }
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab6: {
- if (!r_mark_cAsInA())
- {
- break lab6;
- }
- lab7: {
- var /** number */ v_3 = base.limit - base.cursor;
- lab8: {
- if (!r_mark_sUnUz())
- {
- break lab8;
- }
- break lab7;
- }
- base.cursor = base.limit - v_3;
- lab9: {
- if (!r_mark_lAr())
- {
- break lab9;
- }
- break lab7;
- }
- base.cursor = base.limit - v_3;
- lab10: {
- if (!r_mark_yUm())
- {
- break lab10;
- }
- break lab7;
- }
- base.cursor = base.limit - v_3;
- lab11: {
- if (!r_mark_sUn())
- {
- break lab11;
- }
- break lab7;
- }
- base.cursor = base.limit - v_3;
- lab12: {
- if (!r_mark_yUz())
- {
- break lab12;
- }
- break lab7;
- }
- base.cursor = base.limit - v_3;
- }
- if (!r_mark_ymUs_())
- {
- break lab6;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab13: {
- if (!r_mark_lAr())
- {
- break lab13;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_4 = base.limit - base.cursor;
- lab14: {
- base.ket = base.cursor;
- lab15: {
- var /** number */ v_5 = base.limit - base.cursor;
- lab16: {
- if (!r_mark_DUr())
- {
- break lab16;
- }
- break lab15;
- }
- base.cursor = base.limit - v_5;
- lab17: {
- if (!r_mark_yDU())
- {
- break lab17;
- }
- break lab15;
- }
- base.cursor = base.limit - v_5;
- lab18: {
- if (!r_mark_ysA())
- {
- break lab18;
- }
- break lab15;
- }
- base.cursor = base.limit - v_5;
- if (!r_mark_ymUs_())
- {
- base.cursor = base.limit - v_4;
- break lab14;
- }
- }
- }
- B_continue_stemming_noun_suffixes = false;
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab19: {
- if (!r_mark_nUz())
- {
- break lab19;
- }
- lab20: {
- var /** number */ v_6 = base.limit - base.cursor;
- lab21: {
- if (!r_mark_yDU())
- {
- break lab21;
- }
- break lab20;
- }
- base.cursor = base.limit - v_6;
- if (!r_mark_ysA())
- {
- break lab19;
- }
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab22: {
- lab23: {
- var /** number */ v_7 = base.limit - base.cursor;
- lab24: {
- if (!r_mark_sUnUz())
- {
- break lab24;
- }
- break lab23;
- }
- base.cursor = base.limit - v_7;
- lab25: {
- if (!r_mark_yUz())
- {
- break lab25;
- }
- break lab23;
- }
- base.cursor = base.limit - v_7;
- lab26: {
- if (!r_mark_sUn())
- {
- break lab26;
- }
- break lab23;
- }
- base.cursor = base.limit - v_7;
- if (!r_mark_yUm())
- {
- break lab22;
- }
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_8 = base.limit - base.cursor;
- lab27: {
- base.ket = base.cursor;
- if (!r_mark_ymUs_())
- {
- base.cursor = base.limit - v_8;
- break lab27;
- }
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!r_mark_DUr())
- {
- return false;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_9 = base.limit - base.cursor;
- lab28: {
- base.ket = base.cursor;
- lab29: {
- var /** number */ v_10 = base.limit - base.cursor;
- lab30: {
- if (!r_mark_sUnUz())
- {
- break lab30;
- }
- break lab29;
- }
- base.cursor = base.limit - v_10;
- lab31: {
- if (!r_mark_lAr())
- {
- break lab31;
- }
- break lab29;
- }
- base.cursor = base.limit - v_10;
- lab32: {
- if (!r_mark_yUm())
- {
- break lab32;
- }
- break lab29;
- }
- base.cursor = base.limit - v_10;
- lab33: {
- if (!r_mark_sUn())
- {
- break lab33;
- }
- break lab29;
- }
- base.cursor = base.limit - v_10;
- lab34: {
- if (!r_mark_yUz())
- {
- break lab34;
- }
- break lab29;
- }
- base.cursor = base.limit - v_10;
- }
- if (!r_mark_ymUs_())
- {
- base.cursor = base.limit - v_9;
- break lab28;
- }
- }
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_stem_suffix_chain_before_ki() {
- base.ket = base.cursor;
- if (!r_mark_ki())
- {
- return false;
- }
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!r_mark_DA())
- {
- break lab1;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_2 = base.limit - base.cursor;
- lab2: {
- base.ket = base.cursor;
- lab3: {
- var /** number */ v_3 = base.limit - base.cursor;
- lab4: {
- if (!r_mark_lAr())
- {
- break lab4;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_4 = base.limit - base.cursor;
- lab5: {
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_4;
- break lab5;
- }
- }
- break lab3;
- }
- base.cursor = base.limit - v_3;
- if (!r_mark_possessives())
- {
- base.cursor = base.limit - v_2;
- break lab2;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_5 = base.limit - base.cursor;
- lab6: {
- base.ket = base.cursor;
- if (!r_mark_lAr())
- {
- base.cursor = base.limit - v_5;
- break lab6;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_5;
- break lab6;
- }
- }
- }
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab7: {
- if (!r_mark_nUn())
- {
- break lab7;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_6 = base.limit - base.cursor;
- lab8: {
- base.ket = base.cursor;
- lab9: {
- var /** number */ v_7 = base.limit - base.cursor;
- lab10: {
- if (!r_mark_lArI())
- {
- break lab10;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- break lab9;
- }
- base.cursor = base.limit - v_7;
- lab11: {
- base.ket = base.cursor;
- lab12: {
- var /** number */ v_8 = base.limit - base.cursor;
- lab13: {
- if (!r_mark_possessives())
- {
- break lab13;
- }
- break lab12;
- }
- base.cursor = base.limit - v_8;
- if (!r_mark_sU())
- {
- break lab11;
- }
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_9 = base.limit - base.cursor;
- lab14: {
- base.ket = base.cursor;
- if (!r_mark_lAr())
- {
- base.cursor = base.limit - v_9;
- break lab14;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_9;
- break lab14;
- }
- }
- break lab9;
- }
- base.cursor = base.limit - v_7;
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_6;
- break lab8;
- }
- }
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!r_mark_ndA())
- {
- return false;
- }
- lab15: {
- var /** number */ v_10 = base.limit - base.cursor;
- lab16: {
- if (!r_mark_lArI())
- {
- break lab16;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- break lab15;
- }
- base.cursor = base.limit - v_10;
- lab17: {
- if (!r_mark_sU())
- {
- break lab17;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_11 = base.limit - base.cursor;
- lab18: {
- base.ket = base.cursor;
- if (!r_mark_lAr())
- {
- base.cursor = base.limit - v_11;
- break lab18;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_11;
- break lab18;
- }
- }
- break lab15;
- }
- base.cursor = base.limit - v_10;
- if (!r_stem_suffix_chain_before_ki())
- {
- return false;
- }
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_stem_noun_suffixes() {
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- base.ket = base.cursor;
- if (!r_mark_lAr())
- {
- break lab1;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_2 = base.limit - base.cursor;
- lab2: {
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_2;
- break lab2;
- }
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab3: {
- base.ket = base.cursor;
- if (!r_mark_ncA())
- {
- break lab3;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_3 = base.limit - base.cursor;
- lab4: {
- lab5: {
- var /** number */ v_4 = base.limit - base.cursor;
- lab6: {
- base.ket = base.cursor;
- if (!r_mark_lArI())
- {
- break lab6;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- break lab5;
- }
- base.cursor = base.limit - v_4;
- lab7: {
- base.ket = base.cursor;
- lab8: {
- var /** number */ v_5 = base.limit - base.cursor;
- lab9: {
- if (!r_mark_possessives())
- {
- break lab9;
- }
- break lab8;
- }
- base.cursor = base.limit - v_5;
- if (!r_mark_sU())
- {
- break lab7;
- }
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_6 = base.limit - base.cursor;
- lab10: {
- base.ket = base.cursor;
- if (!r_mark_lAr())
- {
- base.cursor = base.limit - v_6;
- break lab10;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_6;
- break lab10;
- }
- }
- break lab5;
- }
- base.cursor = base.limit - v_4;
- base.ket = base.cursor;
- if (!r_mark_lAr())
- {
- base.cursor = base.limit - v_3;
- break lab4;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_3;
- break lab4;
- }
- }
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab11: {
- base.ket = base.cursor;
- lab12: {
- var /** number */ v_7 = base.limit - base.cursor;
- lab13: {
- if (!r_mark_ndA())
- {
- break lab13;
- }
- break lab12;
- }
- base.cursor = base.limit - v_7;
- if (!r_mark_nA())
- {
- break lab11;
- }
- }
- lab14: {
- var /** number */ v_8 = base.limit - base.cursor;
- lab15: {
- if (!r_mark_lArI())
- {
- break lab15;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- break lab14;
- }
- base.cursor = base.limit - v_8;
- lab16: {
- if (!r_mark_sU())
- {
- break lab16;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_9 = base.limit - base.cursor;
- lab17: {
- base.ket = base.cursor;
- if (!r_mark_lAr())
- {
- base.cursor = base.limit - v_9;
- break lab17;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_9;
- break lab17;
- }
- }
- break lab14;
- }
- base.cursor = base.limit - v_8;
- if (!r_stem_suffix_chain_before_ki())
- {
- break lab11;
- }
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab18: {
- base.ket = base.cursor;
- lab19: {
- var /** number */ v_10 = base.limit - base.cursor;
- lab20: {
- if (!r_mark_ndAn())
- {
- break lab20;
- }
- break lab19;
- }
- base.cursor = base.limit - v_10;
- if (!r_mark_nU())
- {
- break lab18;
- }
- }
- lab21: {
- var /** number */ v_11 = base.limit - base.cursor;
- lab22: {
- if (!r_mark_sU())
- {
- break lab22;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_12 = base.limit - base.cursor;
- lab23: {
- base.ket = base.cursor;
- if (!r_mark_lAr())
- {
- base.cursor = base.limit - v_12;
- break lab23;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_12;
- break lab23;
- }
- }
- break lab21;
- }
- base.cursor = base.limit - v_11;
- if (!r_mark_lArI())
- {
- break lab18;
- }
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab24: {
- base.ket = base.cursor;
- if (!r_mark_DAn())
- {
- break lab24;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_13 = base.limit - base.cursor;
- lab25: {
- base.ket = base.cursor;
- lab26: {
- var /** number */ v_14 = base.limit - base.cursor;
- lab27: {
- if (!r_mark_possessives())
- {
- break lab27;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_15 = base.limit - base.cursor;
- lab28: {
- base.ket = base.cursor;
- if (!r_mark_lAr())
- {
- base.cursor = base.limit - v_15;
- break lab28;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_15;
- break lab28;
- }
- }
- break lab26;
- }
- base.cursor = base.limit - v_14;
- lab29: {
- if (!r_mark_lAr())
- {
- break lab29;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_16 = base.limit - base.cursor;
- lab30: {
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_16;
- break lab30;
- }
- }
- break lab26;
- }
- base.cursor = base.limit - v_14;
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_13;
- break lab25;
- }
- }
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab31: {
- base.ket = base.cursor;
- lab32: {
- var /** number */ v_17 = base.limit - base.cursor;
- lab33: {
- if (!r_mark_nUn())
- {
- break lab33;
- }
- break lab32;
- }
- base.cursor = base.limit - v_17;
- if (!r_mark_ylA())
- {
- break lab31;
- }
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_18 = base.limit - base.cursor;
- lab34: {
- lab35: {
- var /** number */ v_19 = base.limit - base.cursor;
- lab36: {
- base.ket = base.cursor;
- if (!r_mark_lAr())
- {
- break lab36;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_stem_suffix_chain_before_ki())
- {
- break lab36;
- }
- break lab35;
- }
- base.cursor = base.limit - v_19;
- lab37: {
- base.ket = base.cursor;
- lab38: {
- var /** number */ v_20 = base.limit - base.cursor;
- lab39: {
- if (!r_mark_possessives())
- {
- break lab39;
- }
- break lab38;
- }
- base.cursor = base.limit - v_20;
- if (!r_mark_sU())
- {
- break lab37;
- }
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_21 = base.limit - base.cursor;
- lab40: {
- base.ket = base.cursor;
- if (!r_mark_lAr())
- {
- base.cursor = base.limit - v_21;
- break lab40;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_21;
- break lab40;
- }
- }
- break lab35;
- }
- base.cursor = base.limit - v_19;
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_18;
- break lab34;
- }
- }
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab41: {
- base.ket = base.cursor;
- if (!r_mark_lArI())
- {
- break lab41;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab42: {
- if (!r_stem_suffix_chain_before_ki())
- {
- break lab42;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- lab43: {
- base.ket = base.cursor;
- lab44: {
- var /** number */ v_22 = base.limit - base.cursor;
- lab45: {
- if (!r_mark_DA())
- {
- break lab45;
- }
- break lab44;
- }
- base.cursor = base.limit - v_22;
- lab46: {
- if (!r_mark_yU())
- {
- break lab46;
- }
- break lab44;
- }
- base.cursor = base.limit - v_22;
- if (!r_mark_yA())
- {
- break lab43;
- }
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_23 = base.limit - base.cursor;
- lab47: {
- base.ket = base.cursor;
- lab48: {
- var /** number */ v_24 = base.limit - base.cursor;
- lab49: {
- if (!r_mark_possessives())
- {
- break lab49;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_25 = base.limit - base.cursor;
- lab50: {
- base.ket = base.cursor;
- if (!r_mark_lAr())
- {
- base.cursor = base.limit - v_25;
- break lab50;
- }
- }
- break lab48;
- }
- base.cursor = base.limit - v_24;
- if (!r_mark_lAr())
- {
- base.cursor = base.limit - v_23;
- break lab47;
- }
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- base.ket = base.cursor;
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_23;
- break lab47;
- }
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- base.ket = base.cursor;
- lab51: {
- var /** number */ v_26 = base.limit - base.cursor;
- lab52: {
- if (!r_mark_possessives())
- {
- break lab52;
- }
- break lab51;
- }
- base.cursor = base.limit - v_26;
- if (!r_mark_sU())
- {
- return false;
- }
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- var /** number */ v_27 = base.limit - base.cursor;
- lab53: {
- base.ket = base.cursor;
- if (!r_mark_lAr())
- {
- base.cursor = base.limit - v_27;
- break lab53;
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- if (!r_stem_suffix_chain_before_ki())
- {
- base.cursor = base.limit - v_27;
- break lab53;
- }
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_post_process_last_consonants() {
- var /** number */ among_var;
- base.ket = base.cursor;
- among_var = base.find_among_b(a_23);
- if (among_var == 0)
- {
- return false;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("p"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u00E7"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("t"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("k"))
- {
- return false;
- }
- break;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_append_U_to_stems_ending_with_d_or_g() {
- base.ket = base.cursor;
- base.bra = base.cursor;
- lab0: {
- var /** number */ v_1 = base.limit - base.cursor;
- lab1: {
- if (!(base.eq_s_b("d")))
- {
- break lab1;
- }
- break lab0;
- }
- base.cursor = base.limit - v_1;
- if (!(base.eq_s_b("g")))
- {
- return false;
- }
- }
- golab2: while(true)
- {
- var /** number */ v_2 = base.limit - base.cursor;
- lab3: {
- if (!(base.in_grouping_b(g_vowel, 97, 305)))
- {
- break lab3;
- }
- base.cursor = base.limit - v_2;
- break golab2;
- }
- base.cursor = base.limit - v_2;
- if (base.cursor <= base.limit_backward)
- {
- return false;
- }
- base.cursor--;
- }
- lab4: {
- var /** number */ v_3 = base.limit - base.cursor;
- lab5: {
- lab6: {
- var /** number */ v_4 = base.limit - base.cursor;
- lab7: {
- if (!(base.eq_s_b("a")))
- {
- break lab7;
- }
- break lab6;
- }
- base.cursor = base.limit - v_4;
- if (!(base.eq_s_b("\u0131")))
- {
- break lab5;
- }
- }
- if (!base.slice_from("\u0131"))
- {
- return false;
- }
- break lab4;
- }
- base.cursor = base.limit - v_3;
- lab8: {
- lab9: {
- var /** number */ v_5 = base.limit - base.cursor;
- lab10: {
- if (!(base.eq_s_b("e")))
- {
- break lab10;
- }
- break lab9;
- }
- base.cursor = base.limit - v_5;
- if (!(base.eq_s_b("i")))
- {
- break lab8;
- }
- }
- if (!base.slice_from("i"))
- {
- return false;
- }
- break lab4;
- }
- base.cursor = base.limit - v_3;
- lab11: {
- lab12: {
- var /** number */ v_6 = base.limit - base.cursor;
- lab13: {
- if (!(base.eq_s_b("o")))
- {
- break lab13;
- }
- break lab12;
- }
- base.cursor = base.limit - v_6;
- if (!(base.eq_s_b("u")))
- {
- break lab11;
- }
- }
- if (!base.slice_from("u"))
- {
- return false;
- }
- break lab4;
- }
- base.cursor = base.limit - v_3;
- lab14: {
- var /** number */ v_7 = base.limit - base.cursor;
- lab15: {
- if (!(base.eq_s_b("\u00F6")))
- {
- break lab15;
- }
- break lab14;
- }
- base.cursor = base.limit - v_7;
- if (!(base.eq_s_b("\u00FC")))
- {
- return false;
- }
- }
- if (!base.slice_from("\u00FC"))
- {
- return false;
- }
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_is_reserved_word() {
- if (!(base.eq_s_b("ad")))
- {
- return false;
- }
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- if (!(base.eq_s_b("soy")))
- {
- base.cursor = base.limit - v_1;
- break lab0;
- }
- }
- if (base.cursor > base.limit_backward)
- {
- return false;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_more_than_one_syllable_word() {
- var /** number */ v_1 = base.cursor;
- for (var /** number */ v_2 = 2; v_2 > 0; v_2--)
- {
- golab0: while(true)
- {
- lab1: {
- if (!(base.in_grouping(g_vowel, 97, 305)))
- {
- break lab1;
- }
- break golab0;
- }
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- }
- base.cursor = v_1;
- return true;
- };
-
- /** @return {boolean} */
- function r_postlude() {
- base.limit_backward = base.cursor; base.cursor = base.limit;
- {
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- if (!r_is_reserved_word())
- {
- break lab0;
- }
- return false;
- }
- base.cursor = base.limit - v_1;
- }
- var /** number */ v_2 = base.limit - base.cursor;
- r_append_U_to_stems_ending_with_d_or_g();
- base.cursor = base.limit - v_2;
- var /** number */ v_3 = base.limit - base.cursor;
- r_post_process_last_consonants();
- base.cursor = base.limit - v_3;
- base.cursor = base.limit_backward;
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- if (!r_more_than_one_syllable_word())
- {
- return false;
- }
- base.limit_backward = base.cursor; base.cursor = base.limit;
- var /** number */ v_1 = base.limit - base.cursor;
- r_stem_nominal_verb_suffixes();
- base.cursor = base.limit - v_1;
- if (!B_continue_stemming_noun_suffixes)
- {
- return false;
- }
- var /** number */ v_2 = base.limit - base.cursor;
- r_stem_noun_suffixes();
- base.cursor = base.limit - v_2;
- base.cursor = base.limit_backward;
- if (!r_postlude())
- {
- return false;
- }
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['TurkishStemmer'] = TurkishStemmer;
diff --git a/js/yiddish-stemmer.js b/js/yiddish-stemmer.js
deleted file mode 100644
index f21669d..0000000
--- a/js/yiddish-stemmer.js
+++ /dev/null
@@ -1,1173 +0,0 @@
-// Generated by Snowball 2.2.0 - https://snowballstem.org/
-
-/**@constructor*/
-var YiddishStemmer = function() {
- var base = new BaseStemmer();
- /** @const */ var a_0 = [
- ["\u05D5\u05D5", -1, 1],
- ["\u05D5\u05D9", -1, 2],
- ["\u05D9\u05D9", -1, 3],
- ["\u05DA", -1, 4],
- ["\u05DD", -1, 5],
- ["\u05DF", -1, 6],
- ["\u05E3", -1, 7],
- ["\u05E5", -1, 8]
- ];
-
- /** @const */ var a_1 = [
- ["\u05D0\u05D3\u05D5\u05E8\u05DB", -1, 1],
- ["\u05D0\u05D4\u05D9\u05E0", -1, 1],
- ["\u05D0\u05D4\u05E2\u05E8", -1, 1],
- ["\u05D0\u05D4\u05F2\u05DE", -1, 1],
- ["\u05D0\u05D5\u05DE", -1, 1],
- ["\u05D0\u05D5\u05E0\u05D8\u05E2\u05E8", -1, 1],
- ["\u05D0\u05D9\u05D1\u05E2\u05E8", -1, 1],
- ["\u05D0\u05E0", -1, 1],
- ["\u05D0\u05E0\u05D8", 7, 1],
- ["\u05D0\u05E0\u05D8\u05E7\u05E2\u05D2\u05E0", 8, 1],
- ["\u05D0\u05E0\u05D9\u05D3\u05E2\u05E8", 7, 1],
- ["\u05D0\u05E4", -1, 1],
- ["\u05D0\u05E4\u05D9\u05E8", 11, 1],
- ["\u05D0\u05E7\u05E2\u05D2\u05E0", -1, 1],
- ["\u05D0\u05E8\u05D0\u05E4", -1, 1],
- ["\u05D0\u05E8\u05D5\u05DE", -1, 1],
- ["\u05D0\u05E8\u05D5\u05E0\u05D8\u05E2\u05E8", -1, 1],
- ["\u05D0\u05E8\u05D9\u05D1\u05E2\u05E8", -1, 1],
- ["\u05D0\u05E8\u05F1\u05E1", -1, 1],
- ["\u05D0\u05E8\u05F1\u05E4", -1, 1],
- ["\u05D0\u05E8\u05F2\u05E0", -1, 1],
- ["\u05D0\u05F0\u05E2\u05E7", -1, 1],
- ["\u05D0\u05F1\u05E1", -1, 1],
- ["\u05D0\u05F1\u05E4", -1, 1],
- ["\u05D0\u05F2\u05E0", -1, 1],
- ["\u05D1\u05D0", -1, 1],
- ["\u05D1\u05F2", -1, 1],
- ["\u05D3\u05D5\u05E8\u05DB", -1, 1],
- ["\u05D3\u05E2\u05E8", -1, 1],
- ["\u05DE\u05D9\u05D8", -1, 1],
- ["\u05E0\u05D0\u05DB", -1, 1],
- ["\u05E4\u05D0\u05E8", -1, 1],
- ["\u05E4\u05D0\u05E8\u05D1\u05F2", 31, 1],
- ["\u05E4\u05D0\u05E8\u05F1\u05E1", 31, 1],
- ["\u05E4\u05D5\u05E0\u05D0\u05E0\u05D3\u05E2\u05E8", -1, 1],
- ["\u05E6\u05D5", -1, 1],
- ["\u05E6\u05D5\u05D6\u05D0\u05DE\u05E2\u05E0", 35, 1],
- ["\u05E6\u05D5\u05E0\u05F1\u05E4", 35, 1],
- ["\u05E6\u05D5\u05E8\u05D9\u05E7", 35, 1],
- ["\u05E6\u05E2", -1, 1]
- ];
-
- /** @const */ var a_2 = [
- ["\u05D3\u05D6\u05E9", -1, -1],
- ["\u05E9\u05D8\u05E8", -1, -1],
- ["\u05E9\u05D8\u05E9", -1, -1],
- ["\u05E9\u05E4\u05E8", -1, -1]
- ];
-
- /** @const */ var a_3 = [
- ["\u05E7\u05DC\u05D9\u05D1", -1, 9],
- ["\u05E8\u05D9\u05D1", -1, 10],
- ["\u05D8\u05E8\u05D9\u05D1", 1, 7],
- ["\u05E9\u05E8\u05D9\u05D1", 1, 15],
- ["\u05D4\u05F1\u05D1", -1, 23],
- ["\u05E9\u05F0\u05D9\u05D2", -1, 12],
- ["\u05D2\u05D0\u05E0\u05D2", -1, 1],
- ["\u05D6\u05D5\u05E0\u05D2", -1, 18],
- ["\u05E9\u05DC\u05D5\u05E0\u05D2", -1, 21],
- ["\u05E6\u05F0\u05D5\u05E0\u05D2", -1, 20],
- ["\u05D1\u05F1\u05D2", -1, 22],
- ["\u05D1\u05D5\u05E0\u05D3", -1, 16],
- ["\u05F0\u05D9\u05D6", -1, 6],
- ["\u05D1\u05D9\u05D8", -1, 4],
- ["\u05DC\u05D9\u05D8", -1, 8],
- ["\u05DE\u05D9\u05D8", -1, 3],
- ["\u05E9\u05E0\u05D9\u05D8", -1, 14],
- ["\u05E0\u05D5\u05DE", -1, 2],
- ["\u05E9\u05D8\u05D0\u05E0", -1, 25],
- ["\u05D1\u05D9\u05E1", -1, 5],
- ["\u05E9\u05DE\u05D9\u05E1", -1, 13],
- ["\u05E8\u05D9\u05E1", -1, 11],
- ["\u05D8\u05E8\u05D5\u05E0\u05E7", -1, 19],
- ["\u05E4\u05D0\u05E8\u05DC\u05F1\u05E8", -1, 24],
- ["\u05E9\u05F0\u05F1\u05E8", -1, 26],
- ["\u05F0\u05D5\u05D8\u05E9", -1, 17]
- ];
-
- /** @const */ var a_4 = [
- ["\u05D5\u05E0\u05D2", -1, 1],
- ["\u05E1\u05D8\u05D5", -1, 1],
- ["\u05D8", -1, 1],
- ["\u05D1\u05E8\u05D0\u05DB\u05D8", 2, 31],
- ["\u05E1\u05D8", 2, 1],
- ["\u05D9\u05E1\u05D8", 4, 33],
- ["\u05E2\u05D8", 2, 1],
- ["\u05E9\u05D0\u05E4\u05D8", 2, 1],
- ["\u05D4\u05F2\u05D8", 2, 1],
- ["\u05E7\u05F2\u05D8", 2, 1],
- ["\u05D9\u05E7\u05F2\u05D8", 9, 1],
- ["\u05DC\u05E2\u05DB", -1, 1],
- ["\u05E2\u05DC\u05E2\u05DB", 11, 1],
- ["\u05D9\u05D6\u05DE", -1, 1],
- ["\u05D9\u05DE", -1, 1],
- ["\u05E2\u05DE", -1, 1],
- ["\u05E2\u05E0\u05E2\u05DE", 15, 3],
- ["\u05D8\u05E2\u05E0\u05E2\u05DE", 16, 4],
- ["\u05E0", -1, 1],
- ["\u05E7\u05DC\u05D9\u05D1\u05E0", 18, 14],
- ["\u05E8\u05D9\u05D1\u05E0", 18, 15],
- ["\u05D8\u05E8\u05D9\u05D1\u05E0", 20, 12],
- ["\u05E9\u05E8\u05D9\u05D1\u05E0", 20, 7],
- ["\u05D4\u05F1\u05D1\u05E0", 18, 27],
- ["\u05E9\u05F0\u05D9\u05D2\u05E0", 18, 17],
- ["\u05D6\u05D5\u05E0\u05D2\u05E0", 18, 22],
- ["\u05E9\u05DC\u05D5\u05E0\u05D2\u05E0", 18, 25],
- ["\u05E6\u05F0\u05D5\u05E0\u05D2\u05E0", 18, 24],
- ["\u05D1\u05F1\u05D2\u05E0", 18, 26],
- ["\u05D1\u05D5\u05E0\u05D3\u05E0", 18, 20],
- ["\u05F0\u05D9\u05D6\u05E0", 18, 11],
- ["\u05D8\u05E0", 18, 4],
- ["GE\u05D1\u05D9\u05D8\u05E0", 31, 9],
- ["GE\u05DC\u05D9\u05D8\u05E0", 31, 13],
- ["GE\u05DE\u05D9\u05D8\u05E0", 31, 8],
- ["\u05E9\u05E0\u05D9\u05D8\u05E0", 31, 19],
- ["\u05E1\u05D8\u05E0", 31, 1],
- ["\u05D9\u05E1\u05D8\u05E0", 36, 1],
- ["\u05E2\u05D8\u05E0", 31, 1],
- ["GE\u05D1\u05D9\u05E1\u05E0", 18, 10],
- ["\u05E9\u05DE\u05D9\u05E1\u05E0", 18, 18],
- ["GE\u05E8\u05D9\u05E1\u05E0", 18, 16],
- ["\u05E2\u05E0", 18, 1],
- ["\u05D2\u05D0\u05E0\u05D2\u05E2\u05E0", 42, 5],
- ["\u05E2\u05DC\u05E2\u05E0", 42, 1],
- ["\u05E0\u05D5\u05DE\u05E2\u05E0", 42, 6],
- ["\u05D9\u05D6\u05DE\u05E2\u05E0", 42, 1],
- ["\u05E9\u05D8\u05D0\u05E0\u05E2\u05E0", 42, 29],
- ["\u05D8\u05E8\u05D5\u05E0\u05E7\u05E0", 18, 23],
- ["\u05E4\u05D0\u05E8\u05DC\u05F1\u05E8\u05E0", 18, 28],
- ["\u05E9\u05F0\u05F1\u05E8\u05E0", 18, 30],
- ["\u05F0\u05D5\u05D8\u05E9\u05E0", 18, 21],
- ["\u05D2\u05F2\u05E0", 18, 5],
- ["\u05E1", -1, 1],
- ["\u05D8\u05E1", 53, 4],
- ["\u05E2\u05D8\u05E1", 54, 1],
- ["\u05E0\u05E1", 53, 1],
- ["\u05D8\u05E0\u05E1", 56, 4],
- ["\u05E2\u05E0\u05E1", 56, 3],
- ["\u05E2\u05E1", 53, 1],
- ["\u05D9\u05E2\u05E1", 59, 2],
- ["\u05E2\u05DC\u05E2\u05E1", 59, 1],
- ["\u05E2\u05E8\u05E1", 53, 1],
- ["\u05E2\u05E0\u05E2\u05E8\u05E1", 62, 1],
- ["\u05E2", -1, 1],
- ["\u05D8\u05E2", 64, 4],
- ["\u05E1\u05D8\u05E2", 65, 1],
- ["\u05E2\u05D8\u05E2", 65, 1],
- ["\u05D9\u05E2", 64, -1],
- ["\u05E2\u05DC\u05E2", 64, 1],
- ["\u05E2\u05E0\u05E2", 64, 3],
- ["\u05D8\u05E2\u05E0\u05E2", 70, 4],
- ["\u05E2\u05E8", -1, 1],
- ["\u05D8\u05E2\u05E8", 72, 4],
- ["\u05E1\u05D8\u05E2\u05E8", 73, 1],
- ["\u05E2\u05D8\u05E2\u05E8", 73, 1],
- ["\u05E2\u05E0\u05E2\u05E8", 72, 3],
- ["\u05D8\u05E2\u05E0\u05E2\u05E8", 76, 4],
- ["\u05D5\u05EA", -1, 32]
- ];
-
- /** @const */ var a_5 = [
- ["\u05D5\u05E0\u05D2", -1, 1],
- ["\u05E9\u05D0\u05E4\u05D8", -1, 1],
- ["\u05D4\u05F2\u05D8", -1, 1],
- ["\u05E7\u05F2\u05D8", -1, 1],
- ["\u05D9\u05E7\u05F2\u05D8", 3, 1],
- ["\u05DC", -1, 2]
- ];
-
- /** @const */ var a_6 = [
- ["\u05D9\u05D2", -1, 1],
- ["\u05D9\u05E7", -1, 1],
- ["\u05D3\u05D9\u05E7", 1, 1],
- ["\u05E0\u05D3\u05D9\u05E7", 2, 1],
- ["\u05E2\u05E0\u05D3\u05D9\u05E7", 3, 1],
- ["\u05D1\u05DC\u05D9\u05E7", 1, -1],
- ["\u05D2\u05DC\u05D9\u05E7", 1, -1],
- ["\u05E0\u05D9\u05E7", 1, 1],
- ["\u05D9\u05E9", -1, 1]
- ];
-
- /** @const */ var /** Array */ g_niked = [255, 155, 6];
-
- /** @const */ var /** Array */ g_vowel = [33, 2, 4, 0, 6];
-
- /** @const */ var /** Array */ g_consonant = [239, 254, 253, 131];
-
- var /** number */ I_x = 0;
- var /** number */ I_p1 = 0;
-
-
- /** @return {boolean} */
- function r_prelude() {
- var /** number */ among_var;
- var /** number */ v_1 = base.cursor;
- lab0: {
- while(true)
- {
- var /** number */ v_2 = base.cursor;
- lab1: {
- golab2: while(true)
- {
- var /** number */ v_3 = base.cursor;
- lab3: {
- base.bra = base.cursor;
- among_var = base.find_among(a_0);
- if (among_var == 0)
- {
- break lab3;
- }
- base.ket = base.cursor;
- switch (among_var) {
- case 1:
- {
- var /** number */ v_4 = base.cursor;
- lab4: {
- if (!(base.eq_s("\u05BC")))
- {
- break lab4;
- }
- break lab3;
- }
- base.cursor = v_4;
- }
- if (!base.slice_from("\u05F0"))
- {
- return false;
- }
- break;
- case 2:
- {
- var /** number */ v_5 = base.cursor;
- lab5: {
- if (!(base.eq_s("\u05B4")))
- {
- break lab5;
- }
- break lab3;
- }
- base.cursor = v_5;
- }
- if (!base.slice_from("\u05F1"))
- {
- return false;
- }
- break;
- case 3:
- {
- var /** number */ v_6 = base.cursor;
- lab6: {
- if (!(base.eq_s("\u05B4")))
- {
- break lab6;
- }
- break lab3;
- }
- base.cursor = v_6;
- }
- if (!base.slice_from("\u05F2"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("\u05DB"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("\u05DE"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("\u05E0"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("\u05E4"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("\u05E6"))
- {
- return false;
- }
- break;
- }
- base.cursor = v_3;
- break golab2;
- }
- base.cursor = v_3;
- if (base.cursor >= base.limit)
- {
- break lab1;
- }
- base.cursor++;
- }
- continue;
- }
- base.cursor = v_2;
- break;
- }
- }
- base.cursor = v_1;
- var /** number */ v_7 = base.cursor;
- lab7: {
- while(true)
- {
- var /** number */ v_8 = base.cursor;
- lab8: {
- golab9: while(true)
- {
- var /** number */ v_9 = base.cursor;
- lab10: {
- base.bra = base.cursor;
- if (!(base.in_grouping(g_niked, 1456, 1474)))
- {
- break lab10;
- }
- base.ket = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- base.cursor = v_9;
- break golab9;
- }
- base.cursor = v_9;
- if (base.cursor >= base.limit)
- {
- break lab8;
- }
- base.cursor++;
- }
- continue;
- }
- base.cursor = v_8;
- break;
- }
- }
- base.cursor = v_7;
- return true;
- };
-
- /** @return {boolean} */
- function r_mark_regions() {
- I_p1 = base.limit;
- var /** number */ v_1 = base.cursor;
- lab0: {
- base.bra = base.cursor;
- if (!(base.eq_s("\u05D2\u05E2")))
- {
- base.cursor = v_1;
- break lab0;
- }
- base.ket = base.cursor;
- {
- var /** number */ v_2 = base.cursor;
- lab1: {
- lab2: {
- var /** number */ v_3 = base.cursor;
- lab3: {
- if (!(base.eq_s("\u05DC\u05D8")))
- {
- break lab3;
- }
- break lab2;
- }
- base.cursor = v_3;
- if (!(base.eq_s("\u05D1\u05E0")))
- {
- break lab1;
- }
- }
- base.cursor = v_1;
- break lab0;
- }
- base.cursor = v_2;
- }
- if (!base.slice_from("GE"))
- {
- return false;
- }
- }
- var /** number */ v_4 = base.cursor;
- lab4: {
- if (base.find_among(a_1) == 0)
- {
- base.cursor = v_4;
- break lab4;
- }
- lab5: {
- var /** number */ v_5 = base.cursor;
- lab6: {
- var /** number */ v_6 = base.cursor;
- lab7: {
- var /** number */ v_7 = base.cursor;
- lab8: {
- if (!(base.eq_s("\u05E6\u05D5\u05D2\u05E0")))
- {
- break lab8;
- }
- break lab7;
- }
- base.cursor = v_7;
- lab9: {
- if (!(base.eq_s("\u05E6\u05D5\u05E7\u05D8")))
- {
- break lab9;
- }
- break lab7;
- }
- base.cursor = v_7;
- if (!(base.eq_s("\u05E6\u05D5\u05E7\u05E0")))
- {
- break lab6;
- }
- }
- if (base.cursor < base.limit)
- {
- break lab6;
- }
- base.cursor = v_6;
- break lab5;
- }
- base.cursor = v_5;
- lab10: {
- var /** number */ v_8 = base.cursor;
- if (!(base.eq_s("\u05D2\u05E2\u05D1\u05E0")))
- {
- break lab10;
- }
- base.cursor = v_8;
- break lab5;
- }
- base.cursor = v_5;
- lab11: {
- base.bra = base.cursor;
- if (!(base.eq_s("\u05D2\u05E2")))
- {
- break lab11;
- }
- base.ket = base.cursor;
- if (!base.slice_from("GE"))
- {
- return false;
- }
- break lab5;
- }
- base.cursor = v_5;
- base.bra = base.cursor;
- if (!(base.eq_s("\u05E6\u05D5")))
- {
- base.cursor = v_4;
- break lab4;
- }
- base.ket = base.cursor;
- if (!base.slice_from("TSU"))
- {
- return false;
- }
- }
- }
- var /** number */ v_9 = base.cursor;
- {
- var /** number */ c1 = base.cursor + 3;
- if (c1 > base.limit)
- {
- return false;
- }
- base.cursor = c1;
- }
- I_x = base.cursor;
- base.cursor = v_9;
- var /** number */ v_10 = base.cursor;
- lab12: {
- if (base.find_among(a_2) == 0)
- {
- base.cursor = v_10;
- break lab12;
- }
- }
- {
- var /** number */ v_11 = base.cursor;
- lab13: {
- if (!(base.in_grouping(g_consonant, 1489, 1520)))
- {
- break lab13;
- }
- if (!(base.in_grouping(g_consonant, 1489, 1520)))
- {
- break lab13;
- }
- if (!(base.in_grouping(g_consonant, 1489, 1520)))
- {
- break lab13;
- }
- I_p1 = base.cursor;
- return false;
- }
- base.cursor = v_11;
- }
- golab14: while(true)
- {
- var /** number */ v_12 = base.cursor;
- lab15: {
- if (!(base.in_grouping(g_vowel, 1488, 1522)))
- {
- break lab15;
- }
- base.cursor = v_12;
- break golab14;
- }
- base.cursor = v_12;
- if (base.cursor >= base.limit)
- {
- return false;
- }
- base.cursor++;
- }
- while(true)
- {
- lab16: {
- if (!(base.in_grouping(g_vowel, 1488, 1522)))
- {
- break lab16;
- }
- continue;
- }
- break;
- }
- I_p1 = base.cursor;
- lab17: {
- if (I_p1 >= I_x)
- {
- break lab17;
- }
- I_p1 = I_x;
- }
- return true;
- };
-
- /** @return {boolean} */
- function r_R1() {
- return I_p1 <= base.cursor;
- };
-
- /** @return {boolean} */
- function r_R1plus3() {
- return I_p1 <= (base.cursor + 3);
- };
-
- /** @return {boolean} */
- function r_standard_suffix() {
- var /** number */ among_var;
- var /** number */ v_1 = base.limit - base.cursor;
- lab0: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_4);
- if (among_var == 0)
- {
- break lab0;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R1())
- {
- break lab0;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R1())
- {
- break lab0;
- }
- if (!base.slice_from("\u05D9\u05E2"))
- {
- return false;
- }
- break;
- case 3:
- if (!r_R1())
- {
- break lab0;
- }
- if (!base.slice_del())
- {
- return false;
- }
- base.ket = base.cursor;
- among_var = base.find_among_b(a_3);
- if (among_var == 0)
- {
- break lab0;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!base.slice_from("\u05D2\u05F2"))
- {
- return false;
- }
- break;
- case 2:
- if (!base.slice_from("\u05E0\u05E2\u05DE"))
- {
- return false;
- }
- break;
- case 3:
- if (!base.slice_from("\u05DE\u05F2\u05D3"))
- {
- return false;
- }
- break;
- case 4:
- if (!base.slice_from("\u05D1\u05F2\u05D8"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("\u05D1\u05F2\u05E1"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("\u05F0\u05F2\u05D6"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("\u05D8\u05E8\u05F2\u05D1"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("\u05DC\u05F2\u05D8"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("\u05E7\u05DC\u05F2\u05D1"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("\u05E8\u05F2\u05D1"))
- {
- return false;
- }
- break;
- case 11:
- if (!base.slice_from("\u05E8\u05F2\u05E1"))
- {
- return false;
- }
- break;
- case 12:
- if (!base.slice_from("\u05E9\u05F0\u05F2\u05D2"))
- {
- return false;
- }
- break;
- case 13:
- if (!base.slice_from("\u05E9\u05DE\u05F2\u05E1"))
- {
- return false;
- }
- break;
- case 14:
- if (!base.slice_from("\u05E9\u05E0\u05F2\u05D3"))
- {
- return false;
- }
- break;
- case 15:
- if (!base.slice_from("\u05E9\u05E8\u05F2\u05D1"))
- {
- return false;
- }
- break;
- case 16:
- if (!base.slice_from("\u05D1\u05D9\u05E0\u05D3"))
- {
- return false;
- }
- break;
- case 17:
- if (!base.slice_from("\u05F0\u05D9\u05D8\u05E9"))
- {
- return false;
- }
- break;
- case 18:
- if (!base.slice_from("\u05D6\u05D9\u05E0\u05D2"))
- {
- return false;
- }
- break;
- case 19:
- if (!base.slice_from("\u05D8\u05E8\u05D9\u05E0\u05E7"))
- {
- return false;
- }
- break;
- case 20:
- if (!base.slice_from("\u05E6\u05F0\u05D9\u05E0\u05D2"))
- {
- return false;
- }
- break;
- case 21:
- if (!base.slice_from("\u05E9\u05DC\u05D9\u05E0\u05D2"))
- {
- return false;
- }
- break;
- case 22:
- if (!base.slice_from("\u05D1\u05F2\u05D2"))
- {
- return false;
- }
- break;
- case 23:
- if (!base.slice_from("\u05D4\u05F2\u05D1"))
- {
- return false;
- }
- break;
- case 24:
- if (!base.slice_from("\u05E4\u05D0\u05E8\u05DC\u05D9\u05E8"))
- {
- return false;
- }
- break;
- case 25:
- if (!base.slice_from("\u05E9\u05D8\u05F2"))
- {
- return false;
- }
- break;
- case 26:
- if (!base.slice_from("\u05E9\u05F0\u05E2\u05E8"))
- {
- return false;
- }
- break;
- }
- break;
- case 4:
- lab1: {
- var /** number */ v_2 = base.limit - base.cursor;
- lab2: {
- if (!r_R1())
- {
- break lab2;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break lab1;
- }
- base.cursor = base.limit - v_2;
- if (!base.slice_from("\u05D8"))
- {
- return false;
- }
- }
- base.ket = base.cursor;
- if (!(base.eq_s_b("\u05D1\u05E8\u05D0\u05DB")))
- {
- break lab0;
- }
- var /** number */ v_3 = base.limit - base.cursor;
- lab3: {
- if (!(base.eq_s_b("\u05D2\u05E2")))
- {
- base.cursor = base.limit - v_3;
- break lab3;
- }
- }
- base.bra = base.cursor;
- if (!base.slice_from("\u05D1\u05E8\u05E2\u05E0\u05D2"))
- {
- return false;
- }
- break;
- case 5:
- if (!base.slice_from("\u05D2\u05F2"))
- {
- return false;
- }
- break;
- case 6:
- if (!base.slice_from("\u05E0\u05E2\u05DE"))
- {
- return false;
- }
- break;
- case 7:
- if (!base.slice_from("\u05E9\u05E8\u05F2\u05D1"))
- {
- return false;
- }
- break;
- case 8:
- if (!base.slice_from("\u05DE\u05F2\u05D3"))
- {
- return false;
- }
- break;
- case 9:
- if (!base.slice_from("\u05D1\u05F2\u05D8"))
- {
- return false;
- }
- break;
- case 10:
- if (!base.slice_from("\u05D1\u05F2\u05E1"))
- {
- return false;
- }
- break;
- case 11:
- if (!base.slice_from("\u05F0\u05F2\u05D6"))
- {
- return false;
- }
- break;
- case 12:
- if (!base.slice_from("\u05D8\u05E8\u05F2\u05D1"))
- {
- return false;
- }
- break;
- case 13:
- if (!base.slice_from("\u05DC\u05F2\u05D8"))
- {
- return false;
- }
- break;
- case 14:
- if (!base.slice_from("\u05E7\u05DC\u05F2\u05D1"))
- {
- return false;
- }
- break;
- case 15:
- if (!base.slice_from("\u05E8\u05F2\u05D1"))
- {
- return false;
- }
- break;
- case 16:
- if (!base.slice_from("\u05E8\u05F2\u05E1"))
- {
- return false;
- }
- break;
- case 17:
- if (!base.slice_from("\u05E9\u05F0\u05F2\u05D2"))
- {
- return false;
- }
- break;
- case 18:
- if (!base.slice_from("\u05E9\u05DE\u05F2\u05E1"))
- {
- return false;
- }
- break;
- case 19:
- if (!base.slice_from("\u05E9\u05E0\u05F2\u05D3"))
- {
- return false;
- }
- break;
- case 20:
- if (!base.slice_from("\u05D1\u05D9\u05E0\u05D3"))
- {
- return false;
- }
- break;
- case 21:
- if (!base.slice_from("\u05F0\u05D9\u05D8\u05E9"))
- {
- return false;
- }
- break;
- case 22:
- if (!base.slice_from("\u05D6\u05D9\u05E0\u05D2"))
- {
- return false;
- }
- break;
- case 23:
- if (!base.slice_from("\u05D8\u05E8\u05D9\u05E0\u05E7"))
- {
- return false;
- }
- break;
- case 24:
- if (!base.slice_from("\u05E6\u05F0\u05D9\u05E0\u05D2"))
- {
- return false;
- }
- break;
- case 25:
- if (!base.slice_from("\u05E9\u05DC\u05D9\u05E0\u05D2"))
- {
- return false;
- }
- break;
- case 26:
- if (!base.slice_from("\u05D1\u05F2\u05D2"))
- {
- return false;
- }
- break;
- case 27:
- if (!base.slice_from("\u05D4\u05F2\u05D1"))
- {
- return false;
- }
- break;
- case 28:
- if (!base.slice_from("\u05E4\u05D0\u05E8\u05DC\u05D9\u05E8"))
- {
- return false;
- }
- break;
- case 29:
- if (!base.slice_from("\u05E9\u05D8\u05F2"))
- {
- return false;
- }
- break;
- case 30:
- if (!base.slice_from("\u05E9\u05F0\u05E2\u05E8"))
- {
- return false;
- }
- break;
- case 31:
- if (!base.slice_from("\u05D1\u05E8\u05E2\u05E0\u05D2"))
- {
- return false;
- }
- break;
- case 32:
- if (!r_R1())
- {
- break lab0;
- }
- if (!base.slice_from("\u05D4"))
- {
- return false;
- }
- break;
- case 33:
- lab4: {
- var /** number */ v_4 = base.limit - base.cursor;
- lab5: {
- lab6: {
- var /** number */ v_5 = base.limit - base.cursor;
- lab7: {
- if (!(base.eq_s_b("\u05D2")))
- {
- break lab7;
- }
- break lab6;
- }
- base.cursor = base.limit - v_5;
- if (!(base.eq_s_b("\u05E9")))
- {
- break lab5;
- }
- }
- var /** number */ v_6 = base.limit - base.cursor;
- lab8: {
- if (!r_R1plus3())
- {
- base.cursor = base.limit - v_6;
- break lab8;
- }
- if (!base.slice_from("\u05D9\u05E1"))
- {
- return false;
- }
- }
- break lab4;
- }
- base.cursor = base.limit - v_4;
- if (!r_R1())
- {
- break lab0;
- }
- if (!base.slice_del())
- {
- return false;
- }
- }
- break;
- }
- }
- base.cursor = base.limit - v_1;
- var /** number */ v_7 = base.limit - base.cursor;
- lab9: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_5);
- if (among_var == 0)
- {
- break lab9;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R1())
- {
- break lab9;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- case 2:
- if (!r_R1())
- {
- break lab9;
- }
- if (!(base.in_grouping_b(g_consonant, 1489, 1520)))
- {
- break lab9;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- }
- base.cursor = base.limit - v_7;
- var /** number */ v_8 = base.limit - base.cursor;
- lab10: {
- base.ket = base.cursor;
- among_var = base.find_among_b(a_6);
- if (among_var == 0)
- {
- break lab10;
- }
- base.bra = base.cursor;
- switch (among_var) {
- case 1:
- if (!r_R1())
- {
- break lab10;
- }
- if (!base.slice_del())
- {
- return false;
- }
- break;
- }
- }
- base.cursor = base.limit - v_8;
- var /** number */ v_9 = base.limit - base.cursor;
- lab11: {
- while(true)
- {
- var /** number */ v_10 = base.limit - base.cursor;
- lab12: {
- golab13: while(true)
- {
- var /** number */ v_11 = base.limit - base.cursor;
- lab14: {
- base.ket = base.cursor;
- lab15: {
- var /** number */ v_12 = base.limit - base.cursor;
- lab16: {
- if (!(base.eq_s_b("GE")))
- {
- break lab16;
- }
- break lab15;
- }
- base.cursor = base.limit - v_12;
- if (!(base.eq_s_b("TSU")))
- {
- break lab14;
- }
- }
- base.bra = base.cursor;
- if (!base.slice_del())
- {
- return false;
- }
- base.cursor = base.limit - v_11;
- break golab13;
- }
- base.cursor = base.limit - v_11;
- if (base.cursor <= base.limit_backward)
- {
- break lab12;
- }
- base.cursor--;
- }
- continue;
- }
- base.cursor = base.limit - v_10;
- break;
- }
- }
- base.cursor = base.limit - v_9;
- return true;
- };
-
- this.stem = /** @return {boolean} */ function() {
- r_prelude();
- var /** number */ v_2 = base.cursor;
- r_mark_regions();
- base.cursor = v_2;
- base.limit_backward = base.cursor; base.cursor = base.limit;
- r_standard_suffix();
- base.cursor = base.limit_backward;
- return true;
- };
-
- /**@return{string}*/
- this['stemWord'] = function(/**string*/word) {
- base.setCurrent(word);
- this.stem();
- return base.getCurrent();
- };
-};
-
-window['YiddishStemmer'] = YiddishStemmer;
diff --git a/otherapps/romanian/index.tt b/otherapps/romanian/index.tt
deleted file mode 100644
index 95e66ea..0000000
--- a/otherapps/romanian/index.tt
+++ /dev/null
@@ -1,180 +0,0 @@
-[% header('Two Romanian stemmers') %]
-
-Links to resources
-
-
-
-
-In swift succession, we received in 2006 two stemmers for Romanian
-written in Snowball.
-Here is the original correspondence,
-
-
-
-From: Erwin Glockner <eglockne@ix.urz.uni-heidelberg.de>
-To: snowball-discuss
-Date: Wed, 07 Jun 2006 00:06:30 +0200
-Subject: [Snowball-discuss] romanian stemmer
-
-Hello everyone,
-
-my name is Erwn Glockner, I'm a student of computational linguistics in
-Heidelberg, Germany. Together with my fellow students Doina Gliga and
-Marina Stegarescu we started to write a romanian stemmer in Snowball.
-We planned to finish the stemmer until end of this month. We would be
-happy if the stemmer would be accepted as part of the Snowball-distribution.
-There is still some work to do, e.g. evaluating the stemmer, making a
-stopwords-list, unicode support, etc. After finishing this we will send
-you our stemmer with the corresponding files, but I couldn't find any
-email address to whom the stemmer should be sent to.
-Could please someone tell me the address(es)?
-
-With kind regards,
-E. Glockner, D. Gliga, M. Stegarescu.
-
-
-
-From: Erwin Glockner <eglockne@ix.urz.uni-heidelberg.de>
-To: richard@lemurconsulting.com,
- martin.porter@grapeshot.co.uk
-Date: Tue Jul 18 19:43:39 2006
-Subject: romanian stemmer
-
-Dear Mr. Porter, dear Mr. Boulton,
-
-we finally finished the Romanian stemmer. Unfortunately evaluation took
-more time than expected.
-However, it was an interesting experience creating the stemmer, and we
-are happy to send you the result of our work.
-The attachment-file is a Tarball-zipped file with (hopefully) all files
-needed. The files and the stemmer as well are encoded in UTF-8. Please
-inform us if something is missing.
-
-We would be happy if the Romanian stemmer would be accepted and
-integrated into the official Snowball distribution. We agree of course
-to license the stemmer under the same terms as the existing snowball
-software.
-
-We're looking forward to hear from you soon.
-
-
-With kind regards,
-
-Marina, Doina and Erwin.
-
-Attachment: [romanian1.tgz]
-
-
-
-From: Irina Tirdea <irina.tirdea@gmail.com>
-To: richard@lemurconsulting.com,
- martin.porter@grapeshot.co.uk
-Date: Mon Jul 31 10:19:51 2006
-Subject: Romanian stemmer
-
-Hello,
-
-My name is Irina Tirdea and I have developed a Romanian stemmer in Snowball
-as part of my bachelor thesis, in Bucharest, Romania. I am sending you the
-code attached (with vocabulary and stop word list files) and I hope you will
-accept and integrate it as a part of the Snowball project. I am ready to
-release the stemmer under the BSD license, just as the Snowball software.
-The files have been written in UTF-8 encoding (on a Linux system).
-
-Looking forward to hear from you.
-
-Kind regards,
-Irina Tirdea
-
-Attachment: [romanian2.tgz]
-
-
-
-From: martin.porter@grapeshot.co.uk (Martin Porter)
-To: snowball-discuss
-Cc: atordai@science.uva.nl,
- eglockne@ix.urz.uni-heidelberg.de,
- irina.tirdea@gmail.com
-Date: Mon Jul 31 10:43:05 BST 2006
-Subject: Tardy response to submissions to Snowball
-
-I am sending this general email as a kind of apology, for having done nothing
-so far on the following generously sent Snowball submissions:
-
-7 June, from E. Glockner: a Romanian stemmer
-8 June, from A. Tordai: a Hungarian stemmer
-
-and this morning another Romanian stemmer arrived,
-
-31 July, from I. Tirdea, a Romanian stemmer
-
-After the first submission I promised to look at it "next week", so Mr Glockner
-has probably been wondering what has happened. [. . .] I will make a point of
-looking at these submissions this week,
-
-More soon,
-
-Martin
-
-
-
-From: martin.porter@grapeshot.co.uk (Martin Porter)
-To: snowball-discuss
-Cc: irina.tirdea@gmail.com,
- eglockne@ix.urz.uni-heidelberg.de,
- mstegare@hotmail.com,
- doina_gliga@yahoo.co.uk,
- eglockner@hotmail.com
-Date: Wed Sep 06 12:39:16 BST 2006
-Subject: Romanian stemmer
-
-To the originators of the Romanian stemmers,
-
-I have now found time to do some preliminary work on the Romanian stemmer. I
-should explain that part of the complication has been the receipt, no more
-than ten days apart, of two Romanian stemmers in Snowball, the first
-(romanian1) from [Glockner, Gliga, and Stegarescu] in Heidelberg, the second
-(romanian2) from Tirdea in Bucharest.
-
-[. . . .]
-
-I have put together a vocabulary by combining the vocabularies provided with
-romanian1 and romanian2. This appears in column 1. Column 2 is the stemmed
-form produced by romanian1, and column 3 the stemmed form produced by
-romanian2. If the entry in column 3 is blank, both stemmers are producing the
-same result.
-
-You might care to compare the two approaches.
-
-My own feeling is that romanian1 does a more thorough job of ending removal,
-but unlike romanian2 has a habit of discarding too much from short words.
-aberant->ab, abatere->ab, aburi->ab are examples of this. In romanian1 the R2
-test is rarely used (it seems to me that 'R1 or R2' is equivalent to 'R1',
-since p2 is never to the left of p1.)
-
-I might have a go at making some modifications here. Needless to say, I am
-not familiar with Romanian, but the similarity to the other Romance
-languages, especially Italian, enables one to grasp the essential features of
-the morphology.
-
-What we would like to do is to have a single stemmer for release from the
-snowball site, if that is possible, and giving all necessary credits, along
-the lines of the recent addition,
-
-http://snowballstem.org/algorithms/hungarian/stemmer.html
-
-Hope to hear from you,
-
-Martin Porter
-
-
-
-Finally we decided to produce our own Romanian stemmer as described on the
-Romanian stemmer page. The submitted stemmers both contain stop word lists,
-available inside the tarballs.
-
-
-[% footer %]
diff --git a/otherapps/schinke/index.tt b/otherapps/schinke/index.tt
deleted file mode 100644
index 6589bf4..0000000
--- a/otherapps/schinke/index.tt
+++ /dev/null
@@ -1,168 +0,0 @@
-[% header('The Schinke Latin stemming algorithm') %]
-
-Links to resources
-
-
-
-
-(A note by Martin Porter.)
-
-
-
-The Schinke Latin stemming algorithm is described in,
-
-
-
- Schinke R, Greengrass M, Robertson AM and Willett P (1996) A stemming algorithm for Latin text
- databases. Journal of Documentation, 52: 172-187.
-
-
-
-It has the feature that it stems each word to two forms, noun and verb. For example,
-
-
-
- NOUN VERB
- ---- ----
- aquila aquil aquila
- portat portat porta
- portis port por
-
-
-
-Here (slightly reformatted) are the rules of the stemmer,
-
-
-
-1. (start)
-
-2. Convert all occurrences of the letters 'j' or 'v' to 'i' or 'u',
- respectively.
-
-3. If the word ends in '-que' then
- if the word is on the list shown in Figure 4, then
- write the original word to both the noun-based and verb-based
- stem dictionaries and go to 8.
- else remove '-que'
-
- [Figure 4 was
-
- atque quoque neque itaque absque apsque abusque adaeque adusque denique
- deque susque oblique peraeque plenisque quandoque quisque quaeque
- cuiusque cuique quemque quamque quaque quique quorumque quarumque
- quibusque quosque quasque quotusquisque quousque ubique undique usque
- uterque utique utroque utribique torque coque concoque contorque
- detorque decoque excoque extorque obtorque optorque retorque recoque
- attorque incoque intorque praetorque]
-
-4. Match the end of the word against the suffix list shown in Figure 6(a),
- removing the longest matching suffix, (if any).
-
- [Figure 6(a) was
-
- -ibus -ius -ae -am -as -em -es -ia
- -is -nt -os -ud -um -us -a -e
- -i -o -u]
-
-5. If the resulting stem contains at least two characters then write this stem
- to the noun-based stem dictionary.
-
-6. Match the end of the word against the suffix list shown in Figure 6(b),
- identifying the longest matching suffix, (if any).
-
- [Figure 6(b) was
-
- -iuntur -beris -erunt -untur -iunt -mini -ntur -stis
- -bor -ero -mur -mus -ris -sti -tis -tur
- -unt -bo -ns -nt -ri -m -r -s
- -t]
-
- If any of the following suffixes are found then convert them as shown:
-
- '-iuntur', '-erunt', '-untur', '-iunt', and '-unt', to '-i';
- '-beris', '-bor', and '-bo' to '-bi';
- '-ero' to '-eri'
-
- else remove the suffix in the normal way.
-
-7. If the resulting stem contains at least two characters then write this stem
- to the verb-based stem dictionary.
-
-8. (end)
-
-
-
-Unfortunately I was not able to make the rules match the examples given, which
-led to the following email correspondence,
-
-
-
-From: Martin Porter
-To: Peter Willett
-Date: Mon Sep 10 15:11:51 2001
-Subject: Re: Stemming algorithms
-
-> ... I'm no longer working in the IR area,
->spending all of my time on computational chemistry/drug discovery
->research but I guess that Mark Sanderson would be interested in
->Snowball - do you mind if I pass your email onto him?
-
-Peter,
-
-Well, actually, I do have a question, if you can cast your mind back. I've
-implemented the Latin Stemmer in Snowball (see below: you'll have to guess the
-semantics, but I'm sure you'll agree the syntax looks nice), and find that Fig
-5 of the 1996 Schinke paper doesn't correspond to the algorithm of fig 7, but to
-the algorithm with the extra rules concerning -ba-, -bi-, -sse- mentioned on
-page 182. Which is the "correct" algorithm - with or without those rules? If
-with, what is the exact criterion for their removal? A bigger problem is why
-the -nt is not removed from 'Apparebunt', given -nt as an ending in 6(a). Is
--nt a misprint?
-
-Sorry to bother you with this, but the paper says you are the one "to whom all
-correspondence should be addressed" :-)
-
-Martin
-
-
- Here is your algorithm in Snowball. The generated code will do about 1 million
- Latin word in 5 seconds:
-
- -------
-
-
-[% highlight_file('schinke') %]
-
-
-
-From: Peter Willett
-To: Martin Porter
-Date: Mon Sep 10 20:25:24 2001
-Subject: Re: Stemming algorithms
-
-Martin
-
-Sorry - I just cannot answer. Robertson has retired to Dorset while
-Schinke is now in - I think - Canada
-
-Peter
-
-
-
-Following this, I was unable to contact Schinke, and so the problems have
-remained unresolved.
-
-
-
-The linked zip file contains the stemmer,
-generated C version, and sample data.
-(The stemmer differs slightly from the version in the email above in that
-it assembles the noun- and verb-forms of the stem in a single string with
-space separation.)
-voc.txt
is a sample vocabulary, and joined.txt
the vocabulary
-joined with the two stemmed forms as three column output.
-
-
-[% footer %]
diff --git a/runtime/use.tt b/runtime/use.tt
deleted file mode 100644
index 1a00df1..0000000
--- a/runtime/use.tt
+++ /dev/null
@@ -1,381 +0,0 @@
-[% header('Using Snowball') %]
-
-Links to resources
-
-
-
-Compiling Snowball
-
-
-When you download Snowball,
-it already contains a make file to allow you to build it, like so:
-
-
-
- make
-
-
-
-You can confirm it's working with a simple test like so:
-
-
-
- echo "running" | ./stemwords -l en
-
-
-
-which should output: run
-
-
-
-There's no built in way to install snowball currently - you can either copy
-the snowball binary to somewhere that's on your PATH
-(e.g. on a typical Linux machine: sudo cp snowball /usr/local/bin)
-or just run it from the source tree with ./snowball.
-
-
-Running Snowball
-
-
-The snowball compiler has the following command line syntax,
-
-
-
-[% snowball_help | html %]
-
-
-
-For example,
-
-
-
- snowball danish.sbl -o q/danish
- snowball danish.sbl -syntax
- snowball danish.sbl -output q/danish -ep danish_
-
-
-
-The first argument, SOURCE_FILE
, is the name of the Snowball file to be compiled. Unless you specify a different programming language to
-generate code for, the default is to generate ISO C which results in two output
-files, a C source in OUTPUT_BASE.c
and a corresponding header file in OUTPUT_BASE.h
. This is similar for other
-programming languages, e.g. if option -java
is
-present, Java output is produced in OUTPUT_BASE.java
.
-
-
-
-Some options are only valid when generating code for particular programming
-languages. For example, the -widechars
,
- -utf8
, -eprefix
and
- -vprefix
options are specific to C and C++.
-
-
-ISO C generation
-
-
-In the absence of the -eprefix
and -vprefix
options, the list of
-declared externals in the Snowball program, for example,
-
-
-[% highlight("
- externals ( stem_1 stem_2 moderate )
-") %]
-
-
-gives rise to a header file containing,
-
-
-[% highlight("
- extern struct SN_env * create_env(void);
- extern void close_env(struct SN_env * z);
-
- extern int moderate(struct SN_env * z);
- extern int stem_2(struct SN_env * z);
- extern int stem_1(struct SN_env * z);
-", "c") %]
-
-
-If -eprefix
is used, its string, S1
, is prefixed to each external
-name, for example
-
-
-
- -eprefix Khotanese_
-
-
-
-would give rise to the header file,
-
-
-[% highlight("
- extern struct SN_env * Khotanese_create_env(void);
- extern void Khotanese_close_env(struct SN_env * z);
-
- extern int Khotanese_moderate(struct SN_env * z);
- extern int Khotanese_stem_2(struct SN_env * z);
- extern int Khotanese_stem_1(struct SN_env * z);
-", "c") %]
-
-
-If -vprefix
is used, all Snowball strings, integers and booleans give
-rise to a #define
line in the header file. For example
-
-
-
- -eprefix Khotanese_ -vprefix Khotanese_variable
-
-
-
-would give rise to the header file,
-
-
-[% highlight("
- extern struct SN_env * Khotanese_create_env(void);
- extern void Khotanese_close_env(struct SN_env * z);
-
- #define Khotanese_variable_ch (S[0])
- #define Khotanese_variable_Y_found (B[0])
- #define Khotanese_variable_p2 (I[1])
- #define Khotanese_variable_p1 (I[0])
- extern int Khotanese_stem(struct SN_env * z);
-", "c") %]
-
-
-The -utf8
and -widechars
options affect how
-the generated C/C++ code expects strings to be represented - UTF-8 or
-wide-character Unicode (stored using 2 bytes per codepoint), or if neither is
-specified, one byte per codepoint using either ISO-8859-1 or another encoding.
-
-
-
-For other programming languages, one of these three options is effectively
-implicitly hard-coded (except wide-characters may be wider) - e.g. C#, Java,
-Javascript and Python use wide characters; Ada, Go and Rust use UTF-8; Pascal
-uses ISO-8859-1. Since Snowball 2.0 it's possible with a little care to write
-Snowball code that works regardless of how characters are represented. See
-section 12 of the Snowball manual for
-more details.
-
-
-
-The -runtime
option is used to prepend a path to any #include
-lines in the generated code, and is useful when the runtime header files (i.e.
-those files in the runtime directory in the standard distribution) are not
-in the same location as the generated source files. It is used when
-building the libstemmer library, and may be useful for other projects.
-
-
-
-
-Other options
-
-
-If -syntax
is used the other options are ignored, and the syntax tree
-of the Snowball program is directed to stdout
. This can be a handy way
-of checking that you have got the bracketing right in the program you have
-written.
-
-
-
-Any number of -include
options may be present, for example,
-
-
-
- snowball testfile -output test -ep danish_ \
- -include /home/martin/Snowball/codesets \
- -include extras
-
-
-
-Each -include
is followed by a directory name. With a chain of
-directories D1
, D2
... Dn
, a Snowball get
directive,
-
-
-[% highlight("
- get 'F'
-") %]
-
-
-causes F
to be searched for in the successive locations,
-
-
-
- F
- D1/F
- D2/F
- ...
- Dn/F
-
-
-
-— that is, the current directory, followed in turn by directories D1
to
-Dn
.
-
-
-The Snowball API
-
-
-To access Snowball from C, include the header api.h
, and any headers
-generated from the Snowball scripts you wish to use. api.h
declares
-
-
-[% highlight("
- struct SN_env { /* ... */ };
- extern void SN_set_current(struct SN_env * z, int size, char * s);
-", "c") %]
-
-
-Continuing the previous example, you set up an environment to call the
-resources of the Khotanese module with
-
-
-[% highlight("
- struct SN_env * z;
- z = Khotanese_create_env();
-", "c") %]
-
-
-Snowball has the concept of a ‘current string’.
-This can be set up by,
-
-
-[% highlight("
- SN_set_current(z, i, b);
-", "c") %]
-
-
-This defines the current string as the i
bytes of data starting at
-address b
. The externals can then be called,
-
-
-[% highlight("
- Khotanese_moderate(z);
- /* ... */
- Khotanese_stem_1(z);
-", "c") %]
-
-
-They give a 1 or 0 result, corresponding to the t or f result of
-the Snowball routine.
-
-
-
-And later,
-
-
-[% highlight("
- Khotanese_close_env(z);
-", "c") %]
-
-
-To release the space raised by z back to the system. You can do this for a
-number of Snowball modules at the same time: you will need a separate
-struct SN_env * z;
for each module.
-
-
-
-The current string is given by the z->l
bytes of data starting at z->p
.
-The string is not zero-terminated, but you can zero terminate it yourself with
-
-
-[% highlight("
- z->p[z->l] = 0;
-", "c") %]
-
-
-(There is always room for this last zero byte.) For example,
-
-
-[% highlight('
- SN_set_current(z, strlen(s), s);
- Khotanese_stem_1(z);
- z->p[z->l] = 0;
- printf("Khotanese-1' _ " stems '%s' to '%s'" _ '\n", s, z->p);
-', "c") %]
-
-
-The values of the other variables can be accessed via the #define
-settings that result from the -vprefix
option, although this should not
-usually be necessary:
-
-
-[% highlight('
- printf("p1 is %d\n", z->Khotanese_variable_p1);
-', "c") %]
-
-
-The stemming scripts on this Web site use Snowball very simply.
--vprefix
is left unset, and -eprefix
is set to the name of the
-script (usually the language the script is for).
-
-
-
-
-Debugging snowball scripts
-
-
-In the rare event that your Snowball script does not run perfectly the first time:
-
-
-
-Remember that the option -syntax
prints out the syntax tree. A question
-mark can be included in Snowball as a command, and it will generate a call
-debug(...)
. The defined debug
in runtime/utilities.c
(usually
-commented out) can then be used. It causes the
-current string to be sent to stdout
, with square brackets marking the
-slice and vertical bar the position of c. Curly brackets mark the
-end-limits of the string, which may be less than the whole string because
-of the action of setlimit
.
-
-
-
-At present there is no way of reporting the value of an integer or boolean.
-
-
-
-If desperate, you can put debugging lines into the generated C program.
-You can pass -comments
to the snowball compiler to get it to
-generate comments showing the correspondence with the Snowball source which
-makes it easier to find where to add such debugging code.
-
-
-Compiler bugs
-
-
-If you hit a snowball compiler bug, try to
-capture it in a small script before notifying us.
-
-
-Known problems in Snowball
-
-
-The main one is that it is possible to ‘pull the rug from under your own feet’ in
-constructions like this:
-
-
-[% highlight('
- [ do something ]
- do something_else
- ( C1 delete C2 ) or ( C3 )
-') %]
-
-
-Suppose C1
gives t, the delete removes the slice established on the first
-line, and C2
gives f, so C3 is done with c set back to the value it had
-before C1
was obeyed — but this old value does not take account of the byte shift
-caused by the delete. This problem was foreseen from the beginning when designing
-Snowball, and recognised as a minor issue because it is an unnatural thing to want to
-do. (C3
should not be an alternative to something which has deletion as an
-occasional side-effect.) It may be addressed in the future.
-
-
-[% footer %]
diff --git a/texts/apostrophe.tt b/texts/apostrophe.tt
deleted file mode 100644
index 3948dce..0000000
--- a/texts/apostrophe.tt
+++ /dev/null
@@ -1,75 +0,0 @@
-[% header('The apostrophe character') %]
-
-
-Representing apostrophe is problematical for various reasons,
-
-
-
--
There are two Unicode characters for apostrophe, U+0027 (also ASCII hex
-27), and U+2019. Compare,
-
-
-
- Hamlet's father's ghost (U+0027)
- Hamlet’s father’s ghost (U+2019)
-
-
- -
Although conceptually different from an apostrophe, a single closing
-quote is also represented by character U+2019.
-
-
- -
Character U+0027 is used for apostrophe, single closing quote and
-single opening quote (U+2018).
-
-
- -
A fourth character, U+201B, like U+2018 but with the tail ‘rising’
-instead of ‘descending’, is also sometimes used as apostrophe (in the
-house style of certain publishers, for surnames like M’Coy and so on.)
-
-
-
-
-In the English stemming algorithm, it is assumed that apostrophe is
-represented by U+0027. This makes it ASCII compatible. Clearly other codes
-for apostrophe can be mapped to this code prior to stemming.
-
-
-
-In English orthography, apostrophe has one of three functions.
-
-
-
-It indicates a contraction in what is now accepted as a single word:
-o’clock, O’Reilly, M’Coy. Except in proper names such forms
-are rare: the apostrophe in Hallowe’en is disappearing, and in
-’bus has disappeared.
-
-
-It indicates a standard contraction with auxiliary or modal verbs:
-you’re, isn’t, we’d. There are about forty of these forms in
-contemporary English, and their use is increasing as they displace the full
-forms that were at one time used in formal documents. Although they can be
-reduced to word pairs, it is more convenient to treat them as single items
-(usually stopwords) in IR work. And then preserving the apostrophe is
-important, so that he’ll, she’ll, we’ll are not equated with
-hell, shell, well etc.
-
-
-It is used to form the ‘English genitive’, John's book, the horses’
-hooves etc. This is a development of (1), where historically the apostrophe
-stood for an elided e. (Similarly the printed form ’d for ed was
-very common before the nineteenth century.) Although in decline (witness pigs
-trotters, Girls School Trust), its use continues in contemporary
-English, where it is fiercely promoted as correct grammar, despite (or it might
-be closer to the truth to say because of) its complete semantic redundancy.
-
-
-
-
-For these reasons, the English stemmer treats apostrophe as a letter, removing
-it from the beginning of a word, where it might have stood for an opening
-quote, from the end of the word, where it might have stood for a closing quote,
-or been an apostrophe following s. The form ’s is also treated as an ending.
-
-
-[% footer %]
diff --git a/texts/earlyenglish.tt b/texts/earlyenglish.tt
deleted file mode 100644
index 827a911..0000000
--- a/texts/earlyenglish.tt
+++ /dev/null
@@ -1,135 +0,0 @@
-[% header('Stemming early English') %]
-
-Links to resources
-
-
-
-
-The question occasionally arises of how far the English (or earlier Porter)
-stemming algorithm can be adapted to handle older forms of the English
-language.
-
-
-
-Historically, English is usually divided into three periods of development,
-
-
-
-- Old English (or Anglo-Saxon), the language of Beowulf,
-
- Middle English, the language of Chaucer,
-
- Modern English, the language of Shakespeare, Dickens, and people today.
-
-
-
-Old English is so different from Modern English that it may be regarded as a
-distinct language.
-
-
-
-Middle English is problematical for a number of reasons. There is no standard
-spelling in the original texts, and the grammatical differences between Middle
-and Modern English prevent the spelling from being simply ‘modernised’. It is
-however possible to normalise the spelling according to some modern scheme, but
-again there is no standard modern scheme. Middle
-English itself had great regional variations, so that for example the
-English of Chaucer and his contemporary the Gawain poet (both late 14th century)
-are strikingly different. Finally, grammar was fluid even for one writer, so
-Chaucer might use they love or they loven, he
-sitteth or he sit.
-
-
-
-We may take Modern English to mean English which can be cast into a modern
-spelling form without too much damage being done to the original. From this
-point of view Shakespeare and the Authorised Version of the Bible are in Modern
-English. The ending structure of words in early Modern English differs from
-contemporary English in the est and eth endings of verbs in the present
-indicative,
-
-
--
- I bring
- thou bringest
- he bringeth
- we bring
- you bring
- they bring
-
-
-
-Both of these endings underwent rapid decline. The eth form occurs in
-Shakespeare, but is much rarer than the modern s form. The language of the
-Authorised Version,
-in which both forms abound,
-seemed archaic even on its first publication. Consequently
-the eth form survives now only in the language of the traditional Bible and
-Book of Common Prayer. The est form disappeared more slowly, as the use of
-thou became displaced by you in conversation.
-
-
-
-To put the endings into the
-Porter stemmer,
-the rules
-
-
--
-Step 1b
-
- (m>0) EED | | → | | EE
- |
(*v*) ED | | → | |
- |
(*v*) ING | | → | |
- |
-
-
-
-should be extended to
-
-
--
-Step 1b
-
- (m>0) EED | | → | | EE
- |
(*v*) ED | | → | |
- |
(*v*) ING | | → | |
- |
(*v*) EST | | → | |
- |
(*v*) ETH | | → | |
- |
-
-
-
-And to put the endings into the
-English stemmer,
-the list
-
-
--
-ed edly ing ingly
-
-of Step 1b should be extended to
--
-ed edly ing ingly est eth
-
-
-
-As far as the Snowball scripts are concerned, the endings 'est' 'eth'
must
-be added against ending 'ing'
.
-
-
-
-The inclusion of these endings does produce certain ‘side effects’. est is
-the ending of adjectival superlatives (greatest, unkindest), where it
-will also be removed. Words like brandreth, deforest will be mis-stemmed.
-Nevertheless, for the vocabulary of the Bible, the inclusion of these extra
-endings is not harmful (see
-this demonstration —
-for example, search for the text love in 1000 verses).
-
-[% footer %]
diff --git a/texts/glossary.tt b/texts/glossary.tt
deleted file mode 100644
index 0860abb..0000000
--- a/texts/glossary.tt
+++ /dev/null
@@ -1,106 +0,0 @@
-[% header('Glossary') %]
-
-
a-suffix
--
-
-An a-suffix, or attached suffix, is a particle word attached to another
-word. (In the stemming literature they sometimes get referred to as
-‘enclitics’.) In Italian, for example, personal pronouns attach to
-certain verb forms:
-
-
-
- mandargli = | | mandare + gli | | = | | to send + to him
- |
mandarglielo = | | mandare + gli + lo | | = | | to send + it + to him
- |
-
-
-a-suffixes appear in Italian and Spanish, and also in Portuguese, although
-in Portuguese they are separated by hyphen from the preceding word, which
-makes them easy to eliminate.
-
-
-
-i-suffix
--
-
-An i-suffix, or inflectional suffix, forms part of the basic grammar of a
-language, and is applicable to all words of a certain grammatical type,
-with perhaps a small number of exceptions. In English for example, the past
-of a verb is formed by adding ed. Certain modifications may be required
-in the stem:
-
-
-
- fit + ed | | → | | fitted (double t)
- |
love + ed | | → | | loved (drop the final e of love)
- |
-
-
-d-suffix
-
--
-
-A d-suffix, or derivational suffix, enables a new word, often with a
-different grammatical category, or with a different sense, to be built from
-another word. Whether a d-suffix can be attached is discovered not from
-the rules of grammar, but by referring to a dictionary. So in English,
-ness can be added to certain adjectives to form corresponding nouns
-(littleness, kindness, foolishness ...) but not to all adjectives (not for
-example, to big, cruel, wise ...) d-suffixes can be used to change
-meaning, often in rather exotic ways. So in Italian astro means a sham
-form of something else:
-
-
-
- medico + astro | | = | | medicastro | | = | | quack doctor
- |
poeta + astro | | = | | poetastro | | = | | poetaster
- |
-
-
-Indo-European languages
-
--
-
-Most European and many Asian languages belong to the Indo-European language
-group. Historically, it includes the Latin, Greek, Persian and Sanskrit of
-the ancient world, and with the rise of the European empires, languages of
-this group are now dominant in the Americas, Australia and large parts of
-Africa. Indo-European languages are therefore the main languages of modern
-Western culture, and they are all similarly amenable to stemming.
-
-
-
-The Indo-European group has many recognisable sub-groups, for example
-Romance (Italian, French, Spanish ...), Slavonic (Russian, Polish,
-Czech ...), Celtic (Irish Gaelic, Scottish Gaelic, Welsh ...). The
-Germanic sub-group includes German and Dutch, and the Scandinavian
-languages are also usually classed as Germanic, although for convenience we
-have made a separate grouping of them on the Snowball site. English is also
-classed as Germanic, although it has been classed separately by us. This is
-not for reasons of narrow chauvinism, but because the suffix structure of
-English clearly lies mid-way between the Germanic and Romance groups, and it
-therefore requires separate treatment.
-
-
-
-Uralic languages
-
--
-
-The Uralic languages are spoken mainly in Northern Russia and Europe. They
-are divided into Samoyed, spoken mainly in the Siberian region, and
-Finno-Ugric, spoken mainly in Europe. Although the number of languages in
-the group is substantial, the total number of speakers is relatively small.
-The best known Uralic languages are perhaps Hungarian, Finnish and
-Estonian. Finnish and Estonian are in fact fairly similar. On the other
-hand Hungarian and Finnish are as different as are, say, French and Persian
-in the Indo-European group.
-
-
-
-Like the Indo-European languages, the Uralic languages are amenable to
-stemming.
-
-
-[% footer %]
diff --git a/texts/howtohelp.tt b/texts/howtohelp.tt
deleted file mode 100644
index 5231d72..0000000
--- a/texts/howtohelp.tt
+++ /dev/null
@@ -1,65 +0,0 @@
-[% header('Snowball: How You Can Help') %]
-
-
-For the work on this site there are two possible lines of development, one is
-Snowball itself — the language and compiler — and the other is the
-stemmers which are written in Snowball. At the moment it is the latter that
-is the real area of interest.
-
-
-
-It is useful to have suggestions about improvements to the existing
-stemmers, especially for the ones which are not English. However, the
-process of piecemeal improvement can be taken too far, and it is important
-in making these suggestions to recognise the inevitable limitations of
-accuracy of algorithmic stemmers. But more importantly: —
-
-
-
-Stemming algorithms have a well-understood place in IR (Information
-Retrieval), and as language-specific tools in an IR system, they have an
-extremely useful part to play. It is therefore something of a scandal that
-there are so very few stemming algorithms which are readily available, so
-if you want to make a contribution to Snowball, the best thing you can do
-is to create a good quality stemmer for a new language. This must
-include an algorithmic description of the stemmer, an implementation in
-Snowball, and a representative language vocabulary of about 30,000 words
-that can be used as part of a standard test.
-
-
-
-Alternatively, you might come up with the algorithm and be able to provide
-representative texts from which to derive the vocabulary, but hesitate
-about the Snowball implementation. If so, get in touch, and we might be
-able to complete the work collaboratively.
-
-
-
-We are also interested in:
-
-
-
-- Significant applications developed with the Snowball stemmers
-
-
- Stemmers held on other sites that derive from Snowball work
-
-
- Other useful stemming resources
-
-
-
-It may seem like stating the obvious, but if you do hit a technical
-problem, please, please send in a full notice of the system being used,
-the activity you were engaged on, and the errors that you encounter.
-
-
-
-Finally, if you want to contribute to this site, you must be prepared to
-release under the BSD license (i.e. to make your work free).
-
-
-
-Martin Porter
-Richard Boulton
-
-
-[% footer %]
diff --git a/texts/introduction.tt b/texts/introduction.tt
deleted file mode 100644
index 16b7727..0000000
--- a/texts/introduction.tt
+++ /dev/null
@@ -1,923 +0,0 @@
-[% header('Snowball: A language for stemming algorithms') %]
-
-Links
-
-
-
-
-M.F. Porter
-October 2001
-
-
-Summary
-
-
- Algorithmic stemmers continue to have great utility in IR, despite the
- promise of out-performance by dictionary-based stemmers. Nevertheless,
- there are few algorithmic descriptions of stemmers, and even when they
- exist they are liable to misinterpretation. Here we look at the ideas
- underlying stemming, and on this website define a language, Snowball,
- in which stemmers can be exactly defined, and from which fast stemmer
- programs in ANSI C or Java can be generated. A range of stemmers is presented
- in parallel algorithmic and Snowball form, including the original
- Porter stemmer for English.
-
-
-1 Introduction
-
-
-There are two main reasons for creating Snowball. One is the lack of
-readily available stemming algorithms for languages
-other than English. The other is the consciousness of a certain failure on
-my part in promoting exact implementations of the stemming
-algorithm described in (Porter 1980), which has come to be called the
-Porter stemming algorithm. The first point needs some qualification: a
-great deal of work has been done on stemmers in a wide range of natural
-languages, both in their development and evaluation, (a complete
-bibliography cannot be attempted here). But it is rare to see a stemmer
-laid out in an unambiguous algorithmic form from which encodings in C,
-Java, Perl etc might easily be made. When exact descriptions are
-attempted, it is often with approaches to stemming that are
-relatively simple, for example the Latin stemmer of Schinke (Schinke 1996),
-or the Slovene stemmer of Popovic (Popovic 1990). A more complex, and
-therefore more characteristic stemmer is the Kraaij-Pohlmann stemmer for
-Dutch (Kraaij 1994), which is presented as open source code in ANSI C. To
-extract an algorithmic description of their stemmer from the source code
-proves to be quite hard.
-
-
-
-The disparity between the Porter stemmer definition and many of its
-purported implementations is much wider than is generally realised in the
-IR community. Three problems seem to compound: one is a misunderstanding
-of the meaning of the original algorithm, another is bugs in the
-encodings, and a third is the almost irresistible urge of programmers
-to add improvements.
-
-
-
-For example, a Perl script advertised on the Web as an
-implementation of the Porter algorithm was tested in October 2001, and it was
-found that 14 percent of words were stemmed incorrectly when given a large sample
-vocabulary. Most words of English have
-very simple endings, so this means that it was effectively getting everything
-wrong. At certain points on the Web are demonstrations of the Porter stemmer.
-You type some English into a box and the stemmed words are displayed. These
-are frequently faulty. (A good test is to type in agreement. It should stem
-to agreement — the same word. If it stems to agreem there is an
-error.) Researchers frequently pick up faulty versions of the stemmer and
-report that they have applied ‘Porter stemming’, with the result that their
-experiments are not quite repeatable. Researchers who work on stemming will
-sometimes give incorrect examples of the behaviour of the Porter stemmer in
-their published works.
-
-
-
-To address all these problems I have tried to develop a rigorous system
-for defining stemming algorithms. A language, Snowball, has been invented,
-in which the rules of stemming algorithms can be expressed in a natural
-way. Snowball is quite small, and can be learned by an experienced
-programmer in an hour or so. On this website a number of foreign language
-stemmers is presented (a) in Snowball, and (b) in a less formal
-English-language description. (b) can be thought of as the program
-comments for (a). A Snowball compiler translates each Snowball
-definition into (c) an equivalent program in ANSI C or Java. Finally (d)
-standard vocabularies of words and their stemmed equivalents are provided
-for each stemmer. The combination of (a), (b), (c) and (d)
-can be used to pin down the definition of a stemmer exactly, and it is
-hoped that Snowball itself will be a useful resource in creating stemmers
-in the future.
-
-
-2 Some ideas underlying stemming
-
-
-Work in stemming has produced a number of different approaches, albeit tied
-together by a number of common assumptions. It is worthwhile looking at some
-of them to see exactly where Snowball fits into the whole picture.
-
-
-
-A point tacitly assumed in almost all of the stemming literature is that
-stemmers are based upon the written, and not the spoken, form of the
-language. This is also the assumption here. Historically,
-grammarians often regarded the written language as the real language and
-the spoken as a mere derivative form. Almost in reaction, many modern
-linguists have taken a precisely opposite view (Palmer, 1965 pp 2-3). A
-more balanced position is that the two languages are distinct though
-connected, and require separate treatment. One can in fact imagine parallel
-stemming algorithms for the spoken language, or rather for the phoneme
-sequence into which the spoken language is transformed. Stress and
-intonation could be used as clues for an indexing process in the same way
-that punctuation and capitalisation are used as clues in the written
-language. But currently stemmers work on the written language for the good
-reason that there is so much of it available in machine readable form from
-which to build our IR systems. Inevitably therefore the stemmers get
-caught up in accidental details of orthography. In English, removing the
-ing from rotting should be followed by undoubling the tt,
-whereas in rolling we do not undouble the ll. In French, removing
-the er from ennuyer should be followed by changing the y to
-i, so that the resulting word conflates with ennui, and so on.
-
-
-
-The idea of stemming is to improve IR performance generally by bringing
-under one heading variant forms of a word which share a common meaning.
-Harman (1991) was first to present compelling evidence that it may not do
-so, when her experiments discovered no significant improvement with the
-use of stemming.
-Similarly Lennon (1981) discovered no appreciable difference between different
-stemmers running on a constant collection.
-Later work has modified this position however. Krovetz
-(1995) found significant, although sometimes small, improvements across a
-range of test collections. What he did discover is that the degree of
-improvement varies considerably between different collections.
-These tests were however done on collections in
-English, and the reasonable assumption of IR researchers has always been that for
-languages that are more highly inflected than English (and nearly all
-are), greater improvements will be observed when stemming is applied. My
-own view is that stemming helps regularise the
-vocabulary of an IR system, and this leads to advantages that are not
-easily quantifiable through standard IR experiments. For example, it helps
-in presenting lists of terms associated with the query back to the IR user
-in a relevance feedback cycle, which is one of the underlying ideas of the
-probabilistic model. More will be said on the use of a stemmed vocabulary
-in section 5.
-
-
-
-Stemming is not a concept applicable to all languages. It is not, for
-example, applicable in Chinese. But to languages of the Indo-European (*)
-group (and most of the stemmers on this site are for Indo-European
-languages), a common
-pattern of word structure does emerge. Assuming words are written left to
-right, the stem, or root of a word is on the left, and zero or more
-suffixes may be added on the right. If the root is modified by this
-process it will normally be at its right hand end. And also prefixes may
-be added on the left. So unhappiness has a prefix un, a suffix
-ness, and the y of happy has become i with the addition of
-the suffix. Usually, prefixes alter meaning radically, so they are best
-left in place (German and Dutch ge is an exception here). But suffixes
-can, in certain circumstances, be removed. So for example happy and
-happiness have closely related meanings, and we may wish to stem both
-forms to happy, or happi. Infixes can occur, although rarely:
-ge in German and Dutch, and zu in German.
-
-
-
-One can make some distinction between root and stem. Lovins (1968)
-sees the root as the stem minus any prefixes. But here we will
-think of the stem as the residue of the stemming process, and the root as the
-inner word from which the stemmed word derives, so we think of root to
-some extent in an etymological way. It must be admitted that when you
-start thinking hard about these concepts root, stem, suffix,
-prefix ... they turn out to be very difficult indeed to define.
-Nor do definitions, even if we arrive at them, help us much. After all, suffix
-stripping is a practical aid in IR, not an exercise in linguistics or
-etymology. This is especially true of the central concept of root. We
-think of the etymological root of a word as something we can discover with
-certainty from a dictionary, forgetting that etymology itself is a subject
-with its own doubts and controversies (Jespersen 1922, Chapter XVI).
-Indeed, Jespersen goes so far as to say that
-
-
-
-
- ‘It is of course impossible to say how great a proportion of the
- etymologies given in dictionaries should strictly be classed under
- each of the following heads: (1) certain, (2) probable, (3)
- possible, (4) improbable, (5) impossible — but I am afraid the
- first two classes would be the least numerous.’
-
-
-
-
-Here we will simply assume a common sense understanding of
-the basic idea of stem and suffix, and hope that this proves sufficient
-for designing and discussing stemming algorithms.
-
-
-
-We can separate suffixes out into three basic classes, which will be
-called d-, i- and a-suffixes.
-
-
-
-An a-suffix, or attached suffix, is a particle word attached to another
-word. (In the stemming literature they sometimes get referred to as
-‘enclitics’.) In Italian, for example, personal pronouns attach to
-certain verb forms:
-
-
-
- mandargli = | | mandare + gli | | = | | to send + to him
- |
mandarglielo = | | mandare + gli + lo | | = | | to send + it + to him
- |
-
-
-a-suffixes appear in Italian and Spanish, and also in Portuguese, although
-in Portuguese they are separated by hyphen from the preceding word, which
-makes them easy to eliminate.
-
-
-
-An i-suffix, or inflectional suffix, forms part of the basic grammar of a
-language, and is applicable to all words of a certain grammatical type,
-with perhaps a small number of exceptions. In English for example, the past
-of a verb is formed by adding ed. Certain modifications may be required
-in the stem:
-
-
-
- fit + ed | | → | | fitted (double t)
- |
love + ed | | → | | loved (drop the final e of love)
- |
-
-
-but otherwise the rule applies in a regular way to all verbs in
-contemporary English, with about 150 (Palmer, 1965) exceptional forms,
-
-
-
- bear | | beat | | become | | begin | | bend | | ....
- |
bore | | beat | | became | | began | | bent
- |
-
-
-A d-suffix, or derivational suffix, enables a new word, often with a
-different grammatical category, or with a different sense, to be built from
-another word. Whether a d-suffix can be attached is discovered not from
-the rules of grammar, but by referring to a dictionary. So in English,
-ness can be added to certain adjectives to form corresponding nouns
-(littleness, kindness, foolishness ...) but not to all adjectives (not for
-example, to big, cruel, wise ...). d-suffixes can be used to change
-meaning, often in rather exotic ways. So in Italian astro means a sham
-form of something else:
-
-
-
- medico + astro | | = | | medicastro | | = | | quack doctor
- |
poeta + astro | | = | | poetastro | | = | | poetaster
- |
-
-
-Generally i-suffixes follow d-suffixes. i-suffixes can precede d-suffixes,
-for example lovingly, devotedness, but such cases are exceptional. To
-be a little more precise, d-suffixes can sometimes be added to
-participles. devoted, used adjectivally, is a participle derived from the
-verb devote, and ly can be added to turn the adjective into an adverb,
-or ness to turn it into a noun. The same feature occurs in other
-Indo-European languages.
-
-
-
-Sometimes it is hard to say whether a suffix is a d-suffix or i-suffix,
-the comparative and superlative endings er, est of English for example.
-
-
-
-A d-suffix can serve more than one function. In English, for example,
-ly standardly turns an adjective into an adverb (greatly), but it
-can also turn a noun into an adjective (kingly). In French, ement
-also standardly turns an adjective into an adverb (grandement), but it
-can also turn a verb into a noun (rapprochement). (Referring to the
-French stemmer, this double use is ultimately why ement is tested for
-being in the RV rather than the R2 region of the word being
-stemmed.)
-
-
-
-It is quite common for an i-suffix to serve more than one function.
-In English, s can either be (1) a verb ending attached to third person
-singular forms (runs, sings), (2) a noun ending indicating the plural
-(dogs, cats) or (3) a noun ending indicating the possessive
-(boy’s, girls’). By an orthographic convention now several hundred
-years old, the possessive is written with an apostrophe, but
-nowadays this is
-frequently omitted in familiar phrases (a girls school). (Usage (3) is
-relatively rare compared with (1) and (2): there are only nine uses of
-’s in this document.)
-
-
-
-Since the normal order of suffixes is d, i and a, we
-can expect them to be removed
-from the right in the order a, i and d. Usually we want to remove
-all a- and i-suffixes, and some of the d-suffixes.
-
-
-
-If the stemming process reduces two words to the same stem, they are said
-to be conflated.
-
-
-3 Stemming errors, and the use of dictionaries
-
-
-One way of thinking of the relation between terms and documents in an IR
-system is to see the documents as being about concepts, and the terms as
-words that describe the concepts. Then, of course, one word can cover many
-concepts, so pound can mean a unit of currency, a weight, an enclosure,
-or a beating. Pound is a homonym. And one concept can be described by
-many words, as with money, capital, cash, currency. These words
-are synonyms. There is a many-many mapping therefore between the set of
-terms and the set of concepts. Stemming is a process that transforms this
-mapping to advantage, on the whole reducing the number of synonyms, but
-occasionally creating new homonyms. It is worth remembering that what are
-called stemming errors are usually just the introduction of new homonyms into
-vocabularies that already contain very large numbers of homonyms.
-
-
-
-Words which have no place in this term-concept mapping are those which
-describe no concepts. The particle words of grammar, the, of,
-and
-..., known in IR as stopwords, fall into this category. Stopwords can be
-useful for retrieval but only in searching for phrases, ‘to be or not to
-be’, ‘do as you would be done by’ etc. This suggests that stemming
-stopwords is not useful. More will be said on stopwords in section 7.
-
-
-
-In the literature, a distinction is often made between
-under-stemming, which is the error of taking off too small a suffix, and
-over-stemming, which is the error of taking off too much. In French, for
-example, croûtons is the plural of croûton, ‘a crust’, so to remove
-ons would be over-stemming, while croulons is a verb form of crouler,
-‘to totter’, so to remove s would be under-stemming. We would like to
-introduce a further distinction between mis-stemming and over-stemming.
-Mis-stemming is taking off what looks like an ending, but is really part
-of the stem. Over-stemming is taking off a true ending which results in
-the conflation of words of different meanings.
-
-
-
-So for example ly can be removed from cheaply, but not from reply,
-because in reply ly is not a suffix. If it was removed, reply would
-conflate with rep, (the commonly used short form of representative).
-Here we have a case of mis-stemming.
-
-
-
-To illustrate over-stemming, look at these four words,
-
-
-
- | | verb | | adjective
-
- |
First pair: | | prove | | provable
- |
Second pair: | | probe | | probable
- |
-
-
-Morphologically, the two pairs are exactly parallel (in the written, if not
-the spoken language). They also have a common etymology. All four words
-derive from the Latin probare, ‘to prove or to test’, and the idea of
-testing connects the meanings of the words. But the meanings are not parallel.
-provable means ‘able to be proved’; probable does not mean ‘able to be
-probed’. Most people would judge conflation of the first pair as correct,
-and of the second pair, incorrect. In other words, to remove able from
-probable is a case of over-stemming.
-
-
-
-We can try to avoid mis-stemming and over-stemming by using a dictionary.
-The dictionary can tell us that reply does not derive from rep, and
-that the meanings of probe and probable are well separated in modern
-English. It is important to realise however that a dictionary does not give
-a complete solution here, but can be a tool to improve the conflation
-process.
-
-
-
-In Krovetz’s dictionary experiments (Krovetz 1995), he noted that in
-looking up a past participle like suited, one is led either to suit or
-to suite as plausible infinitive forms. suite can be rejected,
-however, because the dictionary tells us that
-although it is a word of English
-it is not a verb form. Cases
-like this (and Krovetz found about 60) had to be treated as exceptions. But
-the form routed could
-either derive from the verb rout or the verb route:
-
-
-
- At Waterloo Napoleon’s forces were routed
- The cars were routed off the motorway
-
-
-
-Such cases in English are extremely rare, but they are commoner in more
-highly inflected languages. In French for example, affiliez can either be
-the verb affiler, to sharpen, with imperfect ending iez, or the verb
-affilier, to affiliate, with present indicative ending ez:
-
-
-
- vous affiliez | | = | | vous affil-iez | | = | | you sharpened
- |
vous affiliez | | = | | vous affili-ez | | = | | you affiliate
- |
-
-
-If the second is intended, removal of iez is mis-stemming.
-
-
-
-With over-stemming we must rely upon the dictionary to separate meanings.
-There are different ways of doing this, but all involve some degree of
-reliance upon the lexicographers. Krovetz’s methods are no doubt best,
-because the most objective: he uses several measures, but they are based on
-the idea of measuring the similarity in
-meaning of two words by the degree of overlap among the words used to define
-them, and this is at a good remove from a lexicographer’s subjective
-judgement about semantic similarity.
-
-
-
-There is an interesting difference between mis-stemming and over-stemming
-to do with language history. The morphology of a language changes less
-rapidly than the meanings of the words in it. When extended to include a
-few archaic endings, such as ick as an alternative to ic, a stemmer for
-contemporary English can be applied to the English of 300 years ago.
-Mis-stemmings will be roughly the same, but the pattern of over-stemming will
-be different because of the changing meaning of words in the language. For
-example, relativity in the 19th century merely meant ‘the condition of
-being relative to’. With that meaning, it is acceptable to conflate it
-with relative.
-But with the 20th century meaning brought to it by
-Einstein, stemming to relativ is over-stemming.
-Here we see the word with the suffix changing its meaning, but it can happen
-the other way round. transpire has come to mean ‘happen’, and its old
-meaning of ‘exhalation’ or ‘breathing out’ is now effectively lost.
-(That is the bitter reality, although dictionaries still try to persuade us
-otherwise). But transpiration still carries the earlier meaning.
-So what was formerly an acceptable stemming may be judged now as
-an over-stemming, not because the word being stemmed has changed its meaning,
-but because some cognate word has changed its meaning.
-
-
-
-In these examples we are presenting words as if they had single meanings, but
-the true picture is more complicated. Krovetz uses a model of word
-meanings which is extremely helpful here. He makes a distinction between
-homonyms and polysemes. The meanings of homonyms are quite unrelated.
-For example, ground in the sense of ‘earth’, and ‘ground’ as the past
-participle of ‘grind’ are homonyms. Etymologically homonyms have different
-stories, and they usually have separate entries in a dictionary. But each
-homonym form can have a range of polysemic forms, corresponding to different
-shades of meaning. So ground can mean the earth’s surface, or the bottom
-of the sea, or soil, or any base, and so the basis of an argument, and so on.
-Over time new polysemes appear and old ones die. At any moment, the use of a
-word will be common in some polysemic forms and rare in others. If a suffix is
-attached to a word the new word will get a different set of polysemes. For
-example, grounds = ground + s acquires the sense of ‘dregs’ and
-‘estate lands’, loses the sense of ‘earth’, and shares the sense of
-‘basis’.
-
-
-
-Consider the conflation of mobility with mobile. mobile has
-acquired two new polysemes not shared with mobility. One is the ‘mobile
-art object’, common in the nursery. This arrived in the 1960s, and is
-still in use. The other is the ‘mobile phone’ which is now very dominant,
-although it may decline in the future when it has been replaced by some new
-gadget with a different name. We might draw a graph of the degree of
-separation of the meanings of mobility and mobile against time,
-which would depend upon the number of polysemes and the intensity of their
-use. What seemed like a valid conflation of the two words in 1940 may seem
-to be invalid today.
-
-
-
-In general therefore one can say that judgements about whether words are
-over-stemmed change with time as the meanings of words in the language
-change.
-
-
-
-The use of a dictionary should reduce errors of mis-stemming and errors of
-over-stemming. And, for English at least, the mis-stemming errors should
-reduce well, even if there are problems with over-stemming errors. Of
-course, it depends on the quality of the dictionary. A dictionary will need
-to be very comprehensive, fully up-to-date, and with good word definitions
-to achieve the best results.
-
-
-
-Historically, stemmers have often been thought of as either
-dictionary-based or algorithmic. The presentation of studies of stemming
-in the literature has perhaps helped to create this division. In the
-Lovins’ stemmer the algorithmic description is central. In accounts of
-dictionary-based stemmers the emphasis tends to be on dictionary content
-and structure, and IR effectiveness. Savoy’s French stemmer (Savoy, 1993)
-is a good example of this. But the two approaches are not really distinct.
-An algorithmic stemmer can include long exception lists that are
-effectively mini-dictionaries, and a dictionary-based stemmer usually
-needs a process for removing at least i-suffixes to make the look-up
-in the dictionary possible. In fact in a language in which proper names
-are inflected (Latin, Finnish, Russian ...), a dictionary-based stemmer
-will need to remove i-suffixes independently of dictionary look-up,
-because the proper names will not of course be in the dictionary.
-
-
-
-The stemmers available on the Snowball website are all purely
-algorithmic. They can be extended to include built-in exception lists, they
-could be used in combination with a full dictionary, but they are still
-presented here in their simplest possible form. Being purely algorithmic,
-they are, or ought to be, inferior to the performance of well-constructed
-dictionary-based stemmers. But they are still very useful, for the
-following reasons:
-
-
-
-Algorithmic stemmers are (or can be made) very lean and very fast. The
-stemmers presented here generate code that will process about a million
-words in six seconds on a conventional 500MHz PC. Nowadays we can generate
-very large IR systems with quite modest resources, and tools that assist in
-this have value.
-
-
-Despite the errors they can be seen to make, algorithmic stemmers still
-give good practical results. As Krovetz (1995) says in surprise of the
-algorithmic stemmer, ‘Why does it do so well?’ (page 89).
-
-
-Dictionary-based stemmers require dictionary maintenance, to keep up
-with an ever-changing language, and this is actually quite a problem. It
-is not just that a dictionary created to assist stemming today will
-probably require major updating in a few years’ time, but that a dictionary
-in use for this purpose today may already be several years out of date.
-
-
-
-
-We can hazard an answer to Krovetz’s question, as to why algorithmic
-stemmers perform as well as they do, when they reveal so many cases of
-under-, over- and mis-stemming. Under-stemming is a fault, but by itself
-it will not degrade the performance of an IR system. Because of
-under-stemming words may fail
-to conflate that ought to have conflated, but you are, in a sense, no
-worse off than you were before. Mis-stemming is more serious, but again
-mis-stemming does not really matter unless it leads to false conflations,
-and that frequently does not happen. For example, removing the ate
-ending in English can result in useful conflations (luxury,
-luxuriate; affection, affectionate), but very often produces
-stems that are not English words
-(enerv-ate, accommod-ate,
-deliber-ate etc). In the literature, these are normally
-classed as stemming errors — overstemming — although in our nomenclature
-they are examples of mis-stemming.
-However these residual stems,
-enerv, accommod,
-deliber ... do not conflate with other word forms, and so behave in
-an IR system in the same way as if they still retained their ate
-ending. No false conflations arise, and so there is no over-stemming here.
-
-
-
-To summarise, one can say that just as a word can be over-stemmed
-but not mis-stemmed (relativity → relative), so it can be
-mis-stemmed but not over-stemmed (enervate → enerv). And, of
-course, even over-stemming does not matter, if the over-stemmed word falsely
-conflates with other words that exist in the language, but are not
-encountered in the IR
-system which is being used.
-
-
-
-Of the three types of error,
-over-stemming is the most important, and
-using a dictionary does not eliminate all over-stemmings, but does reduce their
-incidence.
-
-
-4 Stemming as part of an indexing process
-
-
-Stemming is part of a composite process of extracting words from text and
-turning them into index terms in an IR system. Because stemming is somewhat
-complex and specialised, it is usually studied in isolation. Even so, it
-cannot really be separated from other aspects of the indexing process:
-
-
-
-What is a word? For indexing purposes, a word in a European language is
-a sequence of letters bounded by non-letters. But in English, an internal
-apostrophe does not split a word, although it is not classed as a letter.
-The treatment of these word boundary characters affects the stemmer. For
-example, the Kraaij-Pohlmann stemmer for Dutch (Kraaij, 1994, 1995) removes hyphen and
-treats apostrophe as part of the alphabet (so ’s, ’tje and ’je are three
-of their endings). The Dutch stemmer presented here assumes hyphen and
-apostrophe have already been removed from the word to be stemmed.
-
-
-What is a letter? Clearly letters define words, but different languages
-use different letters, much confusion coming from the varied use of
-accented Roman letters.
-
-
-
-English speakers, perhaps influenced by the ASCII character set, typically regard
-their alphabet of a to z as the norm, and other forms (for example, Danish
-å and ø, or German ß) as somewhat abnormal. But this is
-an insular point of view. In Italian, for example, the letters
-j, k, w, x and y are not part of the alphabet, and are
-only seen in foreign words. We also tend to regard other alphabets as only
-used for isolated languages, and that is not strictly true. Cyrillic is
-used for a range of languages other than Russian, among which additional
-letters and accented forms abound.
-
-
-
-In English, a broad definition of letter would be anything that could be
-accepted as a pronounceable element of a word. This would include
-accented Roman letters (naïve, Fauré), and certain ligature
-forms (encyclopædia). It would exclude letters
-of foreign alphabets, such as Greek and Cyrillic.
-The a to z alphabet is one of those where letters come in
-two styles, upper and lower case, which historically correspond (very roughly) to the
-shapes you get if you use a chisel or a pen. Across all languages, the
-exact relation of upper to lower case is not so easy to define. In Italian,
-for example, an accented lower case letter is sometimes represented in
-upper case by the unaccented letter followed by an apostrophe. (I have
-seen this convention used in modern Italian news stories in machine
-readable form.)
-
-
-
-In fact the Porter stemmer (which is for English) assumes the word being stemmed is
-unaccented and in lower case. More exactly, a, e, i, o,
-u,
-and sometimes y, are
-treated as vowels, and any other character gets treated as a consonant.
-Each stemmer presented here assumes some degree of normalisation before it
-receives the word, which is roughly (a) put all letters into lower case,
-and (b) remove accents from letter-accent combinations that do not form
-part of the alphabet of the language. Each stemmer declares the
-letter-accent combinations for its language, and this can be used as a
-guide for the normalisation, but even so, we can see from
-the discussion above that (a) and (b) are not trivial
-operations, and need to be done with care.
-
-
-
-(Incidentally, because the stemmers work on lower case words, turning
-letters to upper case is sometimes used internally for flagging purposes.)
-
-
-Identifying stopwords. Invariant stopwords are more easily found before
-stemming is applied, but inflecting stopwords (for example, German kein, keine, keinem,
-keinen ... ) may be easier to find after — because there are fewer forms.
-There is a case for building stopword identification into the stemming
-process. See section 7.
-
-
-Conflating irregular forms. More will be said on this in section 6.
-
-
-
-5 The use of stemmed words
-
-
-The idea of how stemmed words might be employed in an IR system has
-evolved slightly over the years. The Lovins stemmer (Lovins 1968) was
-developed not for indexing document texts, but the subject terms attached
-to them. With queries stemmed in the same way, the user needed no special
-knowledge of the form of the subject terms. Rijsbergen (1979, Chapter 2)
-assumes document text analysis: stopwords are removed, the remaining words
-are stemmed, and the resulting set of stemmed words constitutes the IR index
-(and this style of use is widespread today). More flexibility however is
-obtained by indexing all words in a text in an unstemmed form, and
-keeping a separate two-column relation which connects the words to their
-stemmed equivalents. The relation can be denoted by R(s, w), which means
-that s is the stemmed form of word w. From the relation we can get, for
-any word w, its unique stemmed form, stem(w), and for any stem s, the set
-of words, words(s), that stem to s.
-
-
-
-The user should not have to see the stemmed form of a word. If a list of
-stems is to be presented back for query expansion, in place of
-a stem, s, the user should be shown a single representative from the set
-words(s), the one of highest frequency perhaps. The user should also
-be able to choose for the whole query, or at a lower level for each word
-in a query, whether or not it should be stemmed. In the absence of such
-choices, the system can make its own
-decisions.
-Perhaps single word queries would not undergo
-stemming; long queries would; stopwords would be removed
-except in phrases. In query expansion, the system would work with stemmed
-forms, ignoring stopwords.
-
-
-
-Query expansion with stemming results in a much cleaner vocabulary list
-than without, and this is a main strength of using a stemming process.
-
-
-
-A question arises: if the user never sees the stemmed form, does its
-appearance matter? The answer must be no, although
-the Porter stemmer tries to make the unstemmed forms guessable from the stemmed
-forms. For example, from appropri you can guess appropriate. At least,
-trying to achieve this effect acts as a useful control. Similarly with the
-other stemmers presented here, an attempt has been made to keep the
-appearance of the stemmed forms as familiar as possible.
-
-
-6 Irregular grammatical forms
-
-
-All languages contain irregularities, but to what extent should they be
-accommodated in a stemming algorithm? An English stemmer, for example, can
-convert regular plurals to singular form without difficulty (boys, girls,
-hands ...). Should it do the same with irregular plurals (men, children,
-feet, ...)? Here we have irregular cases with i-suffixes, but there are
-irregularities with d-suffixes, which Lovins calls ‘spelling exceptions’.
-absorb/absorption and conceive/conception are examples of this.
-Etymologically, the explanation of the first is that the Latin root,
-sorbere, is an irregular verb, and of the second that the word
-conceive comes to us from the French rather than straight from the Latin.
-It is interesting that, even with no knowledge of the etymology, we do
-recognise the connection between the words.
-
-
-
-Lovins tries to solve spelling exceptions by formulating general respelling
-rules (turn rpt into rb for example), but it might be easier to have
-simply a list of exceptional stems.
-
-
-
-The Porter stemmer does not handle irregularities at all, but from the
-author’s own experience, this has never been an area of complaint.
-Complaints in fact are always about false conflations, for example new
-and news.
-
-
-
-Possibly Lovins was right in wanting to resolve d-suffix irregularities,
-and not being concerned about i-suffix irregularities. i-suffix
-irregularities in English go with short, old words, that are either in very
-common use (man/men, woman/women, see/saw ...) or are used only rarely
-(ox/oxen, louse/lice, forsake/forsook ...). The latter class can be
-ignored, and the former has its own problems which are not always solved
-by stemming. For example man is a verb, and saw can mean a cutting
-instrument, or, as a verb, can mean to use such an instrument. Conflation
-of these forms therefore frequently leads to an error like mis-stemming.
-
-
-
-An algorithmic stemmer really needs holes where the irregular forms can be
-plugged in as necessary. This is more serviceable than attempting to
-embed special lists of these irregular forms into software.
-
-
-7 Stopwords
-
-
-We have suggested that stemming stopwords is not useful. There is a
-grammatical connection between being and be, but conflation of the two
-forms has little use in IR because they have no shared meaning that would
-entitle us to think of them as synonyms. being and be have a
-morphological connection as well, but that is not true of am and was,
-although they have a grammatical connection. Generally speaking,
-inflectional stopwords exhibit many irregularities, which means that
-stemming is not only not useful, but not possible, unless one builds into
-the stemmer tables of exceptions.
-
-
-
-Switching from English to French, consider être, the equivalent form
-of be. It has about 40 different forms, including,
-
-
--
- suis es sommes serez étaient fus furent sois été
-
-
-
-(and suis incidentally is a homonym, as part of the verb suivre.)
-Passing all forms through a rule-based stemmer creates something of a
-mess. An alternative approach is to recognise this group of words, and
-other groups, and take special action. The recognition could take place
-inside the stemmer, or be done before the stemmer is called. One special
-action would be to stem (perhaps one should say ‘map’) all the forms to a
-standard form, ETRE, to indicate that they are parts of the verb être.
-Deciding what to do with the term ETRE, and it would probably be to
-discard it, would be done outside the stemming process. Another special
-action would be to recognise a whole class of stopwords and simply discard
-them.
-
-
-
-The strategy adopted will depend upon the underlying IR model, so what one
-needs is the flexibility to create modified forms of a standard stemmer.
-Usually we present Snowball stemmers in their unadorned form. Thereafter,
-the addition of stopword tables is quite easy.
-
-
-8 Rare forms
-
-
-Stemmers do not need to handle linguistic forms that turn up only very
-rarely, but in practice it is hard to design a stemmer with all rare forms
-eliminated without there appearing to be some gaps in the thinking. For
-this reason one should not worry too much about their occasional presence.
-For example, in contemporary Portuguese, use of the second person plural
-form of verbs has almost completely disappeared. Even so, endings for
-those forms are included in the Portuguese stemmer. They appear in all the
-grammar books, and will in any case be found in older texts. The habit of
-putting in rare forms to ‘complete the picture’ is well established, and
-usually passes unnoticed. An example is the list of English stopwords in
-van Rijsbergen (1979). This includes yourselves, by analogy with
-himself, herself etc., although yourselves is actually quite a rare
-word in English.
-
-
-References
-
-
-Farber DJ, Griswold RE and Polonsky IP (1964) SNOBOL, a string manipulation
-language. Journal of the Association for Computing Machinery, 11: 21-30.
-
-
-
-Griswold RE, Poage JF and Polonsky IP (1968) The SNOBOL4 programming
-language. Prentice-Hall, New Jersey.
-
-
-
-Harman D (1991) How effective is suffixing? Journal of the American
-Society for Information Science, 42: 7-15.
-
-
-
-Jesperson O (1921) Language, its nature, origin and development. George
-Allen & Unwin, London.
-
-
-
-Kraaij W and Pohlmann R (1994) Porter’s stemming algorithm for Dutch. In
-Noordman LGM and de Vroomen WAM, eds. Informatiewetenschap 1994:
-Wetenschappelijke bijdragen aan de derde STINFON Conferentie, Tilburg,
-1994. pp. 167-180.
-
-
-
-Kraaij W and Pohlmann R (1995) Evaluation of a Dutch stemming algorithm. In
-Rowley J, ed. The New Review of Document and Text Management, volume 1,
-Taylor Graham, London, 1995. pp. 25-43.
-
-
-
-Krovetz B (1995) Word sense disambiguation for large text databases. PhD
-Thesis. Department of Computer Science, University of Massachusetts
-Amherst.
-
-
-
-Lennon M, Pierce DS, Tarry BD and Willett P (1981) An evaluation of some
-conflation algorithms for information retrieval. Journal of Information
-Science, 3: 177-183.
-
-
-
-Lovins JB (1968) Development of a stemming algorithm. Mechanical
-Translation and Computational Linguistics, 11: 22-31.
-
-
-
-Palmer FR (1965) A linguistic study of the English verb. Longmans, London.
-
-
-
-Popovic M and Willett P (1990) Processing of documents and queries in a
-Slovene language free text retrieval system. Literary and Linguistic
-Computing, 5: 182-190.
-
-
-
-Porter MF (1980) An algorithm for suffix stripping. Program, 14: 130-137.
-
-
-
-Rijsbergen CJ (1979) Information retrieval. Second edition. Butterworths,
-London.
-
-
-
-Savoy J (1993) Stemming of French words based on grammatical categories.
-Journal of the American Society for Information Science, 44: 1-9.
-
-
-
-Schinke R, Greengrass M, Robertson AM and Willett P (1996) A stemming
-algorithm for Latin text databases. Journal of Documentation, 52:
-172-187.
-
-
-[% footer %]
diff --git a/texts/r1r2.tt b/texts/r1r2.tt
deleted file mode 100644
index 6b2df04..0000000
--- a/texts/r1r2.tt
+++ /dev/null
@@ -1,78 +0,0 @@
-[% header('Defining R1 and R2') %]
-
-
-Most of the stemmers make use of at least one of the region definitions R1 and
-R2. They are defined as follows:
-
-
-
-R1 is the region after the first non-vowel following a vowel, or is the null
-region at the end of the word if there is no such non-vowel.
-
-
-
-R2 is the region after the first non-vowel following a vowel in R1, or is
-the null region at the end of the word if there is no such non-vowel.
-
-
-
-The definition of vowel varies from language to language. In French, for
-example, é is a vowel, and in Italian i between two other vowels is not a
-vowel. The class of letters that constitute vowels is made clear in each stemmer.
-
-
-
-Below, R1 and R2 are shown for a number of English words,
-
-
-
- b e a u t i f u l
- |<------------->| R1
- |<----->| R2
-
-
-
-Letter t is the first non-vowel following a vowel in beautiful, so R1
-is iful. In iful, the letter f is the first non-vowel following a
-vowel, so R2 is ul.
-
-
-
- b e a u t y
- |<->| R1
- ->|<- R2
-
-
-
-In beauty, the last letter y is classed as a vowel. Again, letter t is
-the first non-vowel following a vowel, so R1 is just the last letter, y.
-R1 contains no non-vowel, so R2 is the null region at the end of the word.
-
-
-
- b e a u
- ->|<- R1
- ->|<- R2
-
-In beau, R1 and R2 are both null.
-
-
-
-Other examples:
-
-
-
- a n i m a d v e r s i o n
- |<----------------------------------------->| R1
- |<--------------------------------->| R2
-
- s p r i n k l e d
- |<------------->| R1
- ->|<- R2
-
- e u c h a r i s t
- |<--------------------->| R1
- |<--------->| R2
-
-
-[% footer %]
diff --git a/texts/vowelmarking.tt b/texts/vowelmarking.tt
deleted file mode 100644
index 13a24d2..0000000
--- a/texts/vowelmarking.tt
+++ /dev/null
@@ -1,74 +0,0 @@
-[% header('Marking vowels as consonants') %]
-
-
-Some of the algorithms begin with a step which puts letters which are
-normally classed as vowels into upper case to indicate that they are to be
-treated as consonants (the assumption being that the words are presented to
-the stemmers in lower case). Upper case therefore acts as a flag indicating a
-consonant.
-
-
-
-For example, the English stemmer begins with the step
-
-
- Set initial y, or y after a vowel, to Y,
-
-giving rise to the following changes,
-
-
-
-    youth       →   Youth
-    boy         →   boY
-    boyish      →   boYish
-    fly         →   fly
-    flying      →   flying
-    syzygy      →   syzygy
-
-
-This process works from left to right, and
-if a word contains Vyy, where V is a vowel, the first y is put
-into upper case, but the second y is left alone, since it is preceded by
-upper case Y which is a consonant. A sequence Vyyyyy... would be
-changed to VYyYyY....
-
-
-
-The combination yy never occurs in English, although it might appear in
-foreign words:
-
-
-    sayyid      →   saYyid
-
-
-(A sayyid, my dictionary tells me, is a descendant of Mohammed's daughter
-Fatima.) But the left-to-right process is significant in other languages, for
-example French. In French the rule for marking vowels as consonants is,
-
-
--
- Put into upper case u or i preceded and followed by a vowel, and
- y preceded or followed by a vowel. Put u after q into upper
- case.
-
-
-
-which gives rise to,
-
-
-
-    ennuie      →   ennuIe
-    inquiétude  →   inqUiétude
-
-
-In the first word, i is put into upper case since it has a vowel on both
-sides of it.
-In the second word, u after q is put into upper case, and again the
-following i is left alone, since it is preceded by upper case U which
-is a consonant.
-
-
-[% footer %]