4 年前 · e820682a65
--- a/dictionary-reader/pom.xml
+++ b/dictionary-reader/pom.xml
@@ -0,0 +1,31 @@
 
				+<?xml version="1.0"?>
			
 
				+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
			
 
				+    <parent>
			
 
				+        <artifactId>morphology</artifactId>
			
 
				+        <groupId>org.apache.lucene.morphology</groupId>
			
 
				+        <version>1.5</version>
			
 
				+    </parent>
			
 
				+    <modelVersion>4.0.0</modelVersion>
			
 
				+    <groupId>org.apache.lucene.morphology</groupId>
			
 
				+    <artifactId>dictionary-reader</artifactId>
			
 
				+    <name>dictionary-reader</name>
			
 
				+    <version>1.5</version>
			
 
				+    <url>http://maven.apache.org</url>
			
 
				+
			
 
				+    <dependencies>
			
 
				+        <dependency>
			
 
				+            <groupId>org.apache.lucene.morphology</groupId>
			
 
				+            <artifactId>russian</artifactId>
			
 
				+            <version>1.5</version>
			
 
				+        </dependency>
			
 
				+
			
 
				+
			
 
				+        <dependency>
			
 
				+            <groupId>org.apache.lucene.morphology</groupId>
			
 
				+            <artifactId>english</artifactId>
			
 
				+            <version>1.5</version>
			
 
				+        </dependency>
			
 
				+    </dependencies>
			
 
				+
			
 
				+
			
 
				+</project>
			
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java
@@ -0,0 +1,137 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov 
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.apache.lucene.morphology.dictionary;
			
 
				+
			
 
				+
			
 
				+import java.io.BufferedReader;
			
 
				+import java.io.FileInputStream;
			
 
				+import java.io.IOException;
			
 
				+import java.io.InputStreamReader;
			
 
				+import java.util.ArrayList;
			
 
				+import java.util.HashSet;
			
 
				+import java.util.List;
			
 
				+import java.util.Set;
			
 
				+
			
 
				+
			
 
				+/**
			
 
				+ * This class contain logic how read
			
 
				+ * dictonary and produce word with it all forms.
			
 
				+ */
			
 
				+public class DictionaryReader {
			
 
				+    private String fileName;
			
 
				+    private String fileEncoding = "windows-1251";
			
 
				+    private List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
			
 
				+    private Set<String> ignoredForm = new HashSet<String>();
			
 
				+
			
 
				+    public DictionaryReader(String fileName, Set<String> ignoredForm) {
			
 
				+        this.fileName = fileName;
			
 
				+        this.ignoredForm = ignoredForm;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    public void process(WordProcessor wordProcessor) throws IOException {
			
 
				+        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), fileEncoding));
			
 
				+        readFlexias(bufferedReader);
			
 
				+        skipBlock(bufferedReader);
			
 
				+        skipBlock(bufferedReader);
			
 
				+        readPrefix(bufferedReader);
			
 
				+        readWords(bufferedReader, wordProcessor);
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    private void readWords(BufferedReader reader, WordProcessor wordProcessor) throws IOException {
			
 
				+        String s = reader.readLine();
			
 
				+        int count = Integer.valueOf(s);
			
 
				+        int actual = 0;
			
 
				+        for (int i = 0; i < count; i++) {
			
 
				+            s = reader.readLine();
			
 
				+            if (i % 10000 == 0) System.out.println("Proccess " + i + " wordBase of " + count);
			
 
				+
			
 
				+            WordCard card = buildForm(s);
			
 
				+
			
 
				+            if (card == null) {
			
 
				+                continue;
			
 
				+            }
			
 
				+
			
 
				+            wordProcessor.process(card);
			
 
				+            actual++;
			
 
				+
			
 
				+        }
			
 
				+        System.out.println("Finished word processing actual words " + actual);
			
 
				+    }
			
 
				+
			
 
				+    private WordCard buildForm(String s) {
			
 
				+        String[] wd = s.split(" ");
			
 
				+        String wordBase = wd[0].toLowerCase();
			
 
				+        if (wordBase.startsWith("-")) return null;
			
 
				+        wordBase = "#".equals(wordBase) ? "" : wordBase;
			
 
				+        List<FlexiaModel> models = wordsFlexias.get(Integer.valueOf(wd[1]));
			
 
				+        FlexiaModel flexiaModel = models.get(0);
			
 
				+        if (models.size() == 0 || ignoredForm.contains(flexiaModel.getCode())) {
			
 
				+            return null;
			
 
				+        }
			
 
				+
			
 
				+        WordCard card = new WordCard(flexiaModel.create(wordBase), wordBase, flexiaModel.getSuffix());
			
 
				+
			
 
				+        for (FlexiaModel fm : models) {
			
 
				+            card.addFlexia(fm);
			
 
				+        }
			
 
				+        return card;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    private void skipBlock(BufferedReader reader) throws IOException {
			
 
				+        String s = reader.readLine();
			
 
				+        int count = Integer.valueOf(s);
			
 
				+        for (int i = 0; i < count; i++) {
			
 
				+            reader.readLine();
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    private void readPrefix(BufferedReader reader) throws IOException {
			
 
				+        String s = reader.readLine();
			
 
				+        int count = Integer.valueOf(s);
			
 
				+        for (int i = 0; i < count; i++) {
			
 
				+            reader.readLine();
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    private void readFlexias(BufferedReader reader) throws IOException {
			
 
				+        String s = reader.readLine();
			
 
				+        int count = Integer.valueOf(s);
			
 
				+        for (int i = 0; i < count; i++) {
			
 
				+            s = reader.readLine();
			
 
				+            ArrayList<FlexiaModel> flexiaModelArrayList = new ArrayList<FlexiaModel>();
			
 
				+            wordsFlexias.add(flexiaModelArrayList);
			
 
				+            for (String line : s.split("%")) {
			
 
				+                addFlexia(flexiaModelArrayList, line);
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    private void addFlexia(ArrayList<FlexiaModel> flexiaModelArrayList, String line) {
			
 
				+        String[] fl = line.split("\\*");
			
 
				+        // we inored all forms thats
			
 
				+        if (fl.length == 3) {
			
 
				+            //System.out.println(line);
			
 
				+            flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), fl[2].toLowerCase()));
			
 
				+        }
			
 
				+        if (fl.length == 2) flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), ""));
			
 
				+    }
			
 
				+
			
 
				+}
			
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/FlexiaModel.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/FlexiaModel.java
@@ -0,0 +1,91 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov 

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+

			
 
				+package org.apache.lucene.morphology.dictionary;

			
 
				+

			
 
				/**
				 * Describes how a word form is created from the immutable part of a word:
				 * a code plus the prefix and suffix that are attached to the base.
				 */
				public class FlexiaModel {
				    private String code;
				    private String suffix;
				    private String prefix;

				    public FlexiaModel(String code, String suffix, String prefix) {
				        this.code = code;
				        this.suffix = suffix;
				        this.prefix = prefix;
				    }

				    /** Builds the word form by surrounding the given base with prefix and suffix. */
				    public String create(String s) {
				        return new StringBuilder().append(prefix).append(s).append(suffix).toString();
				    }

				    public String getCode() {
				        return code;
				    }

				    public String getSuffix() {
				        return suffix;
				    }

				    public String getPrefix() {
				        return prefix;
				    }

				    public void setCode(String code) {
				        this.code = code;
				    }

				    public void setSuffix(String suffix) {
				        this.suffix = suffix;
				    }

				    public void setPrefix(String prefix) {
				        this.prefix = prefix;
				    }

				    @Override
				    public String toString() {
				        StringBuilder text = new StringBuilder("FlexiaModel{");
				        text.append("code='").append(code).append('\'');
				        text.append(", suffix='").append(suffix).append('\'');
				        text.append(", prefix='").append(prefix).append('\'');
				        return text.append('}').toString();
				    }

				    /** Two models are equal when code, prefix and suffix all match (nulls included). */
				    @Override
				    public boolean equals(Object o) {
				        if (o == this) {
				            return true;
				        }
				        if (o == null || o.getClass() != getClass()) {
				            return false;
				        }
				        FlexiaModel other = (FlexiaModel) o;
				        return sameText(code, other.code)
				                && sameText(prefix, other.prefix)
				                && sameText(suffix, other.suffix);
				    }

				    @Override
				    public int hashCode() {
				        int hash = textHash(code);
				        hash = 31 * hash + textHash(suffix);
				        hash = 31 * hash + textHash(prefix);
				        return hash;
				    }

				    /** Null-safe string equality. */
				    private static boolean sameText(String left, String right) {
				        return left == null ? right == null : left.equals(right);
				    }

				    /** Null-safe string hash; null hashes to 0. */
				    private static int textHash(String text) {
				        return text == null ? 0 : text.hashCode();
				    }
				}

			
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammarReader.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammarReader.java
@@ -0,0 +1,72 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+

			
 
				+package org.apache.lucene.morphology.dictionary;

			
 
				+

			
 
				+import java.io.BufferedReader;

			
 
				+import java.io.FileInputStream;

			
 
				+import java.io.IOException;

			
 
				+import java.io.InputStreamReader;

			
 
				+import java.util.ArrayList;

			
 
				+import java.util.HashMap;

			
 
				+import java.util.List;

			
 
				+import java.util.Map;

			
 
				+

			
 
				+

			
 
				/**
				 * Loads a grammar table file into memory.
				 *
				 * <p>Every non-empty line that does not start with "//" is split at its first
				 * space: the part before it is the code, the part after it is the grammar info.
				 * Infos are kept in file order; the inverse index maps each code to the position
				 * of its info in that list.
				 */
				public class GrammarReader {
				    private String fileName;
				    private String fileEncoding = "windows-1251";
				    private List<String> grammarInfo = new ArrayList<String>();
				    private Map<String, Integer> inverseIndex = new HashMap<String, Integer>();

				    /**
				     * Reads the file using the default "windows-1251" encoding.
				     *
				     * @param fileName path of the grammar file
				     * @throws IOException if the file cannot be read or contains a malformed line
				     */
				    public GrammarReader(String fileName) throws IOException {
				        this.fileName = fileName;
				        setUp();
				    }

				    /**
				     * @param fileName     path of the grammar file
				     * @param fileEncoding charset name used to decode the file
				     * @throws IOException if the file cannot be read or contains a malformed line
				     */
				    public GrammarReader(String fileName, String fileEncoding) throws IOException {
				        this.fileName = fileName;
				        this.fileEncoding = fileEncoding;
				        setUp();
				    }

				    /** Parses the file into {@code grammarInfo} and {@code inverseIndex}. */
				    private void setUp() throws IOException {
				        FileInputStream stream = new FileInputStream(fileName);
				        try {
				            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, fileEncoding));
				            for (String line = bufferedReader.readLine(); line != null; line = bufferedReader.readLine()) {
				                line = line.trim();
				                if (line.length() == 0 || line.startsWith("//")) {
				                    continue;
				                }
				                String[] strings = line.split(" ", 2);
				                if (strings.length < 2) {
				                    // A code without info used to fail with a bare ArrayIndexOutOfBoundsException.
				                    throw new IOException("Malformed grammar line in " + fileName + ": '" + line + "'");
				                }
				                // The index of an info is its position in the list, i.e. the current size.
				                inverseIndex.put(strings[0], grammarInfo.size());
				                grammarInfo.add(strings[1]);
				            }
				        } finally {
				            // Always release the file handle, also when decoding or parsing fails.
				            stream.close();
				        }
				    }

				    /** @return the grammar infos in file order (the internal, mutable list) */
				    public List<String> getGrammarInfo() {
				        return grammarInfo;
				    }

				    /** @return a fresh array holding the grammar infos in file order */
				    public String[] getGrammarInfoAsArray() {
				        return grammarInfo.toArray(new String[grammarInfo.size()]);
				    }

				    /** @return map from code to the index of its info in {@link #getGrammarInfo()} */
				    public Map<String, Integer> getGrammarInverseIndex() {
				        return inverseIndex;
				    }
				}

			
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RemoveFlexiaWithPrefixes.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RemoveFlexiaWithPrefixes.java
@@ -0,0 +1,44 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+package org.apache.lucene.morphology.dictionary;

			
 
				+

			
 
				+import java.util.Arrays;

			
 
				+import java.util.LinkedList;

			
 
				+import java.util.List;

			
 
				+

			
 
				+

			
 
				+public class RemoveFlexiaWithPrefixes extends WordFilter {

			
 
				+

			
 
				+    public RemoveFlexiaWithPrefixes(WordProcessor wordProcessor) {

			
 
				+        super(wordProcessor);

			
 
				+    }

			
 
				+

			
 
				+    @Override

			
 
				+    public List<WordCard> transform(WordCard wordCard) {

			
 
				+

			
 
				+        List<FlexiaModel> flexiaModelsToRemove = new LinkedList<FlexiaModel>();

			
 
				+        for (FlexiaModel fm : wordCard.getWordsForms()) {

			
 
				+            if (fm.getPrefix().length() > 0) {

			
 
				+                flexiaModelsToRemove.add(fm);

			
 
				+            }

			
 
				+        }

			
 
				+        for (FlexiaModel fm : flexiaModelsToRemove) {

			
 
				+            wordCard.removeFlexia(fm);

			
 
				+        }

			
 
				+

			
 
				+        return new LinkedList<WordCard>(Arrays.asList(wordCard));

			
 
				+    }

			
 
				+}

			
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RussianAdvSplitterFilter.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RussianAdvSplitterFilter.java
@@ -0,0 +1,61 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+package org.apache.lucene.morphology.dictionary;

			
 
				+

			
 
				+import java.io.BufferedReader;

			
 
				+import java.io.IOException;

			
 
				+import java.io.InputStreamReader;

			
 
				+import java.util.LinkedList;

			
 
				+import java.util.List;

			
 
				+

			
 
				+

			
 
				+public class RussianAdvSplitterFilter extends WordFilter {

			
 
				+    private String code;

			
 
				+

			
 
				+    public RussianAdvSplitterFilter(WordProcessor wordProcessor) throws IOException {

			
 
				+        super(wordProcessor);

			
 
				+        code = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream("/russian-adv-main-code.txt"), "windows-1251")).readLine();

			
 
				+    }

			
 
				+

			
 
				+    @Override

			
 
				+    public List<WordCard> transform(WordCard wordCard) {

			
 
				+        LinkedList<WordCard> result = new LinkedList<WordCard>();

			
 
				+        result.add(wordCard);

			
 
				+

			
 
				+        String baseWord = "";

			
 
				+        String canonicalForm = "";

			
 
				+        String canonicalSuffix = "";

			
 
				+        List<FlexiaModel> flexiaModels = new LinkedList<FlexiaModel>();

			
 
				+        for (FlexiaModel flexiaModel : wordCard.getWordsForms()) {

			
 
				+            if (flexiaModel.getPrefix().length() > 0) {

			
 
				+                flexiaModels.add(new FlexiaModel(flexiaModel.getCode(), flexiaModel.getSuffix(), ""));

			
 
				+            }

			
 
				+            if (flexiaModel.getPrefix().length() > 0 && flexiaModel.getCode().equals(code)) {

			
 
				+                baseWord = flexiaModel.getPrefix() + wordCard.getBase();

			
 
				+                canonicalForm = flexiaModel.getCode();

			
 
				+                canonicalSuffix = flexiaModel.getSuffix();

			
 
				+            }

			
 
				+        }

			
 
				+

			
 
				+        if (baseWord.length() > 0) {

			
 
				+            WordCard wc = new WordCard(canonicalForm, baseWord, canonicalSuffix);

			
 
				+            wc.setWordsForms(flexiaModels);

			
 
				+            result.add(wc);

			
 
				+        }

			
 
				+

			
 
				+        return result;

			
 
				+    }

			
 
				+}

			
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java
@@ -0,0 +1,154 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.apache.lucene.morphology.dictionary;
			
 
				+
			
 
				+
			
 
				+import org.apache.lucene.morphology.Heuristic;
			
 
				+import org.apache.lucene.morphology.LetterDecoderEncoder;
			
 
				+import org.apache.lucene.morphology.MorphologyImpl;
			
 
				+
			
 
				+import java.io.IOException;
			
 
				+import java.util.*;
			
 
				+
			
 
				+
			
 
				+//todo made refactoring this class
			
 
				+public class StatisticsCollector implements WordProcessor {
			
 
				+    private TreeMap<String, Set<Heuristic>> inverseIndex = new TreeMap<String, Set<Heuristic>>();
			
 
				+    private Map<Set<Heuristic>, Integer> ruleInverseIndex = new HashMap<Set<Heuristic>, Integer>();
			
 
				+    private List<Set<Heuristic>> rules = new ArrayList<Set<Heuristic>>();
			
 
				+    private GrammarReader grammarReader;
			
 
				+    private LetterDecoderEncoder decoderEncoder;
			
 
				+
			
 
				+
			
 
				+    public StatisticsCollector(GrammarReader grammarReader, LetterDecoderEncoder decoderEncoder) {
			
 
				+        this.grammarReader = grammarReader;
			
 
				+        this.decoderEncoder = decoderEncoder;
			
 
				+    }
			
 
				+
			
 
				+    public void process(WordCard wordCard) throws IOException {
			
 
				+        cleanWordCard(wordCard);
			
 
				+        String normalStringMorph = wordCard.getWordsForms().get(0).getCode();
			
 
				+
			
 
				+        for (FlexiaModel fm : wordCard.getWordsForms()) {
			
 
				+            Heuristic heuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm, normalStringMorph);
			
 
				+            String form = revertWord(fm.create(wordCard.getBase()));
			
 
				+            Set<Heuristic> suffixHeuristics = inverseIndex.get(form);
			
 
				+            if (suffixHeuristics == null) {
			
 
				+                suffixHeuristics = new HashSet<Heuristic>();
			
 
				+                inverseIndex.put(form, suffixHeuristics);
			
 
				+            }
			
 
				+            suffixHeuristics.add(heuristic);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    private void cleanWordCard(WordCard wordCard) {
			
 
				+        wordCard.setBase(cleanString(wordCard.getBase()));
			
 
				+        wordCard.setCanonicalForm(cleanString(wordCard.getCanonicalForm()));
			
 
				+        wordCard.setCanonicalSuffix(cleanString(wordCard.getCanonicalSuffix()));
			
 
				+        List<FlexiaModel> models = wordCard.getWordsForms();
			
 
				+        for (FlexiaModel m : models) {
			
 
				+            m.setSuffix(cleanString(m.getSuffix()));
			
 
				+            m.setPrefix(cleanString(m.getPrefix()));
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    public void saveHeuristic(String fileName) throws IOException {
			
 
				+
			
 
				+        Map<Integer, Integer> dist = new TreeMap<Integer, Integer>();
			
 
				+        Set<Heuristic> prevSet = null;
			
 
				+        int count = 0;
			
 
				+        for (String key : inverseIndex.keySet()) {
			
 
				+            Set<Heuristic> currentSet = inverseIndex.get(key);
			
 
				+            if (!currentSet.equals(prevSet)) {
			
 
				+                Integer d = dist.get(key.length());
			
 
				+                dist.put(key.length(), 1 + (d == null ? 0 : d));
			
 
				+                prevSet = currentSet;
			
 
				+                count++;
			
 
				+                if (!ruleInverseIndex.containsKey(currentSet)) {
			
 
				+                    ruleInverseIndex.put(currentSet, rules.size());
			
 
				+                    rules.add(currentSet);
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+        System.out.println("Word with diffirent rules " + count);
			
 
				+        System.out.println("All ivers words " + inverseIndex.size());
			
 
				+        System.out.println(dist);
			
 
				+        System.out.println("diffirent rule count " + ruleInverseIndex.size());
			
 
				+        Heuristic[][] heuristics = new Heuristic[ruleInverseIndex.size()][];
			
 
				+        int index = 0;
			
 
				+        for (Set<Heuristic> hs : rules) {
			
 
				+            heuristics[index] = new Heuristic[hs.size()];
			
 
				+            int indexj = 0;
			
 
				+            for (Heuristic h : hs) {
			
 
				+                heuristics[index][indexj] = h;
			
 
				+                indexj++;
			
 
				+            }
			
 
				+            index++;
			
 
				+        }
			
 
				+
			
 
				+        int[][] ints = new int[count][];
			
 
				+        short[] rulesId = new short[count];
			
 
				+        count = 0;
			
 
				+        prevSet = null;
			
 
				+        for (String key : inverseIndex.keySet()) {
			
 
				+            Set<Heuristic> currentSet = inverseIndex.get(key);
			
 
				+            if (!currentSet.equals(prevSet)) {
			
 
				+                int[] word = decoderEncoder.encodeToArray(key);
			
 
				+                ints[count] = word;
			
 
				+                rulesId[count] = (short) ruleInverseIndex.get(currentSet).intValue();
			
 
				+                count++;
			
 
				+                prevSet = currentSet;
			
 
				+            }
			
 
				+        }
			
 
				+        MorphologyImpl morphology = new MorphologyImpl(ints, rulesId, heuristics, grammarReader.getGrammarInfoAsArray());
			
 
				+        morphology.writeToFile(fileName);
			
 
				+    }
			
 
				+
			
 
				+    private String revertWord(String s) {
			
 
				+        String result = "";
			
 
				+        for (int i = 1; i <= s.length(); i++) {
			
 
				+            result += s.charAt(s.length() - i);
			
 
				+        }
			
 
				+        return result;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    private Heuristic createEvristic(String wordBase, String canonicalSuffix, FlexiaModel fm, String normalSuffixForm) {
			
 
				+        String form = fm.create(wordBase);
			
 
				+        String normalForm = wordBase + canonicalSuffix;
			
 
				+        Integer length = getCommonLength(form, normalForm);
			
 
				+        Integer actualSuffixLengh = form.length() - length;
			
 
				+        String actualNormalSuffix = normalForm.substring(length);
			
 
				+        Integer integer = grammarReader.getGrammarInverseIndex().get(fm.getCode());
			
 
				+        Integer nf = grammarReader.getGrammarInverseIndex().get(normalSuffixForm);
			
 
				+        return new Heuristic((byte) actualSuffixLengh.intValue(), actualNormalSuffix, (short) integer.intValue(), (short) nf.intValue());
			
 
				+    }
			
 
				+
			
 
				+    public static Integer getCommonLength(String s1, String s2) {
			
 
				+        Integer length = Math.min(s1.length(), s2.length());
			
 
				+        for (int i = 0; i < length; i++) {
			
 
				+            if (s1.charAt(i) != s2.charAt(i)) return i;
			
 
				+        }
			
 
				+        return length;
			
 
				+    }
			
 
				+
			
 
				+    private String cleanString(String s) {
			
 
				+        return decoderEncoder.cleanString(s);
			
 
				+    }
			
 
				+
			
 
				+}
			
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCard.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCard.java
@@ -0,0 +1,86 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov 
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.apache.lucene.morphology.dictionary;
			
 
				+
			
 
				+import java.util.ArrayList;
			
 
				+import java.util.List;
			
 
				+
			
 
				+/**
			
 
				+ * Represent word and all it forms.
			
 
				+ */
			
 
				+public class WordCard {
			
 
				+    private String canonicalForm;
			
 
				+    private String base;
			
 
				+    private String canonicalSuffix;
			
 
				+    private List<FlexiaModel> wordsForms = new ArrayList<FlexiaModel>();
			
 
				+
			
 
				+    public WordCard(String canonicalForm, String base, String canonicalSuffix) {
			
 
				+        this.canonicalForm = canonicalForm;
			
 
				+        this.canonicalSuffix = canonicalSuffix;
			
 
				+        this.base = base;
			
 
				+    }
			
 
				+
			
 
				+    public void addFlexia(FlexiaModel flexiaModel) {
			
 
				+        wordsForms.add(flexiaModel);
			
 
				+    }
			
 
				+
			
 
				+    public void removeFlexia(FlexiaModel flexiaModel) {
			
 
				+        wordsForms.remove(flexiaModel);
			
 
				+    }
			
 
				+
			
 
				+    public String getCanonicalForm() {
			
 
				+        return canonicalForm;
			
 
				+    }
			
 
				+
			
 
				+    public String getCanonicalSuffix() {
			
 
				+        return canonicalSuffix;
			
 
				+    }
			
 
				+
			
 
				+    public String getBase() {
			
 
				+        return base;
			
 
				+    }
			
 
				+
			
 
				+    public List<FlexiaModel> getWordsForms() {
			
 
				+        return wordsForms;
			
 
				+    }
			
 
				+
			
 
				+    public void setCanonicalForm(String canonicalForm) {
			
 
				+        this.canonicalForm = canonicalForm;
			
 
				+    }
			
 
				+
			
 
				+    public void setBase(String base) {
			
 
				+        this.base = base;
			
 
				+    }
			
 
				+
			
 
				+    public void setCanonicalSuffix(String canonicalSuffix) {
			
 
				+        this.canonicalSuffix = canonicalSuffix;
			
 
				+    }
			
 
				+
			
 
				+    public void setWordsForms(List<FlexiaModel> wordsForms) {
			
 
				+        this.wordsForms = wordsForms;
			
 
				+    }
			
 
				+
			
 
				+    @Override
			
 
				+    public String toString() {
			
 
				+        return "WordCard{" +
			
 
				+                "canonicalForm='" + canonicalForm + '\'' +
			
 
				+                ", base='" + base + '\'' +
			
 
				+                ", canonicalSuffix='" + canonicalSuffix + '\'' +
			
 
				+                ", wordsForms=" + wordsForms +
			
 
				+                '}';
			
 
				+    }
			
 
				+}
			
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCleaner.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCleaner.java
@@ -0,0 +1,53 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+package org.apache.lucene.morphology.dictionary;

			
 
				+

			
 
				+import org.apache.lucene.morphology.LetterDecoderEncoder;

			
 
				+

			
 
				+import java.util.Arrays;

			
 
				+import java.util.Collections;

			
 
				+import java.util.LinkedList;

			
 
				+import java.util.List;

			
 
				+

			
 
				+

			
 
				+/**
				+ * Filter that rejects word cards and individual word forms which contain a
				+ * hyphen or which the configured {@link LetterDecoderEncoder} does not accept
				+ * (see {@code checkString}).
				+ */
				+public class WordCleaner extends WordFilter {
				+
				+    private final LetterDecoderEncoder decoderEncoder;
				+
				+    /**
				+     * @param decoderEncoder used to validate the canonical word and every word form
				+     * @param wordProcessor  the next processor in the chain
				+     */
				+    public WordCleaner(LetterDecoderEncoder decoderEncoder, WordProcessor wordProcessor) {
				+        super(wordProcessor);
				+        this.decoderEncoder = decoderEncoder;
				+    }
				+
				+    /**
				+     * Drops the whole card when its canonical word (base + canonical suffix)
				+     * contains {@code "-"} or fails {@code checkString}; otherwise removes from
				+     * the card every form that fails the same two checks.
				+     *
				+     * @param wordCard the card to clean; it is modified in place
				+     * @return an empty list when the card is rejected, otherwise a new
				+     *         single-element list holding the (modified) card
				+     */
				+    @Override
				+    public List<WordCard> transform(WordCard wordCard) {
				+        // The base is the same for every form, so read it once.
				+        String base = wordCard.getBase();
				+        String word = base + wordCard.getCanonicalSuffix();
				+
				+        if (word.contains("-")) return Collections.emptyList();
				+        if (!decoderEncoder.checkString(word)) return Collections.emptyList();
				+
				+        // Collect first, remove afterwards: removing while iterating over
				+        // getWordsForms() would modify the list under its own iterator.
				+        List<FlexiaModel> flexiaModelsToRemove = new LinkedList<FlexiaModel>();
				+        for (FlexiaModel fm : wordCard.getWordsForms()) {
				+            // Build the form once instead of once per check.
				+            String form = fm.create(base);
				+            if (!decoderEncoder.checkString(form) || form.contains("-")) {
				+                flexiaModelsToRemove.add(fm);
				+            }
				+        }
				+        for (FlexiaModel fm : flexiaModelsToRemove) {
				+            wordCard.removeFlexia(fm);
				+        }
				+
				+        return new LinkedList<WordCard>(Arrays.asList(wordCard));
				+    }
				+}

			
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordFilter.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordFilter.java
@@ -0,0 +1,37 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+

			
 
				+package org.apache.lucene.morphology.dictionary;

			
 
				+

			
 
				+import java.io.IOException;

			
 
				+import java.util.List;

			
 
				+

			
 
				+

			
 
				+/**
				+ * Base class for processors that sit in front of another
				+ * {@link WordProcessor}: each incoming card is passed through
				+ * {@link #transform(WordCard)} and every resulting card is forwarded to the
				+ * wrapped processor.
				+ */
				+public abstract class WordFilter implements WordProcessor {
				+    private final WordProcessor wordProcessor;
				+
				+    /**
				+     * @param wordProcessor the processor that receives the transformed cards
				+     */
				+    public WordFilter(WordProcessor wordProcessor) {
				+        this.wordProcessor = wordProcessor;
				+    }
				+
				+    /**
				+     * Maps one incoming card to the cards that should be forwarded.
				+     *
				+     * @param wordCard the incoming card
				+     * @return the cards to forward; an empty list forwards nothing
				+     */
				+    public abstract List<WordCard> transform(WordCard wordCard);
				+
				+    /**
				+     * Transforms the card and forwards each result, in list order, to the
				+     * wrapped processor.
				+     *
				+     * @param wordCard the incoming card
				+     * @throws IOException if the wrapped processor throws it
				+     */
				+    public void process(WordCard wordCard) throws IOException {
				+        final List<WordCard> forwarded = transform(wordCard);
				+        for (final WordCard card : forwarded) {
				+            wordProcessor.process(card);
				+        }
				+    }
				+}

			
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProcessor.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProcessor.java
@@ -0,0 +1,27 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov 
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.apache.lucene.morphology.dictionary;
			
 
				+
			
 
				+import java.io.IOException;
			
 
				+
			
 
				+/**
				+ * Callback that consumes {@link WordCard} instances one at a time.
				+ */
				+public interface WordProcessor {
				+
				+    /**
				+     * Handles a single word card.
				+     *
				+     * @param wordCard the card to handle
				+     * @throws IOException if the implementation fails with an I/O error
				+     */
				+    void process(WordCard wordCard) throws IOException;
				+}
			
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordStringCleaner.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordStringCleaner.java
@@ -0,0 +1,52 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+package org.apache.lucene.morphology.dictionary;

			
 
				+

			
 
				+import org.apache.lucene.morphology.LetterDecoderEncoder;

			
 
				+

			
 
				+import java.util.Arrays;

			
 
				+import java.util.LinkedList;

			
 
				+import java.util.List;

			
 
				+

			
 
				+

			
 
				+/**
				+ * Filter that normalises every string on a word card with
				+ * {@code LetterDecoderEncoder.cleanString} and truncates each flexia code to
				+ * its first two characters.
				+ */
				+public class WordStringCleaner extends WordFilter {
				+
				+    private LetterDecoderEncoder decoderEncoder;
				+
				+    /**
				+     * @param decoderEncoder supplies {@code cleanString}
				+     * @param wordProcessor  the next processor in the chain
				+     */
				+    public WordStringCleaner(LetterDecoderEncoder decoderEncoder, WordProcessor wordProcessor) {
				+        super(wordProcessor);
				+        this.decoderEncoder = decoderEncoder;
				+    }
				+
				+    /**
				+     * Cleans the card's base, canonical form and canonical suffix, then the
				+     * suffix and prefix of every flexia model, and cuts every code down to two
				+     * characters. The card and its models are modified in place.
				+     *
				+     * @param wordCard the card to normalise
				+     * @return a new single-element list holding the same card
				+     */
				+    public List<WordCard> transform(WordCard wordCard) {
				+        final String cleanedBase = decoderEncoder.cleanString(wordCard.getBase());
				+        wordCard.setBase(cleanedBase);
				+        final String cleanedForm = decoderEncoder.cleanString(wordCard.getCanonicalForm());
				+        wordCard.setCanonicalForm(cleanedForm);
				+        final String cleanedSuffix = decoderEncoder.cleanString(wordCard.getCanonicalSuffix());
				+        wordCard.setCanonicalSuffix(cleanedSuffix);
				+
				+        for (FlexiaModel model : wordCard.getWordsForms()) {
				+            model.setSuffix(decoderEncoder.cleanString(model.getSuffix()));
				+            model.setPrefix(decoderEncoder.cleanString(model.getPrefix()));
				+            // Keep only the first two characters of the code.
				+            final String code = model.getCode();
				+            model.setCode(code.substring(0, 2));
				+        }
				+
				+        final LinkedList<WordCard> single = new LinkedList<WordCard>();
				+        single.add(wordCard);
				+        return single;
				+    }
				+}

			
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java
@@ -0,0 +1,42 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov 

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+

			
 
				+package org.apache.lucene.morphology.generator;

			
 
				+

			
 
				+import org.apache.lucene.morphology.english.EnglishLetterDecoderEncoder;

			
 
				+import org.apache.lucene.morphology.dictionary.*;

			
 
				+

			
 
				+import java.io.IOException;

			
 
				+import java.util.HashSet;

			
 
				+

			
 
				+

			
 
				+/**
				+ * Command-line entry point that feeds the English source dictionary through
				+ * the cleaning filters into a {@link StatisticsCollector} and then asks the
				+ * collector to save its heuristic to the English {@code morph.info} resource.
				+ * All paths are hard-coded and relative to the working directory.
				+ */
				+public class EnglishHeuristicBuilder {
				+
				+    /**
				+     * @param args ignored
				+     * @throws IOException if reading the inputs or saving the result fails
				+     */
				+    public static void main(String[] args) throws IOException {
				+        GrammarReader grammar = new GrammarReader("dictonary/Dicts/Morph/egramtab.tab");
				+        EnglishLetterDecoderEncoder letters = new EnglishLetterDecoderEncoder();
				+        DictionaryReader source = new DictionaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet<String>());
				+        StatisticsCollector collector = new StatisticsCollector(grammar, letters);
				+
				+        // Cards flow: prefix removal -> string cleaning -> word cleaning -> collector.
				+        RemoveFlexiaWithPrefixes chain = new RemoveFlexiaWithPrefixes(
				+                new WordStringCleaner(letters,
				+                        new WordCleaner(letters, collector)));
				+
				+        source.process(chain);
				+        collector.saveHeuristic("english/src/main/resources/org/apache/lucene/morphology/english/morph.info");
				+    }
				+}
			
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java
@@ -0,0 +1,42 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov 

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+

			
 
				+package org.apache.lucene.morphology.generator;

			
 
				+

			
 
				+import org.apache.lucene.morphology.dictionary.*;

			
 
				+import org.apache.lucene.morphology.russian.RussianLetterDecoderEncoder;

			
 
				+

			
 
				+import java.io.IOException;

			
 
				+import java.util.HashSet;

			
 
				+

			
 
				+

			
 
				+/**
				+ * Command-line entry point that feeds the Russian source dictionary through
				+ * the filter chain into a {@link StatisticsCollector} and then asks the
				+ * collector to save its heuristic to the Russian {@code morph.info} resource.
				+ * All paths are hard-coded and relative to the working directory.
				+ */
				+public class RussianHeuristicBuilder {
				+
				+    /**
				+     * @param args ignored
				+     * @throws IOException if reading the inputs or saving the result fails
				+     */
				+    public static void main(String[] args) throws IOException {
				+        GrammarReader grammar = new GrammarReader("dictonary/Dicts/Morph/rgramtab.tab");
				+        RussianLetterDecoderEncoder letters = new RussianLetterDecoderEncoder();
				+        DictionaryReader source = new DictionaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<String>());
				+        StatisticsCollector collector = new StatisticsCollector(grammar, letters);
				+
				+        // Cards flow: adverb splitter -> prefix removal -> string cleaning
				+        // -> word cleaning -> collector.
				+        RussianAdvSplitterFilter chain = new RussianAdvSplitterFilter(
				+                new RemoveFlexiaWithPrefixes(
				+                        new WordStringCleaner(letters,
				+                                new WordCleaner(letters, collector))));
				+
				+        source.process(chain);
				+        collector.saveHeuristic("russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info");
				+    }
				+}

			
--- a/dictionary-reader/src/main/resources/russian-adv-main-code.txt
+++ b/dictionary-reader/src/main/resources/russian-adv-main-code.txt
@@ -0,0 +1 @@
 
				+葯
			
--- a/dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java
+++ b/dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java
@@ -0,0 +1,150 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+package org.apache.lucene;

			
 
				+

			
 
				+import org.apache.lucene.morphology.*;

			
 
				+import org.apache.lucene.morphology.dictionary.*;

			
 
				+import org.apache.lucene.morphology.english.EnglishLetterDecoderEncoder;

			
 
				+import org.apache.lucene.morphology.english.EnglishLuceneMorphology;

			
 
				+import org.apache.lucene.morphology.english.EnglishMorphology;

			
 
				+import org.apache.lucene.morphology.russian.RussianLetterDecoderEncoder;

			
 
				+import org.apache.lucene.morphology.russian.RussianLuceneMorphology;

			
 
				+import org.apache.lucene.morphology.russian.RussianMorphology;

			
 
				+import org.junit.Before;

			
 
				+import org.junit.Test;

			
 
				+

			
 
				+import java.io.IOException;

			
 
				+import java.util.HashSet;

			
 
				+import java.util.List;

			
 
				+import java.util.Map;

			
 
				+import java.util.concurrent.atomic.AtomicLong;

			
 
				+

			
 
				+import static org.hamcrest.Matchers.hasItem;

			
 
				+import static org.junit.Assert.assertThat;

			
 
				+

			
 
				+

			
 
				+public class TestAllWords {

			
 
				+

			
 
				+    String prefix = "";

			
 
				+

			
 
				+    @Before

			
 
				+    public void setUp() {

			
 
				+        System.out.println(System.getProperty("user.dir"));

			
 
				+        prefix = System.getProperty("user.dir").endsWith("dictionary-reader") ? "../" : "";

			
 
				+

			
 
				+    }

			
 
				+

			
 
				+    @Test

			
 
				+    public void shouldEnglishMorphologyIncludeAllWordsFormsWithMorphInfo() throws IOException {

			
 
				+        final MorphologyImpl morphology = new EnglishMorphology();

			
 
				+        LetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder();

			
 
				+        String pathToGramma = prefix + "dictonary/Dicts/Morph/egramtab.tab";

			
 
				+        String pathToDict = prefix + "dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd";

			
 
				+

			
 
				+        testFullGramma(morphology, decoderEncoder, pathToGramma, pathToDict);

			
 
				+

			
 
				+    }

			
 
				+

			
 
				+    @Test

			
 
				+    public void shouldRussianMorphologyIncludeAllWordsFormsWithMorphInfo() throws IOException {

			
 
				+        final MorphologyImpl morphology = new RussianMorphology();

			
 
				+        LetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();

			
 
				+        String pathToGramma = prefix + "dictonary/Dicts/Morph/rgramtab.tab";

			
 
				+        String pathToDict = prefix + "dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd";

			
 
				+

			
 
				+        testFullGramma(morphology, decoderEncoder, pathToGramma, pathToDict);

			
 
				+    }

			
 
				+

			
 
				+    private void testFullGramma(final MorphologyImpl morphology, LetterDecoderEncoder decoderEncoder, String pathToGramma, String pathToDict) throws IOException {

			
 
				+        GrammarReader grammarInfo = new GrammarReader(pathToGramma);

			
 
				+        final List<String> morphInfo = grammarInfo.getGrammarInfo();

			
 
				+        final Map<String, Integer> inversIndex = grammarInfo.getGrammarInverseIndex();

			
 
				+

			
 
				+        DictionaryReader dictionaryReader = new DictionaryReader(pathToDict, new HashSet<String>());

			
 
				+

			
 
				+        final AtomicLong wordCount = new AtomicLong(0);

			
 
				+        Long startTime = System.currentTimeMillis();

			
 
				+

			
 
				+        WordProcessor wordProcessor = new WordProcessor() {

			
 
				+            public void process(WordCard wordCard) throws IOException {

			
 
				+                String word = wordCard.getBase() + wordCard.getCanonicalSuffix();

			
 
				+                for (FlexiaModel fm : wordCard.getWordsForms()) {

			
 
				+                    String wordForm = wordCard.getBase() + fm.getSuffix();

			
 
				+                    String morph = morphInfo.get(inversIndex.get(fm.getCode()));

			
 
				+                    assertThat(morphology.getMorphInfo(wordForm), hasItem(word + "|" + morph));

			
 
				+                    assertThat(morphology.getNormalForms(wordForm), hasItem(word));

			
 
				+                    wordCount.set(2L + wordCount.get());

			
 
				+                }

			
 
				+            }

			
 
				+        };

			
 
				+

			
 
				+        WordCleaner wordCleaner = new WordCleaner(decoderEncoder, wordProcessor);

			
 
				+        WordStringCleaner wordStringCleaner = new WordStringCleaner(decoderEncoder, wordCleaner);

			
 
				+        RemoveFlexiaWithPrefixes removeFlexiaWithPrefixes = new RemoveFlexiaWithPrefixes(wordStringCleaner);

			
 
				+        dictionaryReader.process(removeFlexiaWithPrefixes);

			
 
				+        long time = System.currentTimeMillis() - startTime;

			
 
				+        System.out.println("Done " + wordCount.get() + " in " + time + " ms. " + wordCount.get() / (time / 1000.0) + " word per second");

			
 
				+    }

			
 
				+

			
 
				+    @Test

			
 
				+    public void shouldEnglishLuceneMorphologyIncludeAllWords() throws IOException {

			
 
				+        final LuceneMorphology morphology = new EnglishLuceneMorphology();

			
 
				+

			
 
				+        LetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder();

			
 
				+        String pathToDic = prefix + "dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd";

			
 
				+

			
 
				+        testAllWordForLucene(morphology, decoderEncoder, pathToDic);

			
 
				+    }

			
 
				+

			
 
				+    @Test

			
 
				+    public void shouldIncludeAllWordsRussianInLuceneMorophology() throws IOException {

			
 
				+        final LuceneMorphology morphology = new RussianLuceneMorphology();

			
 
				+

			
 
				+        LetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();

			
 
				+

			
 
				+        String pathToDic = prefix + "dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd";

			
 
				+

			
 
				+        testAllWordForLucene(morphology, decoderEncoder, pathToDic);

			
 
				+

			
 
				+    }

			
 
				+

			
 
				+    private void testAllWordForLucene(final LuceneMorphology morphology, LetterDecoderEncoder decoderEncoder, String pathToDic) throws IOException {

			
 
				+        final AtomicLong wordCount = new AtomicLong(0);

			
 
				+        Long startTime = System.currentTimeMillis();

			
 
				+

			
 
				+        DictionaryReader dictionaryReader = new DictionaryReader(pathToDic, new HashSet<String>());

			
 
				+        WordProcessor wordProcessor = new WordProcessor() {

			
 
				+            public void process(WordCard wordCard) throws IOException {

			
 
				+                String word = wordCard.getBase() + wordCard.getCanonicalSuffix();

			
 
				+                for (FlexiaModel fm : wordCard.getWordsForms()) {

			
 
				+                    String wordForm = wordCard.getBase() + fm.getSuffix();

			
 
				+                    assertThat(morphology.getNormalForms(wordForm), hasItem(word));

			
 
				+                    wordCount.set(1L + wordCount.get());

			
 
				+                }

			
 
				+            }

			
 
				+        };

			
 
				+

			
 
				+        WordCleaner wordCleaner = new WordCleaner(decoderEncoder, wordProcessor);

			
 
				+        WordStringCleaner wordStringCleaner = new WordStringCleaner(decoderEncoder, wordCleaner);

			
 
				+        RemoveFlexiaWithPrefixes removeFlexiaWithPrefixes = new RemoveFlexiaWithPrefixes(wordStringCleaner);

			
 
				+        dictionaryReader.process(removeFlexiaWithPrefixes);

			
 
				+

			
 
				+        long time = System.currentTimeMillis() - startTime;

			
 
				+        System.out.println("Done " + wordCount.get() + " in " + time + " ms. " + wordCount.get() / (time / 1000.0) + " word per second");

			
 
				+    }

			
 
				+

			
 
				+

			
 
				+}

			
--- a/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java
+++ b/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java
@@ -0,0 +1,163 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+package org.apache.lucene.morphology;

			
 
				+

			
 
				+import org.apache.lucene.analysis.Analyzer;

			
 
				+import org.apache.lucene.analysis.BaseTokenStreamTestCase;

			
 
				+import org.apache.lucene.analysis.CharArraySet;

			
 
				+import org.apache.lucene.analysis.LowerCaseFilter;

			
 
				+import org.apache.lucene.analysis.TokenFilter;

			
 
				+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;

			
 
				+import org.apache.lucene.analysis.standard.StandardTokenizer;

			
 
				+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

			
 
				+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

			
 
				+import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer;

			
 
				+import org.apache.lucene.morphology.analyzer.MorphologyFilter;

			
 
				+import org.apache.lucene.morphology.english.EnglishAnalyzer;

			
 
				+import org.apache.lucene.analysis.TokenStream;

			
 
				+import org.apache.lucene.morphology.english.EnglishLuceneMorphology;

			
 
				+import org.apache.lucene.morphology.russian.RussianAnalyzer;

			
 
				+import org.apache.lucene.morphology.russian.RussianLuceneMorphology;

			
 
				+import org.junit.Test;

			
 
				+

			
 
				+import java.io.*;

			
 
				+import java.util.*;

			
 
				+

			
 
				+import static org.hamcrest.Matchers.equalTo;

			
 
				+

			
 
				+

			
 
				+/**
				+ * Tests for the English and Russian analyzers and for
				+ * {@link MorphologyFilter} (token contents, position increments and keyword
				+ * handling).
				+ */
				+public class AnalyzersTest extends BaseTokenStreamTestCase {
				+
				+    @Test
				+    public void shouldGiveCorrectWordsForEnglish() throws IOException {
				+        Analyzer morphlogyAnalyzer = new EnglishAnalyzer();
				+        String answerPath = "/english/english-analyzer-answer.txt";
				+        String testPath = "/english/english-analyzer-data.txt";
				+
				+        testAnalayzer(morphlogyAnalyzer, answerPath, testPath);
				+    }
				+
				+    @Test
				+    public void shouldGiveCorrectWordsForRussian() throws IOException {
				+        Analyzer morphlogyAnalyzer = new RussianAnalyzer();
				+        String answerPath = "/russian/russian-analyzer-answer.txt";
				+        String testPath = "/russian/russian-analyzer-data.txt";
				+
				+        testAnalayzer(morphlogyAnalyzer, answerPath, testPath);
				+    }
				+
				+    /**
				+     * Feeds Russian analyzer output into an English morphology filter and
				+     * only checks that iterating the tokens does not fail.
				+     */
				+    @Test
				+    public void emptyStringTest() throws IOException {
				+        LuceneMorphology russianLuceneMorphology = new RussianLuceneMorphology();
				+        LuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();
				+
				+        MorphologyAnalyzer russianAnalyzer = new MorphologyAnalyzer(russianLuceneMorphology);
				+        InputStreamReader reader = utf8Reader("тест пм тест");
				+        TokenStream stream = russianAnalyzer.tokenStream(null, reader);
				+        MorphologyFilter englishFilter = new MorphologyFilter(stream, englishLuceneMorphology);
				+
				+        try {
				+            englishFilter.reset();
				+            while (englishFilter.incrementToken()) {
				+                System.out.println(englishFilter.toString());
				+            }
				+            englishFilter.end();
				+        } finally {
				+            englishFilter.close();
				+        }
				+    }
				+
				+    /**
				+     * Once one of the listed forms has been seen, any later token that is
				+     * also one of those forms must have a position increment of 0.
				+     */
				+    @Test
				+    public void shouldProvideCorrectIndentForWordWithMelitaForm() throws IOException {
				+        Analyzer morphlogyAnalyzer = new RussianAnalyzer();
				+        InputStreamReader reader = utf8Reader("принеси мне вина на новый год");
				+
				+        TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
				+        try {
				+            tokenStream.reset();
				+            Set<String> foromsOfWine = new HashSet<String>();
				+            foromsOfWine.add("вина");
				+            foromsOfWine.add("винo");
				+            boolean wordSeen = false;
				+            while (tokenStream.incrementToken()) {
				+                CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class);
				+                PositionIncrementAttribute position = tokenStream.getAttribute(PositionIncrementAttribute.class);
				+                if(foromsOfWine.contains(charTerm.toString()) && wordSeen){
				+                    assertThat(position.getPositionIncrement(),equalTo(0));
				+                }
				+                if(foromsOfWine.contains(charTerm.toString())){
				+                    wordSeen = true;
				+                }
				+            }
				+            tokenStream.end();
				+        } finally {
				+            tokenStream.close();
				+        }
				+    }
				+
				+    /**
				+     * Analyzes the text in {@code testPath} and compares the set of produced
				+     * terms with the space-separated terms on the first line of
				+     * {@code answerPath}. Both paths are classpath resources read as UTF-8.
				+     */
				+    private void testAnalayzer(Analyzer morphlogyAnalyzer, String answerPath, String testPath) throws IOException {
				+        HashSet<String> answer;
				+        InputStream stream = openResource(answerPath);
				+        try {
				+            BufferedReader breader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
				+            String[] strings = breader.readLine().replaceAll(" +", " ").trim().split(" ");
				+            answer = new HashSet<String>(Arrays.asList(strings));
				+        } finally {
				+            stream.close();
				+        }
				+
				+        HashSet<String> result = new HashSet<String>();
				+        stream = openResource(testPath);
				+        try {
				+            InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
				+            TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
				+            try {
				+                tokenStream.reset();
				+                while (tokenStream.incrementToken()) {
				+                    CharTermAttribute attribute1 = tokenStream.getAttribute(CharTermAttribute.class);
				+                    result.add(attribute1.toString());
				+                }
				+                tokenStream.end();
				+            } finally {
				+                tokenStream.close();
				+            }
				+        } finally {
				+            stream.close();
				+        }
				+
				+        assertThat(result, equalTo(answer));
				+    }
				+
				+    /**
				+     * Opens a classpath resource, failing with a descriptive exception
				+     * instead of a later NullPointerException when it is missing.
				+     */
				+    private InputStream openResource(String path) throws IOException {
				+        InputStream stream = this.getClass().getResourceAsStream(path);
				+        if (stream == null) {
				+            throw new FileNotFoundException("Test resource not found on classpath: " + path);
				+        }
				+        return stream;
				+    }
				+
				+    /**
				+     * Wraps the text in a reader over its UTF-8 bytes. The bytes are encoded
				+     * explicitly as UTF-8 so they match the decoder regardless of the
				+     * platform default charset.
				+     */
				+    private static InputStreamReader utf8Reader(String text) throws IOException {
				+        return new InputStreamReader(new ByteArrayInputStream(text.getBytes("UTF-8")), "UTF-8");
				+    }
				+
				+    @Test
				+    public void testPositionIncrement() throws IOException {
				+        EnglishAnalyzer englishAnalyzer = new EnglishAnalyzer();
				+        assertTokenStreamContents(
				+                englishAnalyzer.tokenStream("test", "There are tests!"),
				+                new String[]{"there", "are", "be", "test"},
				+                new int[]{0, 6, 6, 10},
				+                new int[]{5, 9, 9, 15},
				+                new String[]{"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>"},
				+                new int[]{1, 1, 0, 1}
				+        );
				+    }
				+
				+    @Test
				+    public void testKeywordHandling() throws IOException {
				+        Analyzer analyzer = new EnglishKeywordTestAnalyzer();
				+        assertTokenStreamContents(
				+                analyzer.tokenStream("test", "Tests shouldn't be stemmed, but tests should!"),
				+                new String[]{"tests", "shouldn't", "be", "stem", "but", "test", "shall"}
				+        );
				+    }
				+
				+    /**
				+     * Analyzer that marks the exact token {@code "Tests"} as a keyword before
				+     * lower-casing, then applies the English morphology filter.
				+     */
				+    private static class EnglishKeywordTestAnalyzer extends Analyzer {
				+        @Override
				+        protected TokenStreamComponents createComponents(String s) {
				+            StandardTokenizer src = new StandardTokenizer();
				+            // Case-sensitive set: only the capitalised "Tests" is protected.
				+            CharArraySet dontStem = new CharArraySet(1, false);
				+            dontStem.add("Tests");
				+            TokenFilter filter = new SetKeywordMarkerFilter(src, dontStem);
				+            filter = new LowerCaseFilter(filter);
				+            try {
				+                filter = new MorphologyFilter(filter, new EnglishLuceneMorphology());
				+            } catch (IOException ex) {
				+                throw new RuntimeException("cannot create EnglishLuceneMorphology", ex);
				+            }
				+            return new TokenStreamComponents(src, filter);
				+        }
				+    }
				+}

			
--- a/dictionary-reader/src/test/java/org/apache/lucene/morphology/LuceneMorphTest.java
+++ b/dictionary-reader/src/test/java/org/apache/lucene/morphology/LuceneMorphTest.java
@@ -0,0 +1,66 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+package org.apache.lucene.morphology;

			
 
				+

			
 
				+import org.apache.lucene.morphology.russian.RussianLuceneMorphology;

			
 
				+import org.apache.lucene.morphology.english.EnglishLuceneMorphology;

			
 
				+import org.junit.Test;

			
 
				+

			
 
				+import java.io.BufferedReader;

			
 
				+import java.io.IOException;

			
 
				+import java.io.InputStream;

			
 
				+import java.io.InputStreamReader;

			
 
				+import java.util.Arrays;

			
 
				+import java.util.HashSet;

			
 
				+import java.util.List;

			
 
				+import java.util.Set;

			
 
				+

			
 
				+import static org.hamcrest.CoreMatchers.equalTo;

			
 
				+import static org.junit.Assert.assertThat;

			
 
				+

			
 
				+

			
 
				+/**
				+ * Checks that the English and Russian morphologies return the expected set
				+ * of normal forms for every word listed in the bundled test-data files.
				+ *
				+ * <p>Each data line has the form {@code word normalForm1 normalForm2 ...},
				+ * separated by whitespace; the order of the normal forms is irrelevant.
				+ */
				+public class LuceneMorphTest {
				+
				+    @Test
				+    public void englishMorphologyShouldGetCorrectNormalForm() throws IOException {
				+        LuceneMorphology luceneMorph = new EnglishLuceneMorphology();
				+        String pathToTestData = "/english/english-morphology-test.txt";
				+        testMorphology(luceneMorph, pathToTestData);
				+    }
				+
				+    @Test
				+    public void russianMorphologyShouldGetCorrectNormalForm() throws IOException {
				+        LuceneMorphology luceneMorph = new RussianLuceneMorphology();
				+        // Smoke check only: the call must not throw. The result used to be
				+        // printed to stdout, which only added noise to the test output.
				+        luceneMorph.getMorphInfo("вина");
				+        String pathToTestData = "/russian/russian-morphology-test.txt";
				+        testMorphology(luceneMorph, pathToTestData);
				+    }
				+
				+    /**
				+     * Reads the given classpath resource line by line and asserts, for each
				+     * non-blank line, that the normal forms returned for the first token
				+     * equal (as a set) the remaining tokens on that line.
				+     *
				+     * @param luceneMorph    morphology under test
				+     * @param pathToTestData classpath location of the UTF-8 test-data file
				+     * @throws IOException if the resource is missing or cannot be read
				+     */
				+    private void testMorphology(LuceneMorphology luceneMorph, String pathToTestData) throws IOException {
				+        InputStream stream = this.getClass().getResourceAsStream(pathToTestData);
				+        if (stream == null) {
				+            // Fail with a readable message instead of an NPE inside InputStreamReader.
				+            throw new IOException("test resource not found on classpath: " + pathToTestData);
				+        }
				+        // Closing the reader also closes the underlying resource stream.
				+        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"))) {
				+            for (String s = bufferedReader.readLine(); s != null; s = bufferedReader.readLine()) {
				+                String line = s.trim();
				+                if (line.isEmpty()) {
				+                    // Tolerate blank or trailing lines rather than querying the empty word.
				+                    continue;
				+                }
				+                // Split on any whitespace run so repeated spaces do not yield empty tokens.
				+                String[] qa = line.split("\\s+");
				+                Set<String> expected = new HashSet<String>(Arrays.asList(qa).subList(1, qa.length));
				+                Set<String> actual = new HashSet<String>(luceneMorph.getNormalForms(qa[0]));
				+                assertThat("normal forms of '" + qa[0] + "'", actual, equalTo(expected));
				+            }
				+        }
				+    }
				+}

			
--- a/dictionary-reader/src/test/resources/english/english-analyzer-answer.txt
+++ b/dictionary-reader/src/test/resources/english/english-analyzer-answer.txt
@@ -0,0 +1 @@
 
				+following follow the instruction exactly will be help ensure the best well good result
			
--- a/dictionary-reader/src/test/resources/english/english-analyzer-data.txt
+++ b/dictionary-reader/src/test/resources/english/english-analyzer-data.txt
@@ -0,0 +1 @@
 
				+Following the instructions exactly will help ensure the best results
			
--- a/dictionary-reader/src/test/resources/english/english-morphology-test.txt
+++ b/dictionary-reader/src/test/resources/english/english-morphology-test.txt
@@ -0,0 +1,8 @@
 
				+purchases purchase

			
 
				+existing exist

			
 
				+was be

			
 
				+men man

			
 
				+bore bore bear

			
 
				+grown grow grown

			
 
				+came come

			
 
				+md md
			
--- a/dictionary-reader/src/test/resources/russian/russian-analyzer-answer.txt
+++ b/dictionary-reader/src/test/resources/russian/russian-analyzer-answer.txt
@@ -0,0 +1 @@
 
				+в результат крушение погибнуть командир отряд специальный назначение пря при переть гувд ростовский область полковник милиция михаил перов и предприниматель
			
--- a/dictionary-reader/src/test/resources/russian/russian-analyzer-data.txt
+++ b/dictionary-reader/src/test/resources/russian/russian-analyzer-data.txt
@@ -0,0 +1 @@
 
				+В результате крушения погибли командир отряда специального назначения при ГУВД Ростовской области полковник милиции Михаил Перов и предприниматель
			
--- a/dictionary-reader/src/test/resources/russian/russian-morphology-test.txt
+++ b/dictionary-reader/src/test/resources/russian/russian-morphology-test.txt
@@ -0,0 +1,20 @@
 
				+еду еда ехать

			
 
				+тестов тест

			
 
				+вина вино вина

			
 
				+вино вино

			
 
				+ехать ехать

			
 
				+ананасов ананас ананасовый

			
 
				+сухой сухой

			
 
				+дураков дурак

			
 
				+пушка пушка пушок

			
 
				+пушок пушок

			
 
				+пушек пушка

			
 
				+козлов козлов козловый козел

			
 
				+жуков жуков жук

			
 
				+красив красить красивый

			
 
				+красивая красивый

			
 
				+тосклив тоскливый

			
 
				+лучший хороший

			
 
				+на на

			
 
				+тест тест тесто

			
 
				+наибольшую наибольший
			
--- a/dictonary/Dicts/GraphAn/abbr.eng
+++ b/dictonary/Dicts/GraphAn/abbr.eng
@@ -0,0 +1,153 @@
 
				+// Each token in this file should be separated by a space. Otherwise it

			
 
				+// would not be recognized as an abbreviation. For example:

			
 
				+// Mr. 

			
 
				+// would  be treated as one token, while Graphan module divides it into

			
 
				+// two tokens. So in this file it should be written as follows:

			
 
				+// Mr . 

			
 
				+

			
 
				+ANON . 

			
 
				+APPROX . 

			
 
				+ASSN . 

			
 
				+ASSOC . 

			
 
				+AVG . 

			
 
				+CAPT . 

			
 
				+CC . 

			
 
				+CH . 

			
 
				+CIRC . 

			
 
				+CM . 

			
 
				+CO . 

			
 
				+COL . 

			
 
				+COMDR . 

			
 
				+CONT . 

			
 
				+CONTD . 

			
 
				+COR . 

			
 
				+CORP . 

			
 
				+CPL . 

			
 
				+CU . 

			
 
				+DD . 

			
 
				+DEC . 

			
 
				+DEF . 

			
 
				+DEP . 

			
 
				+DIST . 

			
 
				+DIV . 

			
 
				+DLR . 

			
 
				+DPT . /:u

			
 
				+DR . /:u

			
 
				+ED . /:u

			
 
				+EG . 

			
 
				+ELEV . 

			
 
				+EQ . 

			
 
				+ESQ . 

			
 
				+EST . 

			
 
				+EXC . 

			
 
				+EXCH . 

			
 
				+EXEC . 

			
 
				+EXP . 

			
 
				+EXT . 

			
 
				+FF . 

			
 
				+FIG . 

			
 
				+FL . 

			
 
				+FLD . 

			
 
				+FR . 

			
 
				+FT . 

			
 
				+GA . 

			
 
				+GAL . 

			
 
				+GEN . 

			
 
				+GL . 

			
 
				+GM . 

			
 
				+GR . 

			
 
				+HRS . 

			
 
				+IB . 

			
 
				+IBID . 

			
 
				+ID . 

			
 
				+ILL . 

			
 
				+ILLUS . 

			
 
				+INC . 

			
 
				+INCL . 

			
 
				+INCOG . 

			
 
				+IV . 

			
 
				+JR . 

			
 
				+KM . 

			
 
				+LAT . 

			
 
				+LB . 

			
 
				+LG . 

			
 
				+LGE . 

			
 
				+LT . 

			
 
				+LTD . 

			
 
				+MA . 

			
 
				+MAJ . 

			
 
				+MI . 

			
 
				+MIL . 

			
 
				+MO . 

			
 
				+MR . /:u

			
 
				+MRS . /:u

			
 
				+MS . /:u

			
 
				+NO . 

			
 
				+PCT . 

			
 
				+PG . 

			
 
				+PP . 

			
 
				+PROF . 

			
 
				+PT . 

			
 
				+QT . 

			
 
				+RE . 

			
 
				+REF . 

			
 
				+REP . 

			
 
				+REV . 

			
 
				+RF . 

			
 
				+SGT . 

			
 
				+SP . 

			
 
				+SQ . 

			
 
				+ST . 

			
 
				+SW . 

			
 
				+TN . 

			
 
				+U . S . /:u

			
 
				+U . S . 

			
 
				+WK . 

			
 
				+YR . 

			
 
				+( ? )

			
 
				+( ! )

			
 
				+Ala . 

			
 
				+Ariz . 

			
 
				+Ark . 

			
 
				+Calif . 

			
 
				+Colo . 

			
 
				+Conn . 

			
 
				+Del . 

			
 
				+Fla . 

			
 
				+Ga . 

			
 
				+Iowa Kan . 

			
 
				+Ky . 

			
 
				+La . 

			
 
				+Md . 

			
 
				+Ma . 

			
 
				+Mass . 

			
 
				+Me . 

			
 
				+Mich . 

			
 
				+Minn . 

			
 
				+Miss . 

			
 
				+Mo . 

			
 
				+Mont . 

			
 
				+Neb . 

			
 
				+Nev . 

			
 
				+N . H . 

			
 
				+N . J . 

			
 
				+N . M . 

			
 
				+N . Y . 

			
 
				+N . C . 

			
 
				+N . D . 

			
 
				+Okla . 

			
 
				+Ore . 

			
 
				+Pa . 

			
 
				+RI 

			
 
				+R . I . 

			
 
				+S . C . 

			
 
				+SD S . D . 

			
 
				+Tenn . 

			
 
				+Tex . 

			
 
				+Texas

			
 
				+Va . 

			
 
				+Vt . 

			
 
				+Wash . 

			
 
				+W . V . 

			
 
				+Wis . 

			
 
				+Wy . 

			
--- a/dictonary/Dicts/GraphAn/abbr.ger
+++ b/dictonary/Dicts/GraphAn/abbr.ger
--- a/dictonary/Dicts/GraphAn/abbr.rus
+++ b/dictonary/Dicts/GraphAn/abbr.rus
@@ -0,0 +1,34 @@
 
				+// Each token in this file should be separated by a space. Otherwise it

			
 
				+// would not be recognized as an abbreviation. For example:

			
 
				+// Mr. 

			
 
				+// would  be treated as one token, while Graphan module divides it into

			
 
				+// two tokens. So in this file it should be written as follows:

			
 
				+// Mr . 

			
 
				+

			
 
				+ÑÌ . /:u

			
 
				+ÓË . 

			
 
				+ÊÂ . 

			
 
				+ÏÏ . 

			
 
				+ÃÃ . 

			
 
				+ÏÎÑ . 

			
 
				+ÒÎÂ . 

			
 
				+ÒÀÁË . 

			
 
				+Ó× . 

			
 
				+ÒÀÁ . 

			
 
				+ÐÈÑ . 

			
 
				+ÃË . 

			
 
				+ÐÓÊ . 

			
 
				+ÒÅË . 

			
 
				+ÏÎÄ . 

			
 
				+ÝÒ . 

			
 
				+ÑÒÐ . 

			
 
				+ÒÛÑ . 

			
 
				+ÌËÍ . 

			
 
				+ÌËÐÄ . 

			
 
				+ÒÐËÍ . 

			
 
				+ØÒ . 

			
 
				+È Ò . Ï . 

			
 
				+È Ò . Ä . 

			
 
				+P . Õð . 

			
 
				+Ò . Å . /:a

			
 
				+Ã . /:a
			
--- a/dictonary/Dicts/GraphAn/abbrev.log
+++ b/dictonary/Dicts/GraphAn/abbrev.log
--- a/dictonary/Dicts/GraphAn/enames.lem
+++ b/dictonary/Dicts/GraphAn/enames.lem
--- a/dictonary/Dicts/GraphAn/enames.txt
+++ b/dictonary/Dicts/GraphAn/enames.txt
--- a/dictonary/Dicts/GraphAn/extensions.txt
+++ b/dictonary/Dicts/GraphAn/extensions.txt
@@ -0,0 +1,36 @@
 
				+arc

			
 
				+arj

			
 
				+bat

			
 
				+bin

			
 
				+bmp

			
 
				+cmd

			
 
				+com

			
 
				+dbf

			
 
				+dll

			
 
				+doc

			
 
				+dvi

			
 
				+exe

			
 
				+gz

			
 
				+Hqx

			
 
				+htm

			
 
				+html

			
 
				+lhz

			
 
				+mrw

			
 
				+nsf

			
 
				+pdb

			
 
				+ps

			
 
				+rcf

			
 
				+rtf

			
 
				+shar

			
 
				+sit

			
 
				+slf

			
 
				+syn

			
 
				+sys

			
 
				+tar

			
 
				+tex

			
 
				+txt

			
 
				+wav

			
 
				+win

			
 
				+z

			
 
				+zip

			
 
				+zoo

			
--- a/dictonary/Dicts/GraphAn/idents.txt
+++ b/dictonary/Dicts/GraphAn/idents.txt
@@ -0,0 +1,28 @@
 
				+3.1

			
 
				+3.11

			
 
				+a:

			
 
				+b:

			
 
				+c:

			
 
				+C++

			
 
				+CLIENT/SERVER

			
 
				+d:

			
 
				+FAT16

			
 
				+FAT32

			
 
				+I/O

			
 
				+input/output

			
 
				+M/B

			
 
				+moderator/demoderator

			
 
				+OS/2

			
 
				+peer-to-peer

			
 
				+POP1

			
 
				+POP2

			
 
				+POP3

			
 
				+PS/2

			
 
				+TCP/IP

			
 
				+à:

			
 
				+ñ:

			
 
				+ñè++

			
 
				+ÑÈ++

			
 
				+áèò/ñ

			
 
				+Unix-to-Unix

			
 
				+terminate-and-stay
			
--- a/dictonary/Dicts/GraphAn/keyboard.txt
+++ b/dictonary/Dicts/GraphAn/keyboard.txt
@@ -0,0 +1,29 @@
 
				+[modifiers]

			
 
				+shift

			
 
				+ctrl

			
 
				+alt

			
 
				+control

			
 
				+Shift

			
 
				+Ctrl

			
 
				+Alt

			
 
				+Control

			
 
				+SHIFT

			
 
				+CTRL

			
 
				+ALT

			
 
				+CONTROL

			
 
				+

			
 
				+[keys]

			
 
				+PgUp

			
 
				+F1

			
 
				+F2

			
 
				+@

			
 
				+Page Down

			
 
				+ScrLock 

			
 
				+Scroll Lock

			
 
				+Caps Lock

			
 
				+CapsLock

			
 
				+Num Lock

			
 
				+NumLock

			
 
				+Del

			
 
				+Delete

			
 
				+

			
--- a/dictonary/Dicts/GraphAn/keyword
+++ b/dictonary/Dicts/GraphAn/keyword
@@ -0,0 +1,651 @@
 
				+%Z10  Правила заполнения  зон

			
 
				+      cм. в файле  header.des

			
 
				+

			
 
				+  Правило 1

			
 
				+  Помни! Если один <Вариант ТО> (левая часть распознающего предложения), на-

			
 
				+  чинается с тех же слов, что и другой, то НУЖНО ставить более длинный вариант

			
 
				+  перед более коротким!

			
 
				+

			
 
				+  Правило 2 

			
 
				+  В переменных заглавные и строчные буквы различаются.

			
 
				+

			
 
				+	    

			
 
				+  Минимальный незанятый номер зоны  :  23

			
 
				+  Дата изменения : 03.10.95

			
 
				+  Зарезервированные символы  # $ +

			
 
				+

			
 
				+

			
 
				+%Z1  грифы приложений в тексте    03.07.95

			
 
				+// 

			
 
				+

			
 
				+#10 утвержд     #n       = $A1   // комментарии

			
 
				+#10 проект      #n       = $A2

			
 
				+#10 одобрен     #n       = $A1

			
 
				+#10 типовой проект #n    = $A2

			
 
				+#10 выписка     #n       = $A3

			
 
				+#10 форма N #   #n       = $A4

			
 
				+#10 перевод с # #n       = $A5

			
 
				+#10 приложени N  # #n    = $A1

			
 
				+#10 приложени # #n       = $A1

			
 
				+#10 приложени   #n       = $A1

			
 
				+#10 приложен к # #n      = $A1

			
 
				+.+         #n      = $A1

			
 
				+

			
 
				+

			
 
				+%Z16   выходные данные для типа приложений в header (ТИП_ПРИЛ) 03.07.95

			
 
				+

			
 
				+ $A1  =  Приложение

			
 
				+ $A2  =  Проект

			
 
				+ $A3  =  Выписка

			
 
				+ $A4  =  Форма

			
 
				+ $A5  =  Перевод

			
 
				+

			
 
				+%Z2  инстанции  в тексте  03.07.95

			
 
				+

			
 
				+верховн совет РСФСР                           = $I12   

			
 
				+вице - президент РФ                 = $I4

			
 
				+вице - президент Российск Федераци  = $I4

			
 
				+государственн советник РСФСР # # # # #  = $I5

			
 
				+государственн секретар РСФСР # # # # #  = $I6

			
 
				+законодательн собран санкт - петерб           = $IB 

			
 
				+заместител председат совета министров  # # #  = $I8

			
 
				+засед централь изб комисс росс фед        = $ID

			
 
				+конституц суд росс федер                  = $IJ 

			
 
				+минис связи росс федер                        = $IF

			
 
				+минис внутр дел росс федер   =  $IL

			
 
				+председател совет министров                   = $I3

			
 
				+пpедседател пpавите pосс федеpа               = $I7

			
 
				+президент РФ                        = $I1

			
 
				+президент Российск Федераци         = $I1

			
 
				+президент РСФСР                     = $I2

			
 
				+президент Российск советск # # #    = $I2

			
 
				+российск  федерац                             = $I9  

			
 
				+российск  советс федерат социал респуб         = $I13   

			
 
				+совет министров - # # #             = $I3

			
 
				+совет федера федер собр росс федер            = $I10

			
 
				+cовет федерации                               = $I10

			
 
				+государ дум федер собр росс федер              = $I11

			
 
				+государ дум                                   = $I11

			
 
				+государ налогов служб росс федер             = $IE

			
 
				+губернатор санкт - петербурга                 = $IA

			
 
				+пленум верх суд росс федер   =  $IG

			
 
				+правительств москв     =  $IK

			
 
				+правительств РСФСР                         = $I7

			
 
				+правительств РФ                     = $I3

			
 
				+правительств Российск Федераци      = $I3

			
 
				+РСФСР                                         = $I13  

			
 
				+союз советс социал респ                     = $IE

			
 
				+центральный банк россии                       = $IC

			
 
				+централь изб комисс росс фед                  = $ID

			
 
				+централь изб ком по выбор в гос дум фед соб рос фед  = $ID

			
 
				+централь изб ком по выбор в сов фед и по выб в гос дум фед соб рос фед  = $ID

			
 
				+централь изб ком по        = $ID

			
 
				+централь изб ком            = $ID

			
 
				+

			
 
				+// $IL

			
 
				+

			
 
				+%Z13  выходные инстанции в header (ИНСТ) 02.08.95

			
 
				+

			
 
				+$I1  = Президент РФ

			
 
				+$I2  = Президент  РСФСР

			
 
				+$I3  = Правительство РФ

			
 
				+$I4  = Вице-президент РФ

			
 
				+$I5  = Государственный советник РСФСР

			
 
				+$I6  = Государственный секретарь РСФСР

			
 
				+$I7  = Правительство РСФСР

			
 
				+$I8  = Заместитель Председателя Совета Министров

			
 
				+$I9  = нет

			
 
				+$I10 = Совет Федерации

			
 
				+$I11 = Государственная Дума

			
 
				+$I12 = Верховный Совет  РСФСР

			
 
				+$I13 = РСФСР

			
 
				+$IA  = Губернатор Санкт-Петербурга

			
 
				+$IB  = Законодательное Собрание Санкт-Петербурга

			
 
				+$IC  = Центральный Банк России 

			
 
				+$ID  = Центральная Избирательная Комиссия

			
 
				+$IE  = Государственная налоговая служба РФ

			
 
				+$IF  = Министерство связи РФ

			
 
				+$IG  = Пленум Верховного Суда РФ

			
 
				+$IH  = Cоюз Cоветских Социалистических Республик

			
 
				+$IJ  = Конституционный Суд РФ

			
 
				+$IK  = Правительство Москвы 

			
 
				+$IL  = Министерство Внутренних Дел РФ

			
 
				+ 

			
 
				+%Z3  классы документов в тексте    03.07.95

			
 
				+

			
 
				+распоряжение             = $C1

			
 
				+указ                     = $C2

			
 
				+постановление            = $C3

			
 
				+послание                 = $C4

			
 
				+

			
 
				+федеральный конст закон  = $C5

			
 
				+федеральный закон        = $C5

			
 
				+закон санкт - петербург  = $C7 

			
 
				+закон                    = $C5

			
 
				+# кодекс росс федер      = $C5

			
 
				+#     кодекс             = $C5

			
 
				+приказ                   = $C6

			
 
				+соглашение               = $C8

			
 
				+договор                  = $C9

			
 
				+письмо                   = $CA    

			
 
				+телеграмма               = $CB

			
 
				+протокол                 = $CC

			
 
				+

			
 
				+Выписка из протокол N # засед =  $CD

			
 
				+

			
 
				+%Z14  выходные классы документов в header (ТИП_ДОК) 03.07.95

			
 
				+

			
 
				+$C1   =  Распоряжение

			
 
				+$C2   =  Указ

			
 
				+$C3   =  Постановление

			
 
				+$C4   =  Послание

			
 
				+$C5   =  Закон

			
 
				+$C6   =  Приказ

			
 
				+$C7   =  Закон Санкт-Петербурга

			
 
				+$C8   =  Соглашение

			
 
				+$C9   =  Договор

			
 
				+$CA   =  Письмо

			
 
				+$CB   =  Телеграмма

			
 
				+$CC   =  Протокол

			
 
				+$CD   =  Выписка из протокола

			
 
				+

			
 
				+%Z8  построение типа документа 06.07.95

			
 
				+

			
 
				+$I1 $C1  =   $T1

			
 
				+$I2 $C1  =   $T1

			
 
				+

			
 
				+$I1 $C2  =   $T2

			
 
				+$I2 $C2  =   $T2

			
 
				+$I1 $C3  =   $T2

			
 
				+$I2  $C3  =   $T2

			
 
				+

			
 
				+$I4  $C1  =   $T3

			
 
				+$I3  $C1  =   $T4

			
 
				+$I7  $C1  =   $T4

			
 
				+$I3  $C3  =   $T5

			
 
				+$I7  $C3  =   $T5

			
 
				+$I5  $C1  =   $T6

			
 
				+$I6  $C1  =   $T9

			
 
				+$I8  $C1  =   $T7

			
 
				+$IA $C1  = $TA

			
 
				+$IA $C6  = $TB

			
 
				+

			
 
				+$IB $C3  = $TC

			
 
				+$IB $C7  = $TC

			
 
				+

			
 
				+$I9  $C5  =   $T8

			
 
				+$IH  $C5  =   $TO

			
 
				+#    $C5  =   $T8

			
 
				+

			
 
				+$T10 $T10 =   $T10

			
 
				+

			
 
				+$I3  $C8  =   $TD

			
 
				+$I1  $C9  =   $TE

			
 
				+$I1  $C9  =   $TF

			
 
				+$I11 $C3  =   $T11

			
 
				+$IC  $CA  =   $TG

			
 
				+$IC  $CB  =   $TF

			
 
				+

			
 
				+$ID  $C3  =   $TH

			
 
				+$ID  $CC  =   $TN

			
 
				+

			
 
				+

			
 
				+$IE  $C3  =   $TJ

			
 
				+$IE  $CA  =   $TI

			
 
				+

			
 
				+$IF  $C6  =   $TK

			
 
				+

			
 
				+$I10  $C3  =   $TL

			
 
				+

			
 
				+$IG   $C3  =  $TM

			
 
				+

			
 
				+$IJ   $C3  =  $TР

			
 
				+

			
 
				+$IL   $C6  =  $TS

			
 
				+

			
 
				+$ID $C3 $IK =  $TR

			
 
				+$IK $C3 $ID =  $TR

			
 
				+

			
 
				+$ID $CD = $TQ

			
 
				+

			
 
				+%Z18  выходной тип документа для имени файла и header 06.07.95

			
 
				+

			
 
				+$T1  =    {"Распоряжение Президента РФ",ar}

			
 
				+$T2  =    {"Указ Президента РФ",au}

			
 
				+$T3  =    {"Распоряжение Вице-президента РФ",av}

			
 
				+$T4  =    {"Распоряжение Правительства РФ",as}

			
 
				+$T5  =    {"Постановление Правительства РФ",ap}

			
 
				+$T6  =    {"Распоряжение Государственного секретаря",ab}

			
 
				+$T7  =    {"Распоряжение Заместителя Председателя Правительства РФ",az}

			
 
				+$T8  =    {"Закон РФ",al}

			
 
				+$T9  =    {"Распоряжение Государственного советника",aа}

			
 
				+$T10 =    {"Двухсторонний документ",ai}

			
 
				+$TA  =    {"Распоряжение Губернатора Санкт-Петербурга",an}

			
 
				+$TB  =    {"Приказ Губернатора Санкт-Петербурга",ak}

			
 
				+$TC  =    {"Законы Санкт-Петербурга",ad}

			
 
				+$TD  =    {"Соглашение правительства РФ",ac}

			
 
				+$TE  =    {"Договор Президента РФ",ae}

			
 
				+$TF  =    {"Соглашение Президента РФ",af}

			
 
				+$T11  =    {"Постановление Государственной Думы",ag}

			
 
				+$TG   =    {"Письмо Центрального Банка России",ao}

			
 
				+$TF   =    {"Телеграмма Центрального Банка России",aj}

			
 
				+$TH   =    {"Постановление Центральной Избирательной Комиссии РФ",at}

			
 
				+$TJ   =    {"Постановление Государственной налоговой службы РФ",ax}

			
 
				+$TI   =    {"Письмо Государственной налоговой службы РФ",ay}

			
 
				+$TK   =    {"Приказ Министерства связи РФ",aw}

			
 
				+$TL   =    {"Постановление Совета Федерации РФ",am}

			
 
				+$TM   =    {"Постановление Пленума Верховного Суда РФ",aq}

			
 
				+$TN   =    {"Протокол заседания Центральной Избирательной Комиссии РФ",ah}

			
 
				+$TO   =    {"Закон СССР",ba}

			
 
				+$TР   =    {"Постановление Конституционного Суда РФ",bb}

			
 
				+$TR   =    {"Совместное постановление Правительства Москвы и Центризбиркома РФ",bс}

			
 
				+$TS   =	   {"Приказ Министерства Внутренних Дел РФ",bd}	

			
 
				+$TQ   =    {"Выписка",bе}

			
 
				+

			
 
				+%Z4  Подпись и в тексте и в header (АВТОР)  02.08.95

			
 
				+

			
 
				+белобородов  =  А.Белобородов  

			
 
				+булгак       =  В.Булгак

			
 
				+бурбулис     =  Г.Бурбулис

			
 
				+веденеев     =  Ю.Веденеев

			
 
				+вешняков     =  А.Вешняков

			
 
				+гайдар       =  Е.Гайдар

			
 
				+галушко      =  И.Галушко

			
 
				+горбачев     =  М.Горбачев

			
 
				+демидов      =  В.Демидов

			
 
				+ельцин       =  Б.Ельцин

			
 
				+заверюха     =  А.Заверюха

			
 
				+загуляев     =  В.Загуляев   

			
 
				+иванченко    =  A.Иванченко

			
 
				+исаев        =  Б.Исаев

			
 
				+лобов        =  О.Лобов

			
 
				+лебедев      =  В.Лебедев

			
 
				+куликов      =  А.Куликов

			
 
				+лужков       =  Ю.Лужков

			
 
				+махарадзе    =  В.Махарадзе

			
 
				+павлов       =  В.ПАВЛОВ

			
 
				+парамонова   =  Т.Парамонова 

			
 
				+полторанин   =  М.Полторанин

			
 
				+руцкой       =  А.Руцкой

			
 
				+рябов        =  Н.Рябов

			
 
				+рыбкин       =  И.Рыбкин

			
 
				+салтыков     =  Б.Салтыков

			
 
				+сосковец     =  О.Сосковец

			
 
				+кравцов      =  Ю.Кравцов

			
 
				+некрасов     =  В.Некрасов

			
 
				+селезнев     =  Г.Селезнев

			
 
				+станкевич    =  С.Станкевич

			
 
				+федоров      =  Б.Федоров

			
 
				+хижа         =  Г.Хижа

			
 
				+хандруев     =  А.Хандруев

			
 
				+хасбулат     =  Р.Хасбулатов

			
 
				+черномырдин  =  В.Черномырдин

			
 
				+чубайс       =  А.Чубайс

			
 
				+шахрай       =  С.Шахрай

			
 
				+шохин        =  А.Шохин

			
 
				+шумейко      =  В.Шумейко

			
 
				+яковлев      =  В.Яковлев

			
 
				+яров         =  Ю.Яров

			
 
				+

			
 
				+

			
 
				+

			
 
				+

			
 
				+

			
 
				+

			
 
				+

			
 
				+

			
 
				+%Z5  первые слова в заголовках    29.09.95

			
 
				+

			
 
				+ввод = ВВОД

			
 
				+ведомость = ВЕДОМОСТЬ

			
 
				+вкладыш = ВКЛАДЫШ

			
 
				+выделение = ВЫДЕЛЕНИЕ

			
 
				+границы = ГРАНИЦЫ

			
 
				+график = ГРАФИК

			
 
				+декларация = ДЕКЛАРАЦИЯ

			
 
				+договоры = ДОГОВОРЫ

			
 
				+договор = ДОГОВОР

			
 
				+дополнения = ДОПОЛНЕНИЯ

			
 
				+доходы = ДОХОДЫ

			
 
				+задание = ЗАДАНИЕ

			
 
				+изменения = ИЗМЕНЕНИЯ

			
 
				+инструкция = ИНСТРУКЦИЯ

			
 
				+количество = КОЛИЧЕСТВО

			
 
				+комитет = КОМИТЕТ

			
 
				+комплекс = КОМПЛЕКС

			
 
				+конвенция = КОНВЕНЦИЯ

			
 
				+концепция = КОНЦЕПЦИЯ

			
 
				+координаты = КООРДИНАТЫ

			
 
				+критерии = КРИТЕРИИ

			
 
				+лицензия = ЛИЦЕНЗИЯ

			
 
				+меморандум = МЕМОРАНДУМ

			
 
				+мероприятия = МЕРОПРИЯТИЯ

			
 
				+номенклатура = НОМЕНКЛАТУРА

			
 
				+нормативы отчислений = НОРМАТИВЫ ОТЧИСЛЕНИЙ

			
 
				+нормативы = НОРМАТИВЫ

			
 
				+норма = НОРМА

			
 
				+нормы = НОРМЫ

			
 
				+нота = НОТА

			
 
				+образцы документов = ОБРАЗЦЫ ДОКУМЕНТОВ

			
 
				+обращение = ОБРАЩЕНИЕ

			
 
				+объемы = ОБЪЕМЫ

			
 
				+объем = ОБЪЕМ

			
 
				+описание = ОПИСАНИЕ

			
 
				+оргкомитет = ОРГКОМИТЕТ

			
 
				+перечень = ПЕРЕЧЕНЬ

			
 
				+перечни = ПЕРЕЧНИ

			
 
				+персональный состав = ПЕРСОНАЛЬНЫЙ СОСТАВ

			
 
				+план мероприятий   = ПЛАН МЕРОПРИЯТИЙ

			
 
				+план = ПЛАН

			
 
				+положение = ПОЛОЖЕНИЕ

			
 
				+порядок = ПОРЯДОК

			
 
				+послание = ПОСЛАНИЕ

			
 
				+постановление = ПОСТАНОВЛЕНИЕ

			
 
				+правила = ПРАВИЛА

			
 
				+предложения = ПРЕДЛОЖЕНИЯ

			
 
				+предписание = ПРЕДПИСАНИЕ

			
 
				+приложение = ПРИЛОЖЕНИЕ

			
 
				+прогноз = ПРОГНОЗ

			
 
				+программа = ПРОГРАММА

			
 
				+производство = ПРОИЗВОДСТВО

			
 
				+протокол = ПРОТОКОЛ

			
 
				+размеры = РАЗМЕРЫ

			
 
				+разъяснения = РАЗЪЯСНЕНИЯ

			
 
				+распределение = РАСПРЕДЕЛЕНИЕ

			
 
				+расчет = РАСЧЕТ

			
 
				+регламент = РЕГЛАМЕНТ

			
 
				+режим = РЕЖИМ

			
 
				+сведения = СВЕДЕНИЯ

			
 
				+совместное соглашение = СОВМЕСТНОЕ СОГЛАШЕНИЕ

			
 
				+соглашение = СОГЛАШЕНИЕ

			
 
				+сообщение = СООБЩЕНИЕ

			
 
				+состав = СОСТАВ

			
 
				+списки = СПИСКИ

			
 
				+список = СПИСОК

			
 
				+справка = СПРАВКА

			
 
				+сроки = СРОКИ

			
 
				+ставки = СТАВКИ

			
 
				+статус = СТАТУС

			
 
				+структура = СТРУКТУРА

			
 
				+схема = СХЕМА

			
 
				+текст = ТЕКСТ

			
 
				+указания = УКАЗАНИЯ

			
 
				+условия = УСЛОВИЯ

			
 
				+устав = УСТАВ

			
 
				+функции = ФУНКЦИИ

			
 
				+штрафы = ШТРАФЫ

			
 
				+экспликация = ЭКСПЛИКАЦИЯ

			
 
				+

			
 
				+

			
 
				+%Z6   не используется  03.07.95

			
 
				+

			
 
				+о = $FW

			
 
				+об = $FW

			
 
				+вопросы = $FW

			
 
				+

			
 
				+%Z7  Место подписания в тексте 03.07.95

			
 
				+

			
 
				+москва , кремль                  = $SP1

			
 
				+г .  Москва , кремль             = $SP1

			
 
				+г . Москва                       = $SP2

			
 
				+Москва , Дом Советов России      = $SP3

			
 
				+москва                           = $SP2

			
 
				+$Dig2 часов $Dig2 минут москва , кремль  = $SP1

			
 
				+Санкт - Петербург , Мариин двор  = $SP4

			
 
				+Санкт - Петербург               = $SP5

			
 
				+

			
 
				+%Z15  Место подписания в header (МЕСТО)  03.07.95

			
 
				+

			
 
				+$SP1  = Москва,Кремль

			
 
				+$SP2  = Москва

			
 
				+$SP3  = Москва, Дом Советов России

			
 
				+$SP4  = Санкт-Петербург, Мариинский  дворец

			
 
				+$SP5  = Санкт-Петербург

			
 
				+

			
 
				+%Z9  Расширение выходного номера в тексте    03.07.95

			
 
				+

			
 
				+- рп  = -рп 

			
 
				+- рв  = -рв 

			
 
				+- рз  = -рз 

			
 
				+- р   = -р 

			
 
				+- ргс = -ргс 

			
 
				+- фз  = -фз 

			
 
				+- I   = -I  

			
 
				+- 1   = -1

			
 
				+- к   = -к  

			
 
				+- гд  = -гд 

			
 
				+- сф  = -сф 

			
 
				+- фкз = -фкз 

			
 
				+- ФКЗ = -фкз 

			
 
				+  сф  = _сф  

			
 
				+  гд  = _гд  

			
 
				+

			
 
				+%Z11 Год подписания документа в тексте  03.07.95

			
 
				+

			
 
				+ #y года . = $Y|0

			
 
				+ #y года   = $Y|0

			
 
				+ #y г .    = $Y|0

			
 
				+ #y г      = $Y|0

			
 
				+ #у        = $Y|0

			
 
				+ . #y года = $Y|1

			
 
				+ . #y г .  = $Y|1

			
 
				+ . #y      = $Y|1

			
 
				+

			
 
				+

			
 
				+%Z12  должности в тексте    30.09.95

			
 
				+

			
 
				+президент РФ                        = $P1

			
 
				+президент Российск Федераци         = $P1

			
 
				+президент РСФСР                     = $P2

			
 
				+президент Российск советск # # #    = $P2

			
 
				+вице - президент РФ                   = $P3

			
 
				+вице - президент Российск Федераци    = $P3

			
 
				+государственн советник РСФСР # # # # #  = $P4

			
 
				+государственн секретар РСФСР # # # # #  = $P5

			
 
				+первый заместитель Председателя Центрального банка # # = $PD

			
 
				+перв заместитель председател # # #      = $P6

			
 
				+за Секретаря,член Центральной избирательной комиссии # # = $PT

			
 
				+зам . председат  правительств #        = $P6

			
 
				+замест председ верховн совета РСФСР              = $P10

			
 
				+замест председ совета министров  # # #  = $P8

			
 
				+замест председ правительств #  = $P6

			
 
				+замест председ централь изб ком росс фед  = $PL

			
 
				+замест руков госуд налоговой служ росс фед - гос сов налог служб 1 ранга =  $PG

			
 
				+замест руков госуд налоговой служ росс фед - гос сов налог служб II ранга =  $PG

			
 
				+замест руков госналог РФ - гос сов налог служб II ранга =  $

			
 
				+министр генерал - полковник = $PS

			
 
				+председ верхов совет РСФСР                    = $P9

			
 
				+председ верхов совет СССР                    = $PP

			
 
				+председ верхов суда росс федер                = $PN

			
 
				+председ государ дум федер собран росс федер =  $PJ

			
 
				+председ законод собр санкт - петер = $PA

			
 
				+пpедсед пpавите pосс федеpа               = $P7

			
 
				+председ совет министров - прав росс федер  = $P7

			
 
				+председ совет министров                    = $P7

			
 
				+председ совет федер федер собран росс федер = $PK

			
 
				+председ центральной избирательной комиссии # # = $PF

			
 
				+премьер правите москвы              =  $PR

			
 
				+губернатор санкт - петер              = $P11

			
 
				+И . о . Председателя Центрального банка # # =  $PB

			
 
				+И . о . Секрет Центральной избирательной комиссии # # =  $PK

			
 
				+Секретарь Центральной избирательной комиссии # # =  $PE

			
 
				+Секретарь Плен , судья Верхов Суд росс  федер  = $PO

			
 
				+федерал министр связи росс фед     =   $PI

			
 
				+

			
 
				+//  Max Number  = $PS

			
 
				+

			
 
				+

			
 
				+						      

			
 
				+%Z17  должности в header (ДОЛЖ_АВТ) 30.09.95

			
 
				+

			
 
				+$P1   = Президент РФ

			
 
				+$P2   = Президент РСФСР

			
 
				+$P3   = Вице-президент РФ

			
 
				+$P4   = Государственный советник

			
 
				+$P5   = Государственный секретарь

			
 
				+$P6   = Заместитель Председателя Правительства РФ

			
 
				+$P7   = Председатель Совета Министров РФ

			
 
				+$P8   = Заместитель Председателя Совета Министров РФ

			
 
				+$P9   = Председатель Верховного Совета РСФСР

			
 
				+$P10  = Заместитель Председателя Верховного Совета РСФСР

			
 
				+$P11  = Губернатор Санкт-Петербурга

			
 
				+$PA   = Председатель Законодательного Собрания Санкт-Петербурга

			
 
				+$PB   = И. о. Председателя Центрального банка РФ

			
 
				+$PD   = Первый заместитель Председателя Центрального банка РФ

			
 
				+$PE   = Секретарь Центральной избирательной комиссии РФ

			
 
				+$PF   = Председатель Центральной избирательной комиссии РФ

			
 
				+$PG   = Заместитель Руководителя Госналогслужбы РФ

			
 
				+$PI   = Федеральный министр связи РФ

			
 
				+$PJ   = Председатель Государственной Думы Федерального Собрания РФ

			
 
				+$PK   = Председатель Совета Федерации Федерального Собрания РФ

			
 
				+$PL   = Заместитель Председателя Центральной избирательной комиссии РФ                                          

			
 
				+$PM   = И. о. Секретаря Центральной избирательной комиссии РФ

			
 
				+$PN   = Председатель Верховного Суда РФ

			
 
				+$PO   = Секретарь Пленума, судья Верховного Суда РФ

			
 
				+$PP   = Председатель Верховного Совета СССР

			
 
				+$PR   = Премьер Правительства Москвы

			
 
				+$PS   = Министр генерал-полковник

			
 
				+$PT   = За Секретаря,член Центральной избирательной комиссии РФ

			
 
				+

			
 
				+%Z19  стандарты  оформления приложений (пока не анализир.)  17.07.95

			
 
				+

			
 
				+ЛЕВ_ОТСТУП ТИП_ПРИЛ [АБЗАЦ] [ЗГЛ] ТЕКСТ = $AO

			
 
				+

			
 
				+%Z20  стандарты  оформления документов в graphmat.cfg (DocOrder)  17.07.95

			
 
				+

			
 
				+// Автомат, DocOrder 0

			
 
				+АВТОМАТ  = $DA                {ESC}

			
 
				+

			
 
				+// Дата вверху, DocOrder  1

			
 
				+[НАДПИСКИ] ТИП_ДОК ДАТА N_ТЕКСТ [МЕСТО] [ЗГЛ] ТЕКСТ (ДОЛЖ_АВТ АВТОР)* = $DO0

			
 
				+

			
 
				+// Дата внизу,  DocOrder  2

			
 
				+[НАДПИСКИ] ТИП_ДОК [ЗГЛ] ТЕКСТ (ДОЛЖ_АВТ АВТОР)* [МЕСТО] ДАТА N_ТЕКСТ = $DO1

			
 
				+

			
 
				+// Закон со статусом, DocOrder  3

			
 
				+ТИП_ДОК ДАТА N_ТЕКСТ [МЕСТО] [ЗГЛ] [ПРИНЯТ] [ОДОБР] ТЕКСТ (ДОЛЖ_АВТ АВТОР)* = $DO2

			
 
				+

			
 
				+// советско-американские документы, DocOrder  4

			
 
				+N_ТЕКСТ [ЗГЛ] ДАТА ТЕКСТ = $DO3

			
 
				+

			
 
				+// свободный текст, DocOrder 5

			
 
				+[ЗГЛ] ТЕКСТ   = $DO5

			
 
				+

			
 
				+// Закон "со статусом" + "дата внизу"  , DocOrder  6

			
 
				+ТИП_ДОК [ЗГЛ]  [ПРИНЯТ]  [ОДОБР] ТЕКСТ (ДОЛЖ_АВТ АВТОР)* [МЕСТО] ДАТА N_ТЕКСТ = $DO2

			
 
				+

			
 
				+// Распоряжения Губернатора Санкт-Петербурга 

			
 
				+// "дата вверху" + "с псевдографикой", DocOrder 7

			
 
				+КЛАСС_ДОК СТРОКА("””””") ИНСТ ДАТА N_ТЕКСТ [ЗГЛ_НЕЦЕНТ] ТЕКСТ [ДОЛЖ_АВТ] [АВТОР] [АБЗАЦ("текст доку")] = $DO0

			
 
				+	

			
 
				+// Дата cверху, ссылка в шапке DocOrder  8

			
 
				+КЛАСС_ССЫЛКИ КЛАСС_ДОК N_ТЕКСТ ИНСТ ДАТА [ЗГЛ] ТЕКСТ (ДОЛЖ_АВТ АВТОР)* = $DO4

			
 
				+

			
 
				+

			
 
				+%Z21

			
 
				+

			
 
				+$DO0 $AO   = $DT0

			
 
				+$DO1 $AO   = $DT0

			
 
				+$DO2 $AO   = $DT1

			
 
				+// выписки

			
 
				+$DO4 #     = $DT4

			
 
				+

			
 
				+// свободный текст

			
 
				+$DO5 #     = $DT3

			
 
				+#    #     = $DT3

			
 
				+

			
 
				+%Z22 Порядок и факультативность полей в выходном файле 

			
 
				+// Header-file

			
 
				+// Error-file

			
 
				+// Pss-file

			
 
				+

			
 
				+$DT0 = {"Собрание законодательных актов РФ",

			
 
				+	 "НАЧАЛО ИНСТ КЛАСС_ДОК ТИП_ДОК ДАТА_ПОДП ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД МЕСТО_ПОДП ПОДПИСЬ ДОЛЖ_ПОДП КОЛ_ПРИЛ МАССИВ РАЗМЕР КОНЕЦ",

			
 
				+	 "ИНСТ КЛАСС_ДОК [ЗГЛ] ТИП_ДОК ДАТА_ПОДП N_ТЕКСТ ПОДПИСЬ",

			
 
				+	 "ДАТА_ПОДП ИНСТ КЛАСС_ДОК ЗГЛ N_ТЕКСТ ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП НАДПИСКИ",

			
 
				+  // приложение

			
 
				+	 "НАЧАЛО ИНСТ КЛАСС_ДОК ТИП_ДОК ТИП_ПРИЛ ГЛАВ_ДОК ДАТА_ПОДП ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП КОЛ_ПРИЛ  МАССИВ РАЗМЕР КОНЕЦ",

			
 
				+	 "[ЗГЛ]",

			
 
				+	 "ТИП_ПРИЛ ШАПКА_ПРИЛ ЗГЛ"}

			
 
				+

			
 
				+$DT1 = {"Собрание законодательных актов РФ",

			
 
				+	 "НАЧАЛО КЛАСС_ДОК ТИП_ДОК [СТАТУС] ДАТА_ПОДП ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП КОЛ_ПРИЛ МАССИВ РАЗМЕР КОНЕЦ",

			
 
				+	 "КЛАСС_ДОК ТИП_ДОК ДАТА_ПОДП [ЗГЛ] N_ТЕКСТ ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП",

			
 
				+	 "ИНСТ КЛАСС_ДОК [CТАТУС] ДАТА_ПОДП ЗГЛ N_ТЕКСТ ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП",

			
 
				+  // приложение

			
 
				+	 "НАЧАЛО КЛАСС_ДОК ТИП_ДОК ТИП_ПРИЛ ГЛАВ_ДОК [СТАТУС] ДАТА_ПОДП ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП КОЛ_ПРИЛ МАССИВ РАЗМЕР КОНЕЦ",

			
 
				+	 "[ЗГЛ]",

			
 
				+	 "ТИП_ПРИЛ ШАПКА_ПРИЛ ЗГЛ"}

			
 
				+

			
 
				+$DT2 = {"Российско-американские документы",

			
 
				+	 "НАЧАЛО КВА_ТИП ТИП_ДОК ДАТА_ПОДП ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД МАССИВ РАЗМЕР КОНЕЦ",

			
 
				+	 "ТИП_ДОК ДАТА_ПОДП ЗГЛ N_ТЕКСТ",

			
 
				+	 "ДАТА_ПОДП ЗГЛ N_ТЕКСТ ПОДПИСЬ",

			
 
				+	// приложение

			
 
				+	 "НАЧАЛО КВА_ТИП ТИП_ПРИЛ ТИП_ДОК ГЛАВ_ДОК ДАТА_ПОДП ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД МАССИВ РАЗМЕР КОНЕЦ",

			
 
				+	 "[ЗГЛ]",

			
 
				+	 "ТИП_ПРИЛ ЗГЛ"}

			
 
				+

			
 
				+$DT3 = {"Свободный текст",

			
 
				+	 "НАЧАЛО ЗГЛ N_ВХОД N_ВЫХОД МАССИВ РАЗМЕР КОНЕЦ",

			
 
				+	 "[ЗГЛ]",

			
 
				+	 "ЗГЛ",

			
 
				+	// приложение

			
 
				+	 "НАЧАЛО ТИП_ПРИЛ ГЛАВ_ДОК ЗГЛ N_ВХОД N_ВЫХОД МАССИВ РАЗМЕР КОНЕЦ",

			
 
				+	 "",

			
 
				+	 ""}

			
 
				+

			
 
				+$DT4 = {"Выписки",

			
 
				+	 "НАЧАЛО КЛАСС_ДОК ТИП_ДОК ДАТА_ПОДП ДАТА_ДУБЛЬ ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД ПОДПИСЬ [МЕСТО_ПОДП] ДОЛЖ_ПОДП МАССИВ РАЗМЕР КОНЕЦ",

			
 
				+	 "КЛАСС_ДОК ТИП_ДОК ДАТА_ДУБЛЬ ДАТА_ПОДП [ЗГЛ] N_ТЕКСТ ПОДПИСЬ [МЕСТО_ПОДП] ДОЛЖ_ПОДП",

			
 
				+	 "ИНСТ КЛАСС_ДОК ДАТА_ДУБЛЬ ДАТА_ПОДП ЗГЛ N_ТЕКСТ ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП",

			
 
				+  // приложение

			
 
				+	 "",

			
 
				+	 "",

			
 
				+	 ""}

			
 
				+

			
 
				+

			
 
				+%Z23

			
 
				+

			
 
				+ принят   =  Принят

			
 
				+ одобрен  =  Одобрен

			
 
				+

			
 
				+%Z24   // разделители приложений

			
 
				+

			
 
				+ _____   = $Del1

			
 
				+ #       = $Del2

			
 
				+

			
 
				+%Z0

			
 
				+// Объявление  ограниченных отрезков. Для того, чтобы объявить ограниченный 

			
 
				+// отрезок, необходимо указать из каких символов он должен состоять,  

			
 
				+// и какой максимальной длины он может быть. В символы нельзя включать пробелы и признаки 

			
 
				+// конца строки.

			
 
				+ 1234567890         3  = $Dig3

			
 
				+ 1234567890         4  = $Dig4

			
 
				+ 1234567890-        7  = $Dig6Hyp

			
 
				+ 1234567890/_-ПВнНI 20  = $DNumRich 

			
 
				+

			
 
				+

			
 
				+%Z25   Числовая часть выходного номера документа.

			
 
				+// В Zone 9 описано расширение выходного номера документа.

			
 
				+// Здесь используются ограниченные отрезки (Zone 0).

			
 
				+// Самые свободные - наверх, в противном случае номер будет распознаваться

			
 
				+// не полностью.

			
 
				+

			
 
				+N $DNumRich = $DNum|1

			
 
				+ј $DNumRich = $DNum|1

			
 
				+N $Dig6Hyp  = $DNum|1

			
 
				+N $Dig4     = $DNum|1

			
 
				+за N $Dig4  = $DNum|2

			
 
				+$Dig3       = $DNum|0

			
 
				+

			
 
				+

			
 
				+%Z26  День даты подписания 

			
 
				+ 

			
 
				+ от #d      = $D|1

			
 
				+ #d .       = $D|0

			
 
				+ " #d "     = $D|1

			
 
				+ от q #d "     = $D|2

			
 
				+ от " #d "  = $D|2

			
 
				+ #d         = $D|0

			
 
				+ 

			
--- a/dictonary/Dicts/GraphAn/ross.txt
+++ b/dictonary/Dicts/GraphAn/ross.txt
@@ -0,0 +1,402 @@
 
				+abraham

			
 
				+achim

			
 
				+adam

			
 
				+adolf

			
 
				+agnes

			
 
				+albert

			
 
				+albrecht

			
 
				+alex

			
 
				+alexander

			
 
				+alexandra

			
 
				+alfons

			
 
				+alfred

			
 
				+ali

			
 
				+alice

			
 
				+alma

			
 
				+andi

			
 
				+andrea

			
 
				+andreas

			
 
				+andrew

			
 
				+angela

			
 
				+angelika

			
 
				+anita

			
 
				+anja

			
 
				+anke

			
 
				+ann

			
 
				+anna

			
 
				+anne

			
 
				+anneliese

			
 
				+annette

			
 
				+antje

			
 
				+antoine

			
 
				+anton

			
 
				+antonio

			
 
				+aprilia

			
 
				+armin

			
 
				+arndt

			
 
				+arnold

			
 
				+arthur

			
 
				+astrid

			
 
				+axel

			
 
				+barbara

			
 
				+bartholomäus

			
 
				+bastian

			
 
				+beate

			
 
				+ben

			
 
				+benjamin

			
 
				+benno

			
 
				+bernd

			
 
				+bernhard

			
 
				+bernie

			
 
				+bertha

			
 
				+berthold

			
 
				+berti

			
 
				+bertold

			
 
				+bettina

			
 
				+bill

			
 
				+birgit

			
 
				+björn

			
 
				+bob

			
 
				+bobby

			
 
				+bodo

			
 
				+bogdan

			
 
				+bonifatius

			
 
				+boris

			
 
				+brigitte

			
 
				+bruno

			
 
				+burkhard

			
 
				+butros

			
 
				+bärbel

			
 
				+carina

			
 
				+carl

			
 
				+carlos

			
 
				+carmen

			
 
				+carsten

			
 
				+charles

			
 
				+charlie

			
 
				+charlotte

			
 
				+chris

			
 
				+christian

			
 
				+christiane

			
 
				+christine

			
 
				+christoph

			
 
				+christopher

			
 
				+clara

			
 
				+claudia

			
 
				+claus

			
 
				+clemens

			
 
				+constantin

			
 
				+constanze

			
 
				+cornelia

			
 
				+dagmar

			
 
				+daisy

			
 
				+daniel

			
 
				+daniela

			
 
				+david

			
 
				+dennis

			
 
				+denny

			
 
				+detlef

			
 
				+dieter

			
 
				+dietmar

			
 
				+dietrich

			
 
				+dirk

			
 
				+doris

			
 
				+dragomir

			
 
				+eberhard

			
 
				+eberhardt

			
 
				+ebert

			
 
				+eckard

			
 
				+eckhard

			
 
				+edgar

			
 
				+edith

			
 
				+edmund

			
 
				+eduard

			
 
				+egon

			
 
				+elisabeth

			
 
				+elke

			
 
				+elmar

			
 
				+eloise

			
 
				+else

			
 
				+elvis

			
 
				+emil

			
 
				+emma

			
 
				+ercan

			
 
				+erhard

			
 
				+eric

			
 
				+erich

			
 
				+erika

			
 
				+erwin

			
 
				+eugen

			
 
				+eva

			
 
				+felipiano

			
 
				+felix

			
 
				+ferdinand

			
 
				+fernando

			
 
				+florian

			
 
				+frank

			
 
				+franz

			
 
				+fred

			
 
				+friedrich

			
 
				+fritz

			
 
				+gabi

			
 
				+gabriele

			
 
				+gallus

			
 
				+georg

			
 
				+george

			
 
				+gerd

			
 
				+gerhard

			
 
				+gerhardt

			
 
				+gerhold

			
 
				+gernhardt

			
 
				+gert

			
 
				+gertrud

			
 
				+gisela

			
 
				+giuseppe

			
 
				+gottfried

			
 
				+gregor

			
 
				+gudrun

			
 
				+guido

			
 
				+gustav

			
 
				+götz

			
 
				+günter

			
 
				+günther

			
 
				+hanna

			
 
				+hannelore

			
 
				+hannes

			
 
				+hans

			
 
				+harald

			
 
				+harry

			
 
				+hartmut

			
 
				+heidemarie

			
 
				+heidi

			
 
				+heike

			
 
				+heiko

			
 
				+heiner

			
 
				+heinrich

			
 
				+heinz

			
 
				+helga

			
 
				+helmut

			
 
				+henriette

			
 
				+henry

			
 
				+herbert

			
 
				+heribert

			
 
				+hermann

			
 
				+hilde

			
 
				+hildegard

			
 
				+holger

			
 
				+horst

			
 
				+hubert

			
 
				+hugo

			
 
				+ibrahim

			
 
				+igor

			
 
				+ilse

			
 
				+inge

			
 
				+ingeborg

			
 
				+ingo

			
 
				+ingrid

			
 
				+iris

			
 
				+irmgard

			
 
				+isabel

			
 
				+ivan

			
 
				+izabella

			
 
				+jack

			
 
				+jacques

			
 
				+jakob

			
 
				+james

			
 
				+jan

			
 
				+jean

			
 
				+jeanne

			
 
				+jens

			
 
				+jerry

			
 
				+jim

			
 
				+joachim

			
 
				+jochen

			
 
				+joe

			
 
				+johann

			
 
				+johanna

			
 
				+johannes

			
 
				+john

			
 
				+jon

			
 
				+joschka

			
 
				+josef

			
 
				+josefine

			
 
				+joseph

			
 
				+juan

			
 
				+julia

			
 
				+jutta

			
 
				+jörg

			
 
				+jörn

			
 
				+jürgen

			
 
				+kai

			
 
				+karin

			
 
				+karina

			
 
				+karl

			
 
				+karlheinz

			
 
				+karolin

			
 
				+karoline

			
 
				+karsten

			
 
				+katharina

			
 
				+katja

			
 
				+katrin

			
 
				+kerstin

			
 
				+kevin

			
 
				+kilian

			
 
				+kim

			
 
				+kirsten

			
 
				+klaus

			
 
				+konrad

			
 
				+kurt

			
 
				+käthe

			
 
				+lars

			
 
				+lech

			
 
				+lee

			
 
				+leo

			
 
				+leonhard

			
 
				+linda

			
 
				+lisa

			
 
				+lorenz

			
 
				+lothar

			
 
				+louis

			
 
				+ludwig

			
 
				+lulu

			
 
				+lutz

			
 
				+manfred

			
 
				+manuela

			
 
				+marc

			
 
				+marcel

			
 
				+marco

			
 
				+margaret

			
 
				+margarete

			
 
				+margaretha

			
 
				+margarethe

			
 
				+margot

			
 
				+margret

			
 
				+margritte

			
 
				+maria

			
 
				+marianne

			
 
				+marie

			
 
				+mario

			
 
				+marion

			
 
				+markus

			
 
				+martin

			
 
				+mary

			
 
				+mathias

			
 
				+matthias

			
 
				+matthäus

			
 
				+maurice

			
 
				+max

			
 
				+mechthild

			
 
				+michael

			
 
				+michail

			
 
				+michel

			
 
				+mike

			
 
				+mirjam

			
 
				+mohammed

			
 
				+monica

			
 
				+monika

			
 
				+moritz

			
 
				+naxos

			
 
				+neidhard

			
 
				+nelson

			
 
				+nicolas

			
 
				+nicole

			
 
				+norbert

			
 
				+olaf

			
 
				+olga

			
 
				+oliver

			
 
				+oscar

			
 
				+oskar

			
 
				+otto

			
 
				+patrick

			
 
				+paul

			
 
				+pauline

			
 
				+peter

			
 
				+petra

			
 
				+philip

			
 
				+philipp

			
 
				+pierre

			
 
				+rainer

			
 
				+ralf

			
 
				+ralph

			
 
				+regina

			
 
				+reinhard

			
 
				+reinhardt

			
 
				+reinhold

			
 
				+renate

			
 
				+richard

			
 
				+rita

			
 
				+robert

			
 
				+robin

			
 
				+roger

			
 
				+roland

			
 
				+rolf

			
 
				+romeo

			
 
				+ronald

			
 
				+roy

			
 
				+rudi

			
 
				+rudolf

			
 
				+ruth

			
 
				+rüdiger

			
 
				+sabine

			
 
				+saddam

			
 
				+salman

			
 
				+sandra

			
 
				+sascha

			
 
				+sebastian

			
 
				+siegfried

			
 
				+sigrid

			
 
				+silke

			
 
				+simon

			
 
				+simone

			
 
				+sonja

			
 
				+stefan

			
 
				+steffen

			
 
				+steffi

			
 
				+stephan

			
 
				+steve

			
 
				+susanne

			
 
				+sven

			
 
				+sylvia

			
 
				+theo

			
 
				+theodor

			
 
				+thomas

			
 
				+thorsten

			
 
				+tim

			
 
				+titus

			
 
				+tom

			
 
				+toni

			
 
				+tony

			
 
				+torsten

			
 
				+udo

			
 
				+ulf

			
 
				+uli

			
 
				+ulrich

			
 
				+ulrike

			
 
				+ursula

			
 
				+uta

			
 
				+ute

			
 
				+uwe

			
 
				+vera

			
 
				+verena

			
 
				+vicki

			
 
				+viktoria

			
 
				+viola

			
 
				+vladimir

			
 
				+volker

			
 
				+walter

			
 
				+waltraud

			
 
				+werner

			
 
				+whitney

			
 
				+wilfried

			
 
				+wilhelm

			
 
				+willi

			
 
				+william

			
 
				+willie

			
 
				+willy

			
 
				+winfried

			
 
				+wladimir

			
 
				+wladyslaw

			
 
				+wolfgang

			
 
				+wolfram

			
 
				+wynalda

			
 
				+yvonne

			
--- a/dictonary/Dicts/GraphAn/space.dic
+++ b/dictonary/Dicts/GraphAn/space.dic
@@ -0,0 +1,15 @@
 
				+закон

			
 
				+ПЕРЕЧЕНЬ

			
 
				+СХЕМА

			
 
				+СОСТАВ

			
 
				+ПОЛОЖЕНИЕ

			
 
				+СПИСОК

			
 
				+ВЕДОМОСТЬ

			
 
				+ОБЪЕМ

			
 
				+ПОСТАНОВЛЯЮ

			
 
				+ПОСТАНОВЛЯЕТ

			
 
				+УКАЗ

			
 
				+постановляет

			
 
				+ПОСТАНОВЛЕНИЕ

			
 
				+РАСПОРЯЖЕНИЕ

			
 
				+РЕШЕНИЕ
			
--- a/dictonary/Dicts/Morph/Eng/morph.options
+++ b/dictonary/Dicts/Morph/Eng/morph.options
--- a/dictonary/Dicts/Morph/Rus/morph.options
+++ b/dictonary/Dicts/Morph/Rus/morph.options
--- a/dictonary/Dicts/Morph/egramtab.tab
+++ b/dictonary/Dicts/Morph/egramtab.tab
@@ -0,0 +1,123 @@
 
				+aa 1 ADJECTIVE 

			
 
				+ab 1 ADJECTIVE comp        

			
 
				+ac 1 ADJECTIVE sup 

			
 
				+

			
 
				+// many, more  most

			
 
				+xi 1 NUMERAL

			
 
				+cb 1 NUMERAL comp

			
 
				+cc 1 NUMERAL sup

			
 
				+

			
 
				+         

			
 
				+//  for adjectives like "English", "Russian"

			
 
				+ad 1 ADJECTIVE prop

			
 
				+ba 1 ADVERB

			
 
				+bb 1 ADVERB comp

			
 
				+bc 1 ADVERB sup          

			
 
				+va 1 VERB inf            

			
 
				+vb 1 VERB prsa,sg,3    

			
 
				+vc 1 VERB pasa          

			
 
				+vd 1 VERB pp             

			
 
				+ve 1 VERB ing            

			
 
				+vf 1 MOD inf          

			
 
				+vh 1 MOD pasa         

			
 
				+ta 1 VBE inf          

			
 
				+tb 1 VBE prsa,sg,1    

			
 
				+td 1 VBE prsa,sg,3     

			
 
				+te 1 VBE prsa,pl      

			
 
				+tf 1 VBE ing          

			
 
				+tg 1 VBE pasa,sg      

			
 
				+ti 1 VBE pasa,pl      

			
 
				+tj 1 VBE pp           

			
 
				+tk 1 VBE fut,1,sg

			
 
				+tl 1 VBE fut,sg,pl,1,2,3

			
 
				+tm 1 VBE if,sg,1,2

			
 
				+tn 1 VBE if,sg,3      

			
 
				+to 1 VBE if,pl       

			
 
				+pa 1 PN pers,nom      

			
 
				+pb 1 PN pers,obj

			
 
				+pc 1 PN pers,nom,sg,1

			
 
				+pd 1 PN pers,obj,sg,1

			
 
				+pe 1 PN pers,nom,2      

			
 
				+pf 1 PN pers,obj,2

			
 
				+pg 1 PN pers,nom,sg,3      

			
 
				+ph 1 PN pers,obj,sg,3

			
 
				+pi 1 PN pers,nom,pl,1

			
 
				+pk 1 PN pers,obj,pl,1

			
 
				+pl 1 PN pers,nom,pl,3      

			
 
				+pm 1 PN pers,obj,pl,3

			
 
				+da 1 PN ref,sg

			
 
				+db 1 PN ref,pl       

			
 
				+ea 1 PN_ADJ poss     

			
 
				+eb 1 PN_ADJ poss,pred

			
 
				+ec 1 PN_ADJ dem,sg

			
 
				+ed 1 PN_ADJ dem,pl

			
 
				+ee 1 PN_ADJ 

			
 
				+ef 1 PRON 

			
 
				+

			
 
				+// "table", "town"

			
 
				+na 1 NOUN narr,sg        

			
 
				+nb 1 NOUN narr,pl

			
 
				+

			
 
				+//  analytical possessive

			
 
				+fa 1 NOUN narr,poss

			
 
				+

			
 
				+//  nouns which can be mass  and uncount

			
 
				+// "silk", "clay"

			
 
				+nc 1 NOUN narr,mass,uncount,sg

			
 
				+//  analytical possessive

			
 
				+fb 1 NOUN narr,mass,uncount,poss

			
 
				+

			
 
				+

			
 
				+//  mass nouns 

			
 
				+// "water", "butter"

			
 
				+ne 1 NOUN narr,mass,sg

			
 
				+ng 1 NOUN narr,mass,pl

			
 
				+//  analytical possessive

			
 
				+fc 1 NOUN narr,mass,poss

			
 
				+ 

			
 
				+

			
 
				+//  uncount nouns 

			
 
				+// "acceleration", "activism"

			
 
				+ni 1 NOUN narr,uncount,sg

			
 
				+

			
 
				+

			
 
				+// "John", "James"

			
 
				+oa 1 NOUN prop,m,sg   

			
 
				+ob 1 NOUN prop,m,pl      

			
 
				+

			
 
				+//  analytical possessive

			
 
				+fd 1 NOUN prop,m,poss

			
 
				+

			
 
				+// "Mary", "Jane"

			
 
				+oc 1 NOUN prop,f,sg      

			
 
				+od 1 NOUN prop,f,pl      

			
 
				+//  analytical possessive

			
 
				+fe 1 NOUN prop,f,poss

			
 
				+

			
 
				+// "Glen" "Lee" "Jerry"

			
 
				+oe 1 NOUN prop,m,f,sg    

			
 
				+of 1 NOUN prop,m,f,pl

			
 
				+//  analytical possessive

			
 
				+ff 1 NOUN prop,m,f,poss

			
 
				+

			
 
				+// general geographical names

			
 
				+ga 1 NOUN prop

			
 
				+//  analytical possessive

			
 
				+fg 1 NOUN prop,poss

			
 
				+

			
 
				+xa 1 CONJ               

			
 
				+xb 1 INT              

			
 
				+xc 1 PREP             

			
 
				+xd 1 PART             

			
 
				+xf 1 ARTICLE

			
 
				+xi 1 NUMERAL

			
 
				+xp 1 ORDNUM              

			
 
				+yc 1 POSS plsq

			
 
				+yd 1 POSS plsgs

			
 
				+ //‘¯¥æ¨ «ì®¥ áãé¥áâ¢¨â¥«ì®¥ § £«ãèª , ®¬¥à ª®¤  ¨á¯®«ì§ã¥âáï!

			
 
				+xx 1 NOUN prop sg pl

			
 
				+

			
 
				+// type ancodes 

			
 
				+za 1 * geo        

			
 
				+zb 1 * name

			
 
				+zc 1 * org
			
--- a/dictonary/Dicts/Morph/rgramtab.tab
+++ b/dictonary/Dicts/Morph/rgramtab.tab
@@ -0,0 +1,878 @@
 
				+//  ======  ���������������   ========

			
 
				+

			
 
				+// סףשוסעגטעוכ�ם�ו לףזסךמדמ נמהא

			
 
				+אא A � לנ,וה,טל

			
 
				+אב A � לנ,וה,נה

			
 
				+�פ A � לנ,וה,נה,2

			
 
				+אג A � לנ,וה,הע

			
 
				+אד A � לנ,וה,גם

			
 
				+אה A � לנ,וה,עג

			
 
				+או A � לנ,וה,ןנ

			
 
				+�ץ A � לנ,וה,ןנ,2

			
 
				+אס A � לנ,וה,חג,

			
 
				+אז A � לנ,לם,טל

			
 
				+אח A � לנ,לם,נה

			
 
				+אט A � לנ,לם,הע

			
 
				+אי A � לנ,לם,גם

			
 
				+אך A � לנ,לם,עג

			
 
				+אכ A � לנ,לם,ןנ

			
 
				+אל B � לנ,0

			
 
				+אם B � לנ,וה,0

			
 
				+

			
 
				+// =============  נאחדמגמנם�י  ================

			
 
				+�מ A � לנ,וה,טל,נאחד 

			
 
				+�ן A � לנ,וה,נה,נאחד 

			
 
				+�נ A � לנ,וה,הע,נאחד 

			
 
				+�ס A � לנ,וה,גם,נאחד 

			
 
				+�ע A � לנ,וה,עג,נאחד 

			
 
				+�פ A � לנ,וה,ןנ,נאחד 

			
 
				+�ץ A � לנ,וה,חג,נאחד 

			
 
				+‗ב A � לנ,לם,טל,נאחד 

			
 
				+‗א A � לנ,לם,נה,נאחד

			
 
				+‗ג A � לנ,לם,הע,נאחד

			
 
				+‗ד A � לנ,לם,גם,נאחד

			
 
				+‗ה A � לנ,לם,עג,נאחד

			
 
				+‗ז A � לנ,לם,ןנ,נאחד

			
 
				+

			
 
				+

			
 
				+// =============  אנץאטחל  ================

			
 
				+דמ A � לנ,וה,טל,אנץ 

			
 
				+דן A � לנ,וה,נה,אנץ 

			
 
				+דנ A � לנ,וה,הע,אנץ 

			
 
				+דס A � לנ,וה,גם,אנץ 

			
 
				+דע A � לנ,וה,עג,אנץ 

			
 
				+דף A � לנ,וה,ןנ,אנץ 

			
 
				+דפ A � לנ,לם,טל,אנץ 

			
 
				+דץ A � לנ,לם,נה,אנץ

			
 
				+דצ A � לנ,לם,הע,אנץ

			
 
				+דק A � לנ,לם,גם,אנץ

			
 
				+דר A � לנ,לם,עג,אנץ

			
 
				+דש A � לנ,לם,ןנ,אנץ

			
 
				+

			
 
				+

			
 
				+// סףשוסעגטעוכ�ם�ו לףזסךמדמ-זוםסךמדמ נמהא

			
 
				+

			
 
				+גא E � לנ-זנ,וה,טל

			
 
				+גב E � לנ-זנ,וה,נה

			
 
				+גג E � לנ-זנ,וה,הע

			
 
				+גד E � לנ-זנ,וה,גם

			
 
				+גה E � לנ-זנ,וה,עג

			
 
				+גו E � לנ-זנ,וה,ןנ

			
 
				+גז E � לנ-זנ,לם,טל

			
 
				+גח E � לנ-זנ,לם,נה

			
 
				+גט E � לנ-זנ,לם,הע

			
 
				+גי E � לנ-זנ,לם,גם

			
 
				+גך E � לנ-זנ,לם,עג

			
 
				+גכ E � לנ-זנ,לם,ןנ

			
 
				+גל F � לנ-זנ,0

			
 
				+גם F � לנ-זנ,וה,0

			
 
				+

			
 
				+

			
 
				+// סףשוסעגטעוכ�ם�ו לףזסךמדמ-זוםסךמדמ נמהא (אנץאטחל)

			
 
				+גמ E � אנץ,לנ-זנ,וה,טל

			
 
				+גן E � אנץ,לנ-זנ,וה,נה

			
 
				+גנ E � אנץ,לנ-זנ,וה,הע

			
 
				+גס E � אנץ,לנ-זנ,וה,גם

			
 
				+גע E � אנץ,לנ-זנ,וה,עג

			
 
				+גף E � אנץ,לנ-זנ,וה,ןנ

			
 
				+גפ E � אנץ,לנ-זנ,לם,טל

			
 
				+גץ E � אנץ,לנ-זנ,לם,נה

			
 
				+גצ E � אנץ,לנ-זנ,לם,הע

			
 
				+גק E � אנץ,לנ-זנ,לם,גם

			
 
				+גר E � אנץ,לנ-זנ,לם,עג

			
 
				+גש E � אנץ,לנ-זנ,לם,ןנ

			
 
				+

			
 
				+// סףשוסעגטעוכ�ם�ו זוםסךמדמ נמהא

			
 
				+דא G � זנ,וה,טל

			
 
				+דב G � זנ,וה,נה

			
 
				+דג G � זנ,וה,הע

			
 
				+דד G � זנ,וה,גם

			
 
				+דה G � זנ,וה,עג

			
 
				+דו G � זנ,וה,ןנ

			
 
				+�ק G � זנ,וה,ןנ,2

			
 
				+�ר G � זנ,וה,חג

			
 
				+דז G � זנ,לם,טל

			
 
				+דח G � זנ,לם,נה

			
 
				+דט G � זנ,לם,הע

			
 
				+די G � זנ,לם,גם

			
 
				+דך G � זנ,לם,עג

			
 
				+דכ G � זנ,לם,ןנ

			
 
				+דל H � זנ,0

			
 
				+דם H � זנ,וה,0

			
 
				+

			
 
				+// סףשוסעגטעוכ�ם�ו זוםסךמדמ (אנץאטחל)

			
 
				+�א G � אנץ,זנ,וה,טל

			
 
				+�ב G � אנץ,זנ,וה,נה

			
 
				+�ג G � אנץ,זנ,וה,הע

			
 
				+�ד G � אנץ,זנ,וה,גם

			
 
				+�ה G � אנץ,זנ,וה,עג

			
 
				+�ו G � אנץ,זנ,וה,ןנ

			
 
				+�ז G � אנץ,זנ,לם,טל

			
 
				+�ח G � אנץ,זנ,לם,נה

			
 
				+�ט G � אנץ,זנ,לם,הע

			
 
				+�י G � אנץ,זנ,לם,גם

			
 
				+�ך G � אנץ,זנ,לם,עג

			
 
				+�כ G � אנץ,זנ,לם,ןנ

			
 
				+

			
 
				+

			
 
				+// סףשוסעגטעוכ�ם�ו זוםסךמדמ (נאחדמגמנם�י)

			
 
				+�ל G � נאחד,זנ,וה,טל

			
 
				+�ם G � נאחד,זנ,וה,נה

			
 
				+�מ G � נאחד,זנ,וה,הע

			
 
				+�ן G � נאחד,זנ,וה,גם

			
 
				+�נ G � נאחד,זנ,וה,עג

			
 
				+�ס G � נאחד,זנ,וה,ןנ

			
 
				+�ע G � נאחד,זנ,לם,טל

			
 
				+�ף G � נאחד,זנ,לם,נה

			
 
				+�פ G � נאחד,זנ,לם,הע

			
 
				+�ץ G � נאחד,זנ,לם,גם

			
 
				+�צ G � נאחד,זנ,לם,עג

			
 
				+�ק G � נאחד,זנ,לם,ןנ

			
 
				+

			
 
				+

			
 
				+

			
 
				+// סףשוסעגטעוכ�ם�ו סנוהםודמ נמהא

			
 
				+

			
 
				+וא K � סנ,וה,טל

			
 
				+וב K � סנ,וה,נה

			
 
				+וג K � סנ,וה,הע

			
 
				+וד K � סנ,וה,גם

			
 
				+וה K � סנ,וה,עג

			
 
				+וו K � סנ,וה,ןנ

			
 
				+וז K � סנ,לם,טל

			
 
				+וח K � סנ,לם,נה

			
 
				+וט K � סנ,לם,הע

			
 
				+וי K � סנ,לם,גם

			
 
				+וך K � סנ,לם,עג

			
 
				+וכ K � סנ,לם,ןנ

			
 
				+ול L � סנ,0

			
 
				+ום L � סנ,וה,0

			
 
				+

			
 
				+// טל. �וםטםא

			
 
				+�� K � סנ,וה,נה,אבבנ

			
 
				+

			
 
				+// סףשוסעגטעוכ�ם�ו סנוהםודמ נמהא (נאחדמגמנם�י)

			
 
				+‗ח K � נאחד,סנ,וה,טל

			
 
				+‗ט K � נאחד,סנ,וה,נה

			
 
				+‗ך K � נאחד,סנ,וה,הע

			
 
				+‗כ K � נאחד,סנ,וה,גם

			
 
				+‗ל K � נאחד,סנ,וה,עג

			
 
				+‗ם K � נאחד,סנ,וה,ןנ

			
 
				+‗מ K � נאחד,סנ,לם,טל

			
 
				+‗ן K � נאחד,סנ,לם,נה

			
 
				+‗נ K � נאחד,סנ,לם,הע

			
 
				+‗ס K � נאחד,סנ,לם,גם

			
 
				+‗ע K � נאחד,סנ,לם,עג

			
 
				+‗ף K � נאחד,סנ,לם,ןנ

			
 
				+

			
 
				+// pluralia tantum

			
 
				+טז Q � לם,לם,טל

			
 
				+טח Q � לם,לם,נה

			
 
				+טט Q � לם,לם,הע

			
 
				+טי Q � לם,לם,גם

			
 
				+טך Q � לם,לם,עג

			
 
				+טכ Q � לם,לם,ןנ

			
 
				+טל R � לם,0

			
 
				+

			
 
				+//  אבבנוגטאעףנ�

			
 
				+אמ B � לנ,אבבנ,0,

			
 
				+אן B � לנ,וה,אבבנ,0

			
 
				+אע H � זנ,אבבנ,0

			
 
				+אף H � זנ,וה,אבבנ,0

			
 
				+אצ H � סנ,אבבנ,0

			
 
				+אק H � סנ,וה,אבבנ,0

			
 
				+את R � לם,אבבנ,0

			
 
				+

			
 
				+// טלוםא

			
 
				+

			
 
				+במ C � לנ,טל�,וה,טל

			
 
				+בן C � לנ,טל�,וה,נה

			
 
				+בנ C � לנ,טל�,וה,הע

			
 
				+בס C � לנ,טל�,וה,גם

			
 
				+בע C � לנ,טל�,וה,עג

			
 
				+בף C � לנ,טל�,וה,ןנ

			
 
				+ב� C � לנ,טל�,וה,חג,נאחד

			
 
				+בפ C � לנ,טל�,לם,טל

			
 
				+בץ C � לנ,טל�,לם,נה

			
 
				+בצ C � לנ,טל�,לם,הע

			
 
				+בק C � לנ,טל�,לם,גם

			
 
				+בר C � לנ,טל�,לם,עג

			
 
				+בש C � לנ,טל�,לם,ןנ

			
 
				+

			
 
				+ב� I � לנ,טל�,0

			
 
				+

			
 
				+

			
 
				+ג� E � לנ-זנ,טל�,0

			
 
				+ג� E � לנ-זנ,טל�,וה,טל

			
 
				+ג� E � לנ-זנ,טל�,וה,נה

			
 
				+ג� E � לנ-זנ,טל�,וה,הע

			
 
				+ג� E � לנ-זנ,טל�,וה,גם

			
 
				+ג� E � לנ-זנ,טל�,וה,עג

			
 
				+ג� E � לנ-זנ,טל�,וה,ןנ

			
 
				+ג� E � לנ-זנ,טל�,וה,חג,נאחד

			
 
				+ג� E � לנ-זנ,טל�,לם,טל

			
 
				+ג� E � לנ-זנ,טל�,לם,נה

			
 
				+ג� E � לנ-זנ,טל�,לם,הע

			
 
				+ג� E � לנ-זנ,טל�,לם,גם

			
 
				+ג� E � לנ-זנ,טל�,לם,עג

			
 
				+ג� E � לנ-זנ,טל�,לם,ןנ

			
 
				+

			
 
				+

			
 
				+המ I � זנ,טל�,וה,טל

			
 
				+הן I � זנ,טל�,וה,נה

			
 
				+הנ I � זנ,טל�,וה,הע

			
 
				+הס I � זנ,טל�,וה,גם

			
 
				+הע I � זנ,טל�,וה,עג

			
 
				+הף I � זנ,טל�,וה,ןנ

			
 
				+ה� I � זנ,טל�,וה,חג,נאחד

			
 
				+הפ I � זנ,טל�,לם,טל

			
 
				+הץ I � זנ,טל�,לם,נה

			
 
				+הצ I � זנ,טל�,לם,הע

			
 
				+הק I � זנ,טל�,לם,גם

			
 
				+הר I � זנ,טל�,לם,עג

			
 
				+הש I � זנ,טל�,לם,ןנ

			
 
				+

			
 
				+

			
 
				+ה� I � זנ,טל�,0

			
 
				+

			
 
				+

			
 
				+

			
 
				+// לףזסךטו מעקוסעגא 

			
 
				+

			
 
				+�א Q � לנ,מעק,וה,טל,

			
 
				+�ב Q � לנ,מעק,וה,נה,

			
 
				+�ג Q � לנ,מעק,וה,הע,

			
 
				+�ד Q � לנ,מעק,וה,גם,

			
 
				+�ה Q � לנ,מעק,וה,עג,

			
 
				+�ו Q � לנ,מעק,וה,ןנ,

			
 
				+�ם Q � לנ,מעק,לם,טל,

			
 
				+�מ Q � לנ,מעק,לם,נה,

			
 
				+�ן Q � לנ,מעק,לם,הע,

			
 
				+�נ Q � לנ,מעק,לם,גם,

			
 
				+�ס Q � לנ,מעק,לם,עג,

			
 
				+�ע Q � לנ,מעק,לם,ןנ,

			
 
				+

			
 
				+// זוםסךטו מעקוסעגא 

			
 
				+

			
 
				+�ז Q � זנ,מעק,וה,טל,

			
 
				+�ח Q � זנ,מעק,וה,נה,

			
 
				+�ט Q � זנ,מעק,וה,הע,

			
 
				+�ך Q � זנ,מעק,וה,גם,

			
 
				+�כ Q � זנ,מעק,וה,עג,

			
 
				+�ל Q � זנ,מעק,וה,ןנ,

			
 
				+�ף Q � זנ,מעק,לם,טל,

			
 
				+�פ Q � זנ,מעק,לם,נה,

			
 
				+�ץ Q � זנ,מעק,לם,הע,

			
 
				+�צ Q � זנ,מעק,לם,גם,

			
 
				+�ק Q � זנ,מעק,לם,עג,

			
 
				+�ר Q � זנ,מעק,לם,ןנ,

			
 
				+

			
 
				+

			
 
				+

			
 
				+// לףזסךטו מעקוסעגא  (נאחד.)

			
 
				+

			
 
				+�א Q � לנ,מעק,נאחד,וה,טל,

			
 
				+�ב Q � לנ,מעק,נאחד,וה,נה,

			
 
				+�ג Q � לנ,מעק,נאחד,וה,הע,

			
 
				+�ד Q � לנ,מעק,נאחד,וה,גם,

			
 
				+�ה Q � לנ,מעק,נאחד,וה,עג,

			
 
				+�ו Q � לנ,מעק,נאחד,וה,ןנ,

			
 
				+�ם Q � לנ,מעק,נאחד,לם,טל,

			
 
				+�מ Q � לנ,מעק,נאחד,לם,נה,

			
 
				+�ן Q � לנ,מעק,נאחד,לם,הע,

			
 
				+�נ Q � לנ,מעק,נאחד,לם,גם,

			
 
				+�ס Q � לנ,מעק,נאחד,לם,עג,

			
 
				+�ע Q � לנ,מעק,נאחד,לם,ןנ,

			
 
				+

			
 
				+// זוםסךטו מעקוסעגא  (נאחד.)

			
 
				+

			
 
				+�ז Q � זנ,מעק,נאחד,וה,טל,

			
 
				+�ח Q � זנ,מעק,נאחד,וה,נה,

			
 
				+�ט Q � זנ,מעק,נאחד,וה,הע,

			
 
				+�ך Q � זנ,מעק,נאחד,וה,גם,

			
 
				+�כ Q � זנ,מעק,נאחד,וה,עג,

			
 
				+�ל Q � זנ,מעק,נאחד,וה,ןנ,

			
 
				+�ף Q � זנ,מעק,נאחד,לם,טל,

			
 
				+�פ Q � זנ,מעק,נאחד,לם,נה,

			
 
				+�ץ Q � זנ,מעק,נאחד,לם,הע,

			
 
				+�צ Q � זנ,מעק,נאחד,לם,גם,

			
 
				+�ק Q � זנ,מעק,נאחד,לם,עג,

			
 
				+�ר Q � זנ,מעק,נאחד,לם,ןנ,

			
 
				+

			
 
				+

			
 
				+

			
 
				+//  ======  ��������������   ========

			
 
				+

			
 
				+יא Y � לנ,וה,טל,מה,םמ

			
 
				+יב Y � לנ,וה,נה,מה,םמ

			
 
				+יג Y � לנ,וה,הע,מה,םמ

			
 
				+יד Y � לנ,וה,גם,מה

			
 
				+�ש Y � לנ,וה,גם,םמ

			
 
				+יה Y � לנ,וה,עג,מה,םמ

			
 
				+יו Y � לנ,וה,ןנ,מה,םמ

			
 
				+יז Y � זנ,וה,טל,מה,םמ

			
 
				+יח Y � זנ,וה,נה,מה,םמ

			
 
				+יט Y � זנ,וה,הע,מה,םמ

			
 
				+יי Y � זנ,וה,גם,מה,םמ

			
 
				+יך Y � זנ,וה,עג,מה,םמ

			
 
				+יכ Y � זנ,וה,ןנ,מה,םמ

			
 
				+יל Y � סנ,וה,טל,מה,םמ

			
 
				+ים Y � סנ,וה,נה,מה,םמ

			
 
				+ימ Y � סנ,וה,הע,מה,םמ

			
 
				+ין Y � סנ,וה,גם,מה,םמ

			
 
				+ינ Y � סנ,וה,עג,מה,םמ

			
 
				+יס Y � סנ,וה,ןנ,מה,םמ

			
 
				+יע Y � לם,טל,מה,םמ

			
 
				+יף Y � לם,נה,מה,םמ

			
 
				+יפ Y � לם,הע,מה,םמ

			
 
				+יץ Y � לם,גם,מה

			
 
				+�� Y � לם,גם,םמ

			
 
				+יצ Y � לם,עג,מה,םמ

			
 
				+יק Y � לם,ןנ,מה,םמ

			
 
				+יר Y ��_���� לנ,וה,מה,םמ

			
 
				+יש Y ��_���� זנ,וה,מה,םמ

			
 
				+י� Y ��_���� סנ,וה,מה,םמ

			
 
				+י‎ Y ��_���� לם,מה,םמ

			
 
				+י‏ Y � סנאגם,מה,םמ

			
 
				+ית Y � סנאגם,2,מה,םמ

			
 
				+י� Y � סנאגם,מה,םמ,נאחד

			
 
				+י� Z � 0,מה,םמ

			
 
				+

			
 
				+//== ןנוגמסץמהםא� סעוןום� ןנטכאדאעוכ�םמדמ

			
 
				+טא Y � ןנוג,לנ,וה,טל,מה,םמ

			
 
				+טב Y � ןנוג,לנ,וה,נה,מה,םמ

			
 
				+טג Y � ןנוג,לנ,וה,הע,מה,םמ

			
 
				+טד Y � ןנוג,לנ,וה,גם,מה

			
 
				+טה Y � ןנוג,לנ,וה,גם,םמ

			
 
				+טו Y � ןנוג,לנ,וה,עג,מה,םמ

			
 
				+�ב Y � ןנוג,לנ,וה,ןנ,מה,םמ

			
 
				+�ג Y � ןנוג,זנ,וה,טל,מה,םמ

			
 
				+�ד Y � ןנוג,זנ,וה,נה,מה,םמ

			
 
				+�ה Y � ןנוג,זנ,וה,הע,מה,םמ

			
 
				+�ו Y � ןנוג,זנ,וה,גם,מה,םמ

			
 
				+�ז Y � ןנוג,זנ,וה,עג,מה,םמ

			
 
				+�ח Y � ןנוג,זנ,וה,ןנ,מה,םמ

			
 
				+טם Y � ןנוג,סנ,וה,טל,מה,םמ

			
 
				+טמ Y � ןנוג,סנ,וה,נה,מה,םמ

			
 
				+טן Y � ןנוג,סנ,וה,הע,מה,םמ

			
 
				+טנ Y � ןנוג,סנ,וה,גם,מה,םמ

			
 
				+טס Y � ןנוג,סנ,וה,עג,מה,םמ

			
 
				+טע Y � ןנוג,סנ,וה,ןנ,מה,םמ

			
 
				+טף Y � ןנוג,לם,טל,מה,םמ

			
 
				+טפ Y � ןנוג,לם,נה,מה,םמ

			
 
				+טץ Y � ןנוג,לם,הע,מה,םמ

			
 
				+טצ Y � ןנוג,לם,גם,מה

			
 
				+טק Y � ןנוג,לם,גם,םמ

			
 
				+טר Y � ןנוג,לם,עג,מה,םמ

			
 
				+טש Y � ןנוג,לם,ןנ,מה,םמ

			
 
				+

			
 
				+

			
 
				+

			
 
				+

			
 
				+//  ========   בוחכטקם�ו דכאדמכ�  ============

			
 
				+// ןמהףלאע�ס�,סגועאע�

			
 
				+םנ a ��������� בוחכ

			
 
				+// ןמהףלאועס�

			
 
				+םס a � בוחכ,בףה 

			
 
				+// ןמהףלאכמס�,סגועאכמ

			
 
				+םע a � בוחכ,ןנר

			
 
				+// סגועאוע

			
 
				+םף a � בוחכ,םסע

			
 
				+

			
 
				+

			
 
				+

			
 
				+ךא a ��������� הסע

			
 
				+

			
 
				+//  ========================================

			
 
				+//  ========   כטקם�ו פמנל� דכאדמכא  ============

			
 
				+

			
 
				+ךב a � הסע,םסע,1כ,וה

			
 
				+ךג a � הסע,םסע,1כ,לם

			
 
				+ךד a � הסע,םסע,2כ,וה

			
 
				+ךה a � הסע,םסע,2כ,לם

			
 
				+ךו a � הסע,םסע,3כ,וה

			
 
				+ךז a � הסע,םסע,3כ,לם

			
 
				+ךח a � הסע,ןנר,לנ,וה

			
 
				+ךט a � הסע,ןנר,זנ,וה

			
 
				+ךי a � הסע,ןנר,סנ,וה

			
 
				+ךך a � הסע,ןנר,לם

			
 
				+ךן a � הסע,בףה,1כ,וה

			
 
				+ךנ a � הסע,בףה,1כ,לם

			
 
				+ךס a � הסע,בףה,2כ,וה

			
 
				+ךע a � הסע,בףה,2כ,לם

			
 
				+ךף a � הסע,בףה,3כ,וה

			
 
				+ךפ a � הסע,בףה,3כ,לם

			
 
				+

			
 
				+

			
 
				+// נאחדמגמנם�ו פמנל�: "כמזף", "כמזטל","ה�רףע", "כמזףע", "כמזאע", "המזהףס�"

			
 
				+�ת a � הסע,םסע,1כ,וה,נאחד

			
 
				+�� a � הסע,םסע,1כ,לם,נאחד

			
 
				+�‎ a � הסע,םסע,2כ,וה,נאחד

			
 
				+�‏ a � הסע,םסע,2כ,לם,נאחד

			
 
				+�� a � הסע,םסע,3כ,וה,נאחד

			
 
				+ך‏ a � הסע,םסע,3כ,לם,נאחד

			
 
				+ך� a � הסע,ןנר,לם,נאחד

			
 
				+

			
 
				+ך‎ a � הסע,בףה,1כ,וה,נאחד

			
 
				+�א a � הסע,בףה,1כ,לם,נאחד

			
 
				+�ב a � הסע,בףה,2כ,וה,נאחד

			
 
				+�ג a � הסע,בףה,2כ,לם,נאחד

			
 
				+�ד a � הסע,בףה,3כ,וה,נאחד

			
 
				+�ה a � הסע,בףה,3כ,לם,נאחד

			
 
				+

			
 
				+

			
 
				+

			
 
				+// אנץאטקם�ו פמנל�: "ףךאחףוע", 

			
 
				+�ו a � הסע,םסע,1כ,וה,אנץ

			
 
				+�ז a � הסע,םסע,1כ,לם,אנץ

			
 
				+�ח a � הסע,םסע,2כ,וה,אנץ

			
 
				+�ט a � הסע,םסע,2כ,לם,אנץ

			
 
				+�י a � הסע,םסע,3כ,וה,אנץ

			
 
				+�ך a � הסע,םסע,3כ,לם,אנץ

			
 
				+�כ a � הסע,ןנר,לם,אנץ

			
 
				+

			
 
				+�ל a � הסע,בףה,1כ,וה,אנץ

			
 
				+�ם a � הסע,בףה,1כ,לם,אנץ

			
 
				+�מ a � הסע,בףה,2כ,וה,אנץ

			
 
				+�ן a � הסע,בףה,2כ,לם,אנץ

			
 
				+�נ a � הסע,בףה,3כ,וה,אנץ

			
 
				+�ס a � הסע,בףה,3כ,לם,אנץ

			
 
				+

			
 
				+

			
 
				+// ===================================================

			
 
				+// ==============   ������������ ====================

			
 
				+// ===================================================

			
 
				+ךם a ������������ הסע,םסע

			
 
				+ךמ a ������������ הסע,ןנר

			
 
				+

			
 
				+// ===================================================

			
 
				+// ==============   ������������ (אנץאטחל)  ==========

			
 
				+// ===================================================

			
 
				+�ע a ������������ הסע,םסע,אנץ

			
 
				+�ף a ������������ הסע,ןנר,אנץ

			
 
				+

			
 
				+//===================================================

			
 
				+// ==============   ���������    ====================

			
 
				+// "םו בףהול זו חאב�גאע� םארטץ ןנוהךמג!"

			
 
				+

			
 
				+םן a � הסע,ןגכ,1כ,לם

			
 
				+ךת a � הסע,ןגכ,1כ,וה

			
 
				+ךכ a � הסע,ןגכ,2כ,וה

			
 
				+ךל a � הסע,ןגכ,2כ,לם

			
 
				+

			
 
				+

			
 
				+//  "ןנמשוגאיעו"

			
 
				+כ� a � הסע,ןגכ,2כ,וה,נאחד

			
 
				+ך� a � הסע,ןגכ,2כ,לם,נאחד

			
 
				+

			
 
				+// סל.

			
 
				+�‏ a � הסע,ןגכ,2כ,וה,אבבנ 

			
 
				+

			
 
				+//  "טח�הט", "טח�הטעו"

			
 
				+פת a � הסע,ןגכ,2כ,וה,אנץ

			
 
				+פ‏ a � הסע,ןגכ,2כ,לם,אנץ

			
 
				+

			
 
				+

			
 
				+//  הויסעגטעוכ�םמו ןנטקאסעטו םאסעמ�שודמ גנולוםט

			
 
				+כא a ��������� מה,םמ,םסע,הסע,וה,לנ,טל

			
 
				+כב a ��������� מה,םמ,םסע,הסע,וה,לנ,נה

			
 
				+כג a ��������� מה,םמ,םסע,הסע,וה,לנ,הע

			
 
				+כד a ��������� מה,םסע,הסע,וה,לנ,גם

			
 
				+�א a ��������� םמ,םסע,הסע,וה,לנ,גם

			
 
				+כה a ��������� מה,םמ,םסע,הסע,וה,לנ,עג

			
 
				+כו a ��������� מה,םמ,םסע,הסע,וה,לנ,ןנ

			
 
				+כח a ��������� מה,םמ,םסע,הסע,וה,זנ,טל

			
 
				+כט a ��������� מה,םמ,םסע,הסע,וה,זנ,נה

			
 
				+כי a ��������� מה,םמ,םסע,הסע,וה,זנ,הע

			
 
				+כך a ��������� מה,םמ,םסע,הסע,וה,זנ,גם

			
 
				+ככ a ��������� מה,םמ,םסע,הסע,וה,זנ,עג

			
 
				+כל a ��������� מה,םמ,םסע,הסע,וה,זנ,ןנ

			
 
				+כמ a ��������� מה,םמ,םסע,הסע,וה,סנ,טל

			
 
				+כן a ��������� מה,םמ,םסע,הסע,וה,סנ,נה

			
 
				+כנ a ��������� מה,םמ,םסע,הסע,וה,סנ,הע

			
 
				+כס a ��������� מה,םמ,םסע,הסע,וה,סנ,גם

			
 
				+כע a ��������� מה,םמ,םסע,הסע,וה,סנ,עג

			
 
				+כף a ��������� מה,םמ,םסע,הסע,וה,סנ,ןנ

			
 
				+כץ a ��������� מה,םמ,םסע,הסע,לם,טל

			
 
				+כצ a ��������� מה,םמ,םסע,הסע,לם,נה

			
 
				+כק a ��������� מה,םמ,םסע,הסע,לם,הע

			
 
				+כר a ��������� מה,םסע,הסע,לם,גם

			
 
				+�י a ��������� םמ,םסע,הסע,לם,גם

			
 
				+כש a ��������� מה,םמ,םסע,הסע,לם,עג

			
 
				+כ� a ��������� מה,םמ,םסע,הסע,לם,ןנ

			
 
				+

			
 
				+//  הויסעגטעוכ�םמו ןנטקאסעטו ןנמרוהרודמ גנולוםט

			
 
				+לא a ��������� מה,םמ,ןנר,הסע,וה,לנ,טל

			
 
				+לב a ��������� מה,םמ,ןנר,הסע,וה,לנ,נה

			
 
				+לג a ��������� מה,םמ,ןנר,הסע,וה,לנ,הע

			
 
				+לד a ��������� מה,ןנר,הסע,וה,לנ,גם

			
 
				+�ב a ��������� םמ,ןנר,הסע,וה,לנ,גם

			
 
				+לה a ��������� מה,םמ,ןנר,הסע,וה,לנ,עג

			
 
				+לו a ��������� מה,םמ,ןנר,הסע,וה,לנ,ןנ

			
 
				+לח a ��������� מה,םמ,ןנר,הסע,וה,זנ,טל

			
 
				+לט a ��������� מה,םמ,ןנר,הסע,וה,זנ,נה

			
 
				+לי a ��������� מה,םמ,ןנר,הסע,וה,זנ,הע

			
 
				+לך a ��������� מה,םמ,ןנר,הסע,וה,זנ,גם

			
 
				+לכ a ��������� מה,םמ,ןנר,הסע,וה,זנ,עג

			
 
				+לל a ��������� מה,םמ,ןנר,הסע,וה,זנ,ןנ

			
 
				+למ a ��������� מה,םמ,ןנר,הסע,וה,סנ,טל

			
 
				+לן a ��������� מה,םמ,ןנר,הסע,וה,סנ,נה

			
 
				+לנ a ��������� מה,םמ,ןנר,הסע,וה,סנ,הע

			
 
				+לס a ��������� מה,םמ,ןנר,הסע,וה,סנ,גם

			
 
				+לע a ��������� מה,םמ,ןנר,הסע,וה,סנ,עג

			
 
				+לף a ��������� מה,םמ,ןנר,הסע,וה,סנ,ןנ

			
 
				+לץ a ��������� מה,םמ,ןנר,הסע,לם,טל

			
 
				+לצ a ��������� מה,םמ,ןנר,הסע,לם,נה

			
 
				+לק a ��������� מה,םמ,ןנר,הסע,לם,הע

			
 
				+לר a ��������� מה,ןנר,הסע,לם,גם

			
 
				+�ך a ��������� םמ,ןנר,הסע,לם,גם

			
 
				+לש a ��������� מה,םמ,ןנר,הסע,לם,עג

			
 
				+ל� a ��������� מה,םמ,ןנר,הסע,לם,ןנ

			
 
				+

			
 
				+//  סענאהאעוכ�םמו ןנטקאסעטו םאסעמ�שודמ גנולוםט

			
 
				+ןא b ��������� מה,םמ,םסע,סענ,וה,לנ,טל

			
 
				+ןב b ��������� מה,םמ,םסע,סענ,וה,לנ,נה

			
 
				+ןג b ��������� מה,םמ,םסע,סענ,וה,לנ,הע

			
 
				+ןד b ��������� מה,םסע,סענ,וה,לנ,גם

			
 
				+�ד b ��������� םמ,םסע,סענ,וה,לנ,גם

			
 
				+ןה b ��������� מה,םמ,םסע,סענ,וה,לנ,עג

			
 
				+ןו b ��������� מה,םמ,םסע,סענ,וה,לנ,ןנ

			
 
				+ןז b ��_��������� מה,םמ,םסע,סענ,וה,לנ

			
 
				+ןח b ��������� מה,םמ,םסע,סענ,וה,זנ,טל

			
 
				+ןט b ��������� מה,םמ,םסע,סענ,וה,זנ,נה

			
 
				+ןי b ��������� מה,םמ,םסע,סענ,וה,זנ,הע

			
 
				+ןך b ��������� מה,םמ,םסע,סענ,וה,זנ,גם

			
 
				+ןכ b ��������� מה,םמ,םסע,סענ,וה,זנ,עג

			
 
				+ןל b ��������� מה,םמ,םסע,סענ,וה,זנ,ןנ

			
 
				+ןם b ��_��������� מה,םמ,םסע,סענ,וה,זנ

			
 
				+ןמ b ��������� מה,םמ,םסע,סענ,וה,סנ,טל

			
 
				+ןן b ��������� מה,םמ,םסע,סענ,וה,סנ,נה

			
 
				+ןנ b ��������� מה,םמ,םסע,סענ,וה,סנ,הע

			
 
				+ןס b ��������� מה,םמ,םסע,סענ,וה,סנ,גם

			
 
				+ןע b ��������� מה,םמ,םסע,סענ,וה,סנ,עג

			
 
				+ןף b ��������� מה,םמ,םסע,סענ,וה,סנ,ןנ

			
 
				+ןפ b ��_��������� מה,םמ,םסע,סענ,וה,סנ

			
 
				+ןץ b ��������� מה,םמ,םסע,סענ,לם,טל

			
 
				+ןצ b ��������� מה,םמ,םסע,סענ,לם,נה

			
 
				+ןק b ��������� מה,םמ,םסע,סענ,לם,הע

			
 
				+ןר b ��������� מה,םסע,סענ,לם,גם

			
 
				+�ל b ��������� םמ,םסע,סענ,לם,גם

			
 
				+ןש b ��������� מה,םמ,םסע,סענ,לם,עג

			
 
				+ן� b ��������� מה,םמ,םסע,סענ,לם,ןנ

			
 
				+ן‎ b ��_��������� מה,םמ,םסע,סענ,לם

			
 
				+

			
 
				+//  סענאהאעוכ�םמו ןנטקאסעטו ןנמרוהרודמ גנולוםט

			
 
				+סא b ��������� מה,םמ,ןנר,סענ,וה,לנ,טל

			
 
				+סב b ��������� מה,םמ,ןנר,סענ,וה,לנ,נה

			
 
				+סג b ��������� מה,םמ,ןנר,סענ,וה,לנ,הע

			
 
				+סד b ��������� מה,ןנר,סענ,וה,לנ,גם

			
 
				+�ו b ��������� םמ,ןנר,סענ,וה,לנ,גם

			
 
				+סה b ��������� מה,םמ,ןנר,סענ,וה,לנ,עג

			
 
				+סו b ��������� מה,םמ,ןנר,סענ,וה,לנ,ןנ

			
 
				+סז b ��_��������� מה,םמ,ןנר,סענ,וה,לנ

			
 
				+סח b ��������� מה,םמ,ןנר,סענ,וה,זנ,טל

			
 
				+סט b ��������� מה,םמ,ןנר,סענ,וה,זנ,נה

			
 
				+סי b ��������� מה,םמ,ןנר,סענ,וה,זנ,הע

			
 
				+סך b ��������� מה,םמ,ןנר,סענ,וה,זנ,גם

			
 
				+סכ b ��������� מה,םמ,ןנר,סענ,וה,זנ,עג

			
 
				+סל b ��������� מה,םמ,ןנר,סענ,וה,זנ,ןנ

			
 
				+סם b ��_��������� מה,םמ,ןנר,סענ,וה,זנ

			
 
				+סמ b ��������� מה,םמ,ןנר,סענ,וה,סנ,טל

			
 
				+סן b ��������� מה,םמ,ןנר,סענ,וה,סנ,נה

			
 
				+סנ b ��������� מה,םמ,ןנר,סענ,וה,סנ,הע

			
 
				+סס b ��������� מה,םמ,ןנר,סענ,וה,סנ,גם

			
 
				+סע b ��������� מה,םמ,ןנר,סענ,וה,סנ,עג

			
 
				+סף b ��������� מה,םמ,ןנר,סענ,וה,סנ,ןנ

			
 
				+ספ b ��_��������� מה,םמ,ןנר,סענ,וה,סנ

			
 
				+סץ b ��������� מה,םמ,ןנר,סענ,לם,טל

			
 
				+סצ b ��������� מה,םמ,ןנר,סענ,לם,נה

			
 
				+סק b ��������� מה,םמ,ןנר,סענ,לם,הע

			
 
				+סר b ��������� מה,ןנר,סענ,לם,גם

			
 
				+�מ b ��������� םמ,ןנר,סענ,לם,גם

			
 
				+סש b ��������� מה,םמ,ןנר,סענ,לם,עג

			
 
				+ס� b ��������� מה,םמ,ןנר,סענ,לם,ןנ

			
 
				+ס‎ b ��_��������� מה,םמ,ןנר,סענ,לם

			
 
				+

			
 
				+

			
 
				+קא e �� 1כ,וה,טל

			
 
				+קב e �� 1כ,וה,נה

			
 
				+קג e �� 1כ,וה,הע

			
 
				+קד e �� 1כ,וה,גם

			
 
				+קה e �� 1כ,וה,עג

			
 
				+קו e �� 1כ,וה,ןנ

			
 
				+קז e �� 1כ,לם,טל

			
 
				+קח e �� 1כ,לם,נה

			
 
				+קט e �� 1כ,לם,הע

			
 
				+קי e �� 1כ,לם,גם

			
 
				+קך e �� 1כ,לם,עג

			
 
				+קכ e �� 1כ,לם,ןנ

			
 
				+קל e �� 2כ,וה,טל

			
 
				+קם e �� 2כ,וה,נה

			
 
				+קמ e �� 2כ,וה,הע

			
 
				+קן e �� 2כ,וה,גם

			
 
				+קנ e �� 2כ,וה,עג

			
 
				+קס e �� 2כ,וה,ןנ

			
 
				+קע e �� 2כ,לם,טל

			
 
				+קף e �� 2כ,לם,נה

			
 
				+קפ e �� 2כ,לם,הע

			
 
				+קץ e �� 2כ,לם,גם

			
 
				+קצ e �� 2כ,לם,עג

			
 
				+קק e �� 2כ,לם,ןנ

			
 
				+רא e �� 3כ,לנ,וה,טל

			
 
				+רב e �� 3כ,לנ,וה,נה

			
 
				+רג e �� 3כ,לנ,וה,הע

			
 
				+רד e �� 3כ,לנ,וה,גם

			
 
				+רה e �� 3כ,לנ,וה,עג

			
 
				+רו e �� 3כ,לנ,וה,ןנ

			
 
				+רז e �� 3כ,זנ,וה,טל

			
 
				+רח e �� 3כ,זנ,וה,נה

			
 
				+רט e �� 3כ,זנ,וה,הע

			
 
				+רי e �� 3כ,זנ,וה,גם

			
 
				+רך e �� 3כ,זנ,וה,עג

			
 
				+רכ e �� 3כ,זנ,וה,ןנ

			
 
				+רל e �� 3כ,סנ,וה,טל

			
 
				+רם e �� 3כ,סנ,וה,נה

			
 
				+רמ e �� 3כ,סנ,וה,הע

			
 
				+רן e �� 3כ,סנ,וה,גם

			
 
				+רנ e �� 3כ,סנ,וה,עג

			
 
				+רס e �� 3כ,סנ,וה,ןנ

			
 
				+רע e �� 3כ,לם,טל

			
 
				+רף e �� 3כ,לם,נה

			
 
				+רפ e �� 3כ,לם,הע

			
 
				+רץ e �� 3כ,לם,גם

			
 
				+רצ e �� 3כ,לם,עג

			
 
				+רק e �� 3כ,לם,ןנ

			
 
				+שא e �� לנ,וה,טל

			
 
				+שב e �� לנ,וה,נה

			
 
				+שג e �� לנ,וה,הע

			
 
				+שד e �� לנ,וה,גם

			
 
				+שה e �� לנ,וה,עג

			
 
				+שו e �� לנ,וה,ןנ

			
 
				+שז e �� זנ,וה,טל

			
 
				+שח e �� זנ,וה,נה

			
 
				+שט e �� זנ,וה,הע

			
 
				+שי e �� זנ,וה,גם

			
 
				+שך e �� זנ,וה,עג

			
 
				+שכ e �� זנ,וה,ןנ

			
 
				+של e �� סנ,וה,טל

			
 
				+שם e �� סנ,וה,נה

			
 
				+שמ e �� סנ,וה,הע

			
 
				+שן e �� סנ,וה,גם

			
 
				+שנ e �� סנ,וה,עג

			
 
				+שס e �� סנ,וה,ןנ

			
 
				+שע e �� לם,טל

			
 
				+שף e �� לם,נה

			
 
				+שפ e �� לם,הע

			
 
				+שץ e �� לם,גם

			
 
				+שצ e �� לם,עג

			
 
				+שק e �� לם,ןנ

			
 
				+שש e �� נה

			
 
				+ש� e �� הע

			
 
				+ש‎ e �� גם

			
 
				+ש‏ e �� עג

			
 
				+ש� e �� ןנ

			
 
				+�א f ��-� לנ,וה,טל,מה,םמ

			
 
				+�ב f ��-� לנ,וה,נה,מה,םמ

			
 
				+�ג f ��-� לנ,וה,הע,מה,םמ

			
 
				+�ד f ��-� לנ,וה,גם,םמ

			
 
				+�פ f ��-� לנ,וה,גם,מה

			
 
				+�ה f ��-� לנ,וה,עג,מה,םמ

			
 
				+�ו f ��-� לנ,וה,ןנ,מה,םמ

			
 
				+�ז f ��-� זנ,וה,טל,מה,םמ

			
 
				+�ח f ��-� זנ,וה,נה,מה,םמ

			
 
				+�ט f ��-� זנ,וה,הע,מה,םמ

			
 
				+�י f ��-� זנ,וה,גם,מה,םמ

			
 
				+�ך f ��-� זנ,וה,עג,מה,םמ

			
 
				+�כ f ��-� זנ,וה,ןנ,מה,םמ

			
 
				+�ל f ��-� סנ,וה,טל,מה,םמ

			
 
				+�ם f ��-� סנ,וה,נה,מה,םמ

			
 
				+�מ f ��-� סנ,וה,הע,מה,םמ

			
 
				+�ן f ��-� סנ,וה,גם,מה,םמ

			
 
				+�נ f ��-� סנ,וה,עג,מה,םמ

			
 
				+�ס f ��-� סנ,וה,ןנ,מה,םמ

			
 
				+�ע f ��-� לם,טל,מה,םמ

			
 
				+�ף f ��-� לם,נה,מה,םמ

			
 
				+�פ f ��-� לם,הע,מה,םמ

			
 
				+�ץ f ��-� לם,גם,םמ

			
 
				+�ץ f ��-� לם,גם,מה

			
 
				+�צ f ��-� לם,עג,מה,םמ

			
 
				+�ק f ��-� לם,ןנ,מה,םמ

			
 
				+�ר f ��-� 0,מה,םמ

			
 
				+�ש g ��-����� וה,נה

			
 
				+�� g ��-����� וה,הע

			
 
				+�‎ g ��-����� וה,גם

			
 
				+�‏ g ��-����� וה,עג

			
 
				+

			
 
				+// ‎עמ אםאכ. פמנלא "םו מ  ךמל"

			
 
				+�� g ��-����� וה,ןנ

			
 
				+

			
 
				+�� g ��-�����

			
 
				+‎א h ���� טל

			
 
				+‎ב h ���� נה

			
 
				+‎ג h ���� הע

			
 
				+‎ד h ���� גם

			
 
				+‎ה h ���� עג

			
 
				+‎ו h ���� ןנ

			
 
				+

			
 
				+�א h ���� טל,אנץ

			
 
				+�ב h ���� נה,אנץ

			
 
				+�ג h ���� הע,אנץ

			
 
				+�ד h ���� גם,אנץ

			
 
				+�ה h ���� עג,אנץ

			
 
				+�ו h ���� ןנ,אנץ

			
 
				+

			
 
				+

			
 
				+‎ז h ���� לנ,טל

			
 
				+‎ח h ���� לנ,נה

			
 
				+‎ט h ���� לנ,הע

			
 
				+‎י h ���� לנ,גם

			
 
				+‎ך h ���� לנ,עג

			
 
				+‎כ h ���� לנ,ןנ

			
 
				+‎ל h ���� זנ,טל

			
 
				+‎ם h ���� זנ,נה

			
 
				+‎מ h ���� זנ,הע

			
 
				+‎ן h ���� זנ,גם

			
 
				+‎נ h ���� זנ,עג

			
 
				+‎ס h ���� זנ,ןנ

			
 
				+‎ע h ���� סנ,טל

			
 
				+‎ף h ���� סנ,נה

			
 
				+‎פ h ���� סנ,הע

			
 
				+‎ץ h ���� סנ,גם

			
 
				+‎צ h ���� סנ,עג

			
 
				+‎ק h ���� סנ,ןנ

			
 
				+‎ר h ���� סנאגם

			
 
				+‏א i ����-� לנ,וה,טל,מה,םמ

			
 
				+‏ב i ����-� לנ,וה,נה,מה,םמ

			
 
				+‏ג i ����-� לנ,וה,הע,מה,םמ

			
 
				+‏ד i ����-� לנ,וה,גם,םמ

			
 
				+�ע i ����-� לנ,וה,גם,מה

			
 
				+‏ה i ����-� לנ,וה,עג,מה,םמ

			
 
				+‏ו i ����-� לנ,וה,ןנ,מה,םמ

			
 
				+‏ז i ����-� זנ,וה,טל,מה,םמ

			
 
				+‏ח i ����-� זנ,וה,נה,מה,םמ

			
 
				+‏ט i ����-� זנ,וה,הע,מה,םמ

			
 
				+‏י i ����-� זנ,וה,גם,מה,םמ

			
 
				+‏ך i ����-� זנ,וה,עג,מה,םמ

			
 
				+‏כ i ����-� זנ,וה,ןנ,מה,םמ

			
 
				+‏ל i ����-� סנ,וה,טל,מה,םמ

			
 
				+‏ם i ����-� סנ,וה,נה,מה,םמ

			
 
				+‏מ i ����-� סנ,וה,הע,מה,םמ

			
 
				+‏ן i ����-� סנ,וה,גם,מה,םמ

			
 
				+‏נ i ����-� סנ,וה,עג,מה,םמ

			
 
				+‏ס i ����-� סנ,וה,ןנ,מה,םמ

			
 
				+‏ע i ����-� לם,טל,מה,םמ

			
 
				+‏ף i ����-� לם,נה,מה,םמ

			
 
				+‏פ i ����-� לם,הע,מה,םמ

			
 
				+‏ץ i ����-� לם,גם,םמ

			
 
				+�ף i ����-� לם,גם,מה

			
 
				+‏צ i ����-� לם,עג,מה,םמ

			
 
				+‏ק i ����-� לם,ןנ,מה,םמ

			
 
				+‏ש i ����-� נה,מה,םמ

			
 
				+

			
 
				+//  םאנוקט�

			
 
				+�א j �

			
 
				+�ם j � גמןנ

			
 
				+�מ j � ףךאחאע

			
 
				+�ן j � נאחד

			
 
				+

			
 
				+// "לםו טםעונוסםמ","לםו ב�כמ סענארםמ"

			
 
				+�ב k ����� םסע

			
 
				+�ך k ����� ןנר

			
 
				+�כ k ����� 

			
 
				+// לםו כףקרו

			
 
				+�נ k ����� סנאגם,םסע

			
 
				+

			
 
				+//  "גמע", "ץנףסע�" (םוע אםאכ. פמנל)

			
 
				+�ל k ����� 0

			
 
				+

			
 
				+�ג l �����

			
 
				+�ד m ����

			
 
				+�ה n ����

			
 
				+�ו o ����

			
 
				+�¸ o ���� נאחד

			
 
				+�ז p ����

			
 
				+�ח q �����

			
 
				+�י s ����

			
 
				+�ן b � סענ,בףה,1כ,וה

			
 
				+�נ b � סענ,בףה,1כ,לם

			
 
				+�ס b � סענ,בףה,2כ,וה

			
 
				+�ע b � סענ,בףה,2כ,לם

			
 
				+�ף b � סענ,בףה,3כ,וה

			
 
				+�פ b � סענ,בףה,3כ,לם

			
 
				+

			
 
				+

			
 
				+

			
 
				+

			
 
				+// מבשטו דנאללול� (סכמגממבנאחמגאעוכ�ם�ו)

			
 
				+�א a * כמך

			
 
				+// �ב a * 

			
 
				+// �ג a * 

			
 
				+// �ד a * 

			
 
				+// �ה a * 

			
 
				+�ו a * ךאק

			
 
				+�ז a * הפסע

			
 
				+�ח a * הפסע,מנד

			
 
				+�ט a * הפסע,כמך

			
 
				+//�ך a * 

			
 
				+�כ a * סג,ןו

			
 
				+�ל a * סג,םן

			
 
				+�ם a * םס,ןו

			
 
				+�מ a * םס,םן

			
 
				+�ן a * סג,םס,ןו

			
 
				+�נ a * סג,םס,םן

			
 
				+

			
 
				+

			
 
				+// הכ� בוחכטקם�ץ דכאדמכמג

			
 
				+�ס a * םס

			
 
				+�ע a * סג

			
 
				+

			
 
				+�פ a * זאנד

			
 
				+�ץ a * מןק

			
 
				+�ק a * זאנד,מןק

			
 
				+�צ a * מנד,זאנד

			
 
				+�ר a * כמך,זאנד

			
 
				+

			
 
				+�ש a * םמ,כמך

			
 
				+�� a * םמ,מנד

			
 
				+�� a * מה,פאל

			
 
				+�ת a * םמ,הפסע,כמך

			
 
				+�‎ a * םמ,הפסע,מנד

			
 
				+�‏ a * םמ,זאנד

			
 
				+�� a * םמ,מןק,

			
 
				+�א a * םמ,

			
 
				+�ב a * מה,

			
 
				+�ג a * מנד,זאנד,םמ

			
 
				+�ד a * הפסע,םמ

			
 
				+�ה a * הפסע,מה

			
 
				+�ז a * מה,זאנד

			
 
				+�ח a * טל�,ןנטע�ז

			
 
				+�ט a * ןנטע�ז

			
 
				+�ך a * סג,ןו,נאחד

			
 
				+�כ a * סג,םן,נאחד

			
 
				+�ם a * םס,ןו,נאחד

			
 
				+�מ a * םס,םן,נאחד

			
 
				+�ן a * םמ,נאחד

			
 
				+�נ a * מה,נאחד

			
 
				+�ס a * סג,ןו,זאנד

			
 
				+�ע a * סג,םן,זאנד

			
 
				+�ף a * םס,ןו,זאנד

			
 
				+�פ a * םס,םן,זאנד

			
 
				+�ץ a * נאחד

			
 
				+�צ a * אנץ

			
 
				+�ק a * סג,ןו,אנץ

			
 
				+�ר a * סג,םן,אנץ

			
 
				+�ש a * םס,ןו,אנץ

			
 
				+�� a * םס,םן,אנץ

			
 
				+�� a * םמ,אנץ

			
 
				+�ת a * מה,אנץ

			
 
				+�‎ a * םס,אנץ

			
 
				+�‏ a * סג,אנץ

			
 
				+�� a * ךאק,אנץ

			
 
				+�¸ a * םמ,מה

			
 
				+�א a * מה,מןק,

			
 
				+�ב a * כמך,מןק,

			
 
				+

			
 
				+

			
 
				+

			
 
				+�‏ F � לנ,זנ,סנ,וה,טל,נה,הע,גם,עג,ןנ

			
 
				+�� F � לנ,זנ,סנ,,וה,לם,טל,נה,הע,גם,עג,ןנ

			
--- a/dictonary/Dicts/SrcMorph/Eng.mwz
+++ b/dictonary/Dicts/SrcMorph/Eng.mwz
@@ -0,0 +1,3 @@
 
				+MRD_FILE 	EngSrc/morphs.mrd

			
 
				+LANG	        ENGLISH

			
 
				+USERS           gri,alex,boris,masha,af,oleg,nim

			
--- a/dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd
+++ b/dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd
--- a/dictonary/Dicts/SrcMorph/Rus.mwz
+++ b/dictonary/Dicts/SrcMorph/Rus.mwz
@@ -0,0 +1,3 @@
 
				+MRD_FILE 	RusSrc/morphs.mrd

			
 
				+LANG	        RUSSIAN

			
 
				+USERS           alex,vse-imena,accentor,user2008
			
--- a/dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd
+++ b/dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd
--- a/dictonary/Docs/Morph_UNIX.txt
+++ b/dictonary/Docs/Morph_UNIX.txt
@@ -0,0 +1,258 @@
 
				+This is a program of morphological analysis (Russian, German, and English languages).
			
 
				+
			
 
				+This program is distributed under the Library GNU Public Licence, which is in the file
			
 
				+COPYING.  
			
 
				+
			
 
				+This program was  written by Andrey Putrin, Alexey Sokirko.  
			
 
				+The project started in Moscow in Dialing 
			
 
				+Company (Russian and English language). The German part was created  
			
 
				+at Berlin-Brandenburg Academy of Sciences and Humanities in  Berlin (the project DWDS). 
			
 
				+
			
 
				+The Russian  lexicon is based upon Zaliznyak's Dictionary .
			
 
				+The German lexicon is based upon Morphy system (http://www-psycho.uni-paderborn.de/lezius/).
			
 
				+The English  lexicon is based upon Wordnet.
			
 
				+
			
 
				+The project uses a regular expression library "PCRE" (Perl Compatible Regular Expressions).
			
 
				+We test compilation only with version 6.4. Other versions were not tested. 
			
 
				+One should download this version  from the official site and install it 
			
 
				+to the default place. If you do not want to install it or you do not have enough
			
 
				+rights to do it, then you should  create two environment variables: 
			
 
				+	1.  RML_PCRE_LIB, that  points to PCRE library directory, where
			
 
				+libpcre.a and libpcrecpp.a should be located, for example:
			
 
				+	export RML_PCRE_LIB=~/RML/contrib/pcre-6.4/.libs
			
 
				+    2  RML_PCRE_INCLUDE, that points to PCRE include catalog, 
			
 
				+where "pcrecpp.h" is located, for example
			
 
				+    export RML_PCRE_INCLUDE=~/RML/contrib/pcre-6.4
			
 
				+
			
 
				+
			
 
				+The system has been developed under Windows 2000 (MS VS 6.0), but
			
 
				+has also been compiled and run under Linux(GCC).  It should work with
			
 
				+minor changes on other systems.
			
 
				+
			
 
				+Website of DDC: www.aot.ru, https://sf.net/projects/morph-lexicon/
			
 
				+
			
 
				+I compiled all sources with gcc 3.2. Lower versions are not supported.
			
 
				+
			
 
				+
			
 
				+Contents of this source archive
			
 
				+
			
 
				+1.	The main morphological  library (Source/LemmatizerLib).
			
 
				+2.	Library for grammatical codes (Source/AgramtabLib).
			
 
				+3.	Test morphological program  (Source/TestLem).
			
 
				+4.	Library for working with text version of the dictionaries (Source/MorphWizardLib).
			
 
				+5.	Generator of morphological prediction base  (Source/GenPredIdx).
			
 
				+6.	Generator of binary  format of the dictionaries (Source/MorphGen).
			
 
				+
			
 
				+
			
 
				+=================================================
			
 
				+====== 					 Installation       =====
			
 
				+=================================================
			
 
				+
			
 
				+
			
 
				+Unpacking
			
 
				+
			
 
				+* Create  a catalog and  register a system variable RML, which  points 
			
 
				+to this catalog:
			
 
				+	mkdir /home/sokirko/RML
			
 
				+	export  RML=/home/sokirko/RML
			
 
				+
			
 
				+* Put "lemmatizer.tar.gz", "???-src-morph.tar.gz"
			
 
				+to this catalog, "???" can be "rus", "ger" or "eng"
			
 
				+according to what you have downloaded. Unpack it 
			
 
				+ 	tar xfz lemmatizer.tar.gz
			
 
				+	tar xfz ???-src-morph.tar.gz
			
 
				+
			
 
				+
			
 
				+
			
 
				+Compiling morphology
			
 
				+
			
 
				+  0. Do not forget to set RML_PCRE_LIB and RML_PCRE_INCLUDE (see above)
			
 
				+
			
 
				+
			
 
				+  1.  cd $RML
			
 
				+	
			
 
				+
			
 
				+  2.   ./compile_morph.sh  
			
 
				+      This step should create all libraries and a test program $RML/Bin/TestLem.
			
 
				+
			
 
				+
			
 
				+Building Morphological Dictionary
			
 
				+
			
 
				+  1.  cd $RML
			
 
				+
			
 
				+  2.   ./generate_morph_bin.sh <lang>
			
 
				+     where <lang> can be Russian, German according to the dictionary
			
 
				+    you have downloaded.
			
 
				+
			
 
				+  The script should terminate with message "Everything is OK". 
			
 
				+  You can test the morphology 
			
 
				+	$RML/Bin/TestLem <lang>
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+If something goes wrong, write to me at sokirko@yandex.ru.
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+======================================================
			
 
				+==========      MRD-file                  ============
			
 
				+======================================================
			
 
				+
			
 
				+	This section describes the format of a mrd-file. Mrd-file is a text 
			
 
				+file which contains one morphological dictionary for one natural language. 
			
 
				+MRD is an abbreviation of "morphological dictionary".
			
 
				+	The usual place for this file is 
			
 
				+
			
 
				+	$RML/Dicts/SrcMorph/xxxSrc/morphs.mrd,
			
 
				+
			
 
				+where  xxx can be "Eng", "Rus" or  "Ger" depending on the language. 
			
 
				+    The encoding of the file depends also upon the language:
			
 
				+	* Russian - Windows 1251
			
 
				+	* German  - Windows 1252
			
 
				+	* English - ASCII
			
 
				+
			
 
				+
			
 
				+   Gramtab-files
			
 
				+
			
 
				+
			
 
				+	A mrd-file refers to a gramtab-file, which is 
			
 
				+language-dependent and which contains all possible full morphological 
			
 
				+patterns for the words. One line in a gramtab-file looks as follows:
			
 
				+	<ancode> <unused_number> <part_of_speech> <grammems>
			
 
				+	An ancode is an ID, which consists of two letters and which uniquely  
			
 
				+identifies a morphological pattern. A morphological pattern consists of 
			
 
				+<part_of_speech> and <grammems>. For example, here is a line from the English
			
 
				+gramtab:
			
 
				+
			
 
				+	te 1 VBE prsa,pl      
			
 
				+
			
 
				+	Here "te" is an ancode,  "VBE" is a part of speech, "prsa,pl" are grammems,
			
 
				+"1" is the obsolete  unused number.
			
 
				+    In mrd-files we use ancodes to refer to a  morphological pattern.
			
 
				+
			
 
				+	Here is the list of all gramtab-files:
			
 
				+	* Russian - $Rml/Dicts/Morph/rgramtab.tab
			
 
				+	* German  - $Rml/Dicts/Morph/ggramtab.tab
			
 
				+	* English - $Rml/Dicts/Morph/egramtab.tab
			
 
				+
			
 
				+
			
 
				+
			
 
				+   Common information 
			
 
				+
			
 
				+
			
 
				+	All words in a mrd-file are written in uppercase.
			
 
				+	One mrd-file consists of the following sections:
			
 
				+		1. Section of flexion and prefix models;
			
 
				+		2. Section of accentual models; 
			
 
				+		3. Section of user sessions;
			
 
				+	    4. Section of prefix sets;
			
 
				+		5. Section of lemmas.
			
 
				+	Each section is a set of records, one per line. The number of all records 
			
 
				+of the section  is written in the very beginning of the section at 
			
 
				+a separate line. For example, here is a possible variant 
			
 
				+of the section of user sessions:
			
 
				+
			
 
				+1
			
 
				+alex;17:10, 13 October 2003;17:12, 13 October 2003
			
 
				+
			
 
				+"1" means that this section contains only one record, which is written  
			
 
				+on the next line, thus this section contains only two lines.
			
 
				+
			
 
				+
			
 
				+
			
 
				+	Section of possible flexion and prefix models
			
 
				+
			
 
				+
			
 
				+	Each record of this section is a list of items. Each item 
			
 
				+describes how one word form in a paradigm should be built. The whole list
			
 
				+describes the whole paradigm (a set of word forms with morphological patterns). 
			
 
				+The format  of one item is the following:
			
 
				+		%<flexion>*<ancode>
			
 
				+	or  %<flexion>*<ancode>*<prefix>
			
 
				+		where  
			
 
				+			<flexion> is a  flexion (a string, which should be added to right of the word base)
			
 
				+			<prefix> is a  prefix (a string, which should be added to left of the word base)
			
 
				+			<ancode> is an ancode.
			
 
				+	Let us consider an example of an English flexion and prefix model:
			
 
				+		%F*na%VES*nb
			
 
				+	Here we have two items:
			
 
				+		1. <flexion> = F;   <ancode> = na
			
 
				+		2. <flexion> = VES;   <ancode> = nb
			
 
				+		In order to decipher ancodes we should go to the English gramtab-file. 
			
 
				+There we can find the following lines:
			
 
				+			na NOUN narr,sg        
			
 
				+			nb NOUN narr,pl
			
 
				+		If base "lea" would be ascribed to this model,  then its paradigm 
			
 
				+would be the following:
			
 
				+		leaf 	NOUN narr,sg
			
 
				+		leaves	NOUN narr,pl
			
 
				+	It is important, that each word of a morphological dictionary 
			
 
				+should contain a reference  to a line in this section.
			
 
				+
			
 
				+
			
 
				+	Section of possible accentual models
			
 
				+
			
 
				+
			
 
				+	Each record of this section is a comma-delimited list of numbers, where 
			
 
				+each number is an index of a stressed  vowel of a word form(counting 
			
 
				+from the end). The whole list contains a position for each word 
			
 
				+form in the paradigm.	
			
 
				+	If an item of an accentual model of word is equal to 255, then it 
			
 
				+is undefined, and it means that this word  form is unstressed.  
			
 
				+	Each word in the dictionary should have a reference  to 
			
 
				+an accentual model, even though this model can consist only of empty items.
			
 
				+	For one word, the number and the order of items in the  accentual model 
			
 
				+should be equal to the number and the order of items  in the flexion and 
			
 
				+prefix model. For example we can ascribe to word "leaf" with the paradigm  
			
 
				+		leaf 	NOUN narr,sg
			
 
				+		leaves	NOUN narr,pl
			
 
				+the following accentual model:
			
 
				+
			
 
				+	2,3
			
 
				+
			
 
				+	It produces the following accented paradigm: 
			
 
				+		le'af 	NOUN narr,sg
			
 
				+		le'aves	NOUN narr,pl
			
 
				+
			
 
				+		
			
 
				+
			
 
				+	Section of user sessions
			
 
				+
			
 
				+	This is a system section, which contains information about user edit 
			
 
				+sessions.
			
 
				+
			
 
				+
			
 
				+	Section of prefix sets
			
 
				+
			
 
				+	Each record of this section is a comma-delimited list of strings, where 
			
 
				+each string is a prefix, which can be prefixed to the whole word. If a prefix 
			
 
				+set is ascribed to a word, it means, that the words with these prefixes
			
 
				+can also exist  in the language. For example, if "leaf" has 
			
 
				+the prefix  set "anti,contra", it follows the existence of  words "antileaf",
			
 
				+"contraleaf".
			
 
				+	A flexion and prefix model can contain
			
 
				+also a reference to a prefix, but this prefix is for 
			
 
				+one separate word form, while a prefix set  is ascribed to the whole word 
			
 
				+paradigm.
			
 
				+
			
 
				+	
			
 
				+	Section of lemmas
			
 
				+
			
 
				+	A record of this section is a space-separated tuple of the following format:
			
 
				+
			
 
				+	<base> <flex_model_no> <accent_model_no> <session_no> <type_ancode> <prefix_set_no>
			
 
				+
			
 
				+	where 
			
 
				+
			
 
				+	<base> is a base (a constant part of a word in its paradigm)
			
 
				+	<flex_model_no> is an index  of a flexion and prefix model
			
 
				+	<accent_model_no> is an index of an accentual model
			
 
				+	<session_no> is an index of the session,  by which the last user edited this word
			
 
				+	<type_ancode> is ancode, which is ascribed to the whole word 
			
 
				+						(intended: the common part of grammems in the paradigm)
			
 
				+					   "-" if it is undefined 
			
 
				+	<prefix_set_no> is an index of a prefix set, or "-" if it is undefined
			
 
				+
			
--- a/dictonary/copying
+++ b/dictonary/copying
--- a/english/pom.xml
+++ b/english/pom.xml
@@ -0,0 +1,23 @@
 
				+<?xml version="1.0"?>
			
 
				+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
			
 
				+    <parent>
			
 
				+        <artifactId>morphology</artifactId>
			
 
				+        <groupId>org.apache.lucene.morphology</groupId>
			
 
				+        <version>1.5</version>
			
 
				+    </parent>
			
 
				+    <modelVersion>4.0.0</modelVersion>
			
 
				+    <groupId>org.apache.lucene.morphology</groupId>
			
 
				+    <artifactId>english</artifactId>
			
 
				+    <name>english</name>
			
 
				+    <version>1.5</version>
			
 
				+    <url>http://maven.apache.org</url>
			
 
				+    <dependencies>
			
 
				+
			
 
				+        <dependency>
			
 
				+            <groupId>org.apache.lucene.morphology</groupId>
			
 
				+            <artifactId>morph</artifactId>
			
 
				+            <version>1.5</version>
			
 
				+        </dependency>
			
 
				+
			
 
				+    </dependencies>
			
 
				+</project>
			
--- a/english/src/main/java/org/apache/lucene/morphology/english/EnglishAnalyzer.java
+++ b/english/src/main/java/org/apache/lucene/morphology/english/EnglishAnalyzer.java
@@ -0,0 +1,29 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+package org.apache.lucene.morphology.english;

			
 
				+

			
 
				+import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer;

			
 
				+

			
 
				+import java.io.IOException;

			
 
				+

			
 
				+

			
 
				+public class EnglishAnalyzer extends MorphologyAnalyzer { // MorphologyAnalyzer subclass whose only member is the constructor below.

			
 
				+    /** Creates the analyzer by passing a newly constructed EnglishLuceneMorphology to the MorphologyAnalyzer constructor. Declared to throw IOException; presumably raised while the morphology is being loaded - TODO confirm. */

			
 
				+    public EnglishAnalyzer() throws IOException {

			
 
				+        super(new EnglishLuceneMorphology());

			
 
				+    }

			
 
				+

			
 
				+}
			
--- a/english/src/main/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoder.java
+++ b/english/src/main/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoder.java
@@ -0,0 +1,115 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.morphology.english;
			
 
				+
			
 
				+import org.apache.lucene.morphology.LetterDecoderEncoder;
			
 
				+import org.apache.lucene.morphology.SuffixToLongException;
			
 
				+import org.apache.lucene.morphology.WrongCharaterException;
			
 
				+
			
 
				+import java.util.ArrayList;
			
 
				+
			
 
				+
			
 
				+public class EnglishLetterDecoderEncoder implements LetterDecoderEncoder {
			
 
				+    public static final int ENGLISH_SMALL_LETTER_OFFSET = 96;
			
 
				+    static public int SUFFIX_LENGTH = 6;
			
 
				+    public static final int DASH_CHAR = 45;
			
 
				+    public static final int DASH_CODE = 27;
			
 
				+
			
 
				+    public Integer encode(String string) {
			
 
				+        if (string.length() > 6) throw new SuffixToLongException("Suffix length should not be greater then " + 12);
			
 
				+        int result = 0;
			
 
				+        for (int i = 0; i < string.length(); i++) {
			
 
				+            int c = 0 + string.charAt(i) - ENGLISH_SMALL_LETTER_OFFSET;
			
 
				+            if (c == 45 - ENGLISH_SMALL_LETTER_OFFSET) {
			
 
				+                c = DASH_CODE;
			
 
				+            }
			
 
				+            if (c < 0 || c > 27)
			
 
				+                throw new WrongCharaterException("Symbol " + string.charAt(i) + " is not small cirillic letter");
			
 
				+            result = result * 28 + c;
			
 
				+        }
			
 
				+        for (int i = string.length(); i < 6; i++) {
			
 
				+            result *= 28;
			
 
				+        }
			
 
				+        return result;
			
 
				+    }
			
 
				+
			
 
				+    public int[] encodeToArray(String s) {
			
 
				+
			
 
				+        ArrayList<Integer> integers = new ArrayList<Integer>();
			
 
				+        while (s.length() > 6) {
			
 
				+            integers.add(encode(s.substring(0, 6)));
			
 
				+            s = s.substring(6);
			
 
				+        }
			
 
				+        integers.add(encode(s));
			
 
				+        int[] ints = new int[integers.size()];
			
 
				+        int pos = 0;
			
 
				+        for (Integer i : integers) {
			
 
				+            ints[pos] = i;
			
 
				+            pos++;
			
 
				+        }
			
 
				+        return ints;
			
 
				+    }
			
 
				+
			
 
				+    public String decodeArray(int[] array) {
			
 
				+        String result = "";
			
 
				+        for (int i : array) {
			
 
				+            result += decode(i);
			
 
				+        }
			
 
				+        return result;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    public String decode(Integer suffixN) {
			
 
				+        String result = "";
			
 
				+        while (suffixN > 27) {
			
 
				+            int c = suffixN % 28 + ENGLISH_SMALL_LETTER_OFFSET;
			
 
				+            if (c == ENGLISH_SMALL_LETTER_OFFSET) {
			
 
				+                suffixN /= 28;
			
 
				+                continue;
			
 
				+            }
			
 
				+            if (c == DASH_CODE + ENGLISH_SMALL_LETTER_OFFSET) c = DASH_CHAR;
			
 
				+            result = (char) c + result;
			
 
				+            suffixN /= 28;
			
 
				+        }
			
 
				+        long c = suffixN + ENGLISH_SMALL_LETTER_OFFSET;
			
 
				+        if (c == DASH_CODE + ENGLISH_SMALL_LETTER_OFFSET) c = DASH_CHAR;
			
 
				+        result = (char) c + result;
			
 
				+        return result;
			
 
				+    }
			
 
				+
			
 
				+    public boolean checkCharacter(char c) {
			
 
				+        int code = 0 + c;
			
 
				+        if (code == 45) return true;
			
 
				+        code -= ENGLISH_SMALL_LETTER_OFFSET;
			
 
				+        if (code > 0 && code < 27) return true;
			
 
				+        return false;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    public boolean checkString(String word) {
			
 
				+        for (int i = 0; i < word.length(); i++) {
			
 
				+            if (!checkCharacter(word.charAt(i))) {
			
 
				+                return false;
			
 
				+            }
			
 
				+        }
			
 
				+        return true;
			
 
				+    }
			
 
				+
			
 
				+    public String cleanString(String s) {
			
 
				+        return s;
			
 
				+    }
			
 
				+
			
 
				+}
			
--- a/english/src/main/java/org/apache/lucene/morphology/english/EnglishLuceneMorphology.java
+++ b/english/src/main/java/org/apache/lucene/morphology/english/EnglishLuceneMorphology.java
@@ -0,0 +1,28 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.morphology.english;
			
 
				+
			
 
				+import org.apache.lucene.morphology.LuceneMorphology;
			
 
				+
			
 
				+import java.io.IOException;
			
 
				+
			
 
				+
			
 
				+public class EnglishLuceneMorphology extends LuceneMorphology {
			
 
				+
			
 
				+    public EnglishLuceneMorphology() throws IOException {
			
 
				+        super(EnglishLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/english/morph.info"), new EnglishLetterDecoderEncoder());
			
 
				+    }
			
 
				+}
			
--- a/english/src/main/java/org/apache/lucene/morphology/english/EnglishMorphology.java
+++ b/english/src/main/java/org/apache/lucene/morphology/english/EnglishMorphology.java
@@ -0,0 +1,28 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.morphology.english;
			
 
				+
			
 
				+import org.apache.lucene.morphology.MorphologyImpl;
			
 
				+
			
 
				+import java.io.IOException;
			
 
				+
			
 
				+
			
 
				+public class EnglishMorphology extends MorphologyImpl {
			
 
				+
			
 
				+    public EnglishMorphology() throws IOException {
			
 
				+        super(EnglishLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/english/morph.info"), new EnglishLetterDecoderEncoder());
			
 
				+    }
			
 
				+}
			
--- a/english/src/main/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmer.java
+++ b/english/src/main/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmer.java
@@ -0,0 +1,45 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.morphology.english.stemmer;
			
 
				+
			
 
				+
			
 
				+import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
			
 
				+
			
 
				+import java.util.List;
			
 
				+
			
 
				+public class EnglishStemmer {
			
 
				+    private EnglishLuceneMorphology englishLuceneMorphology;
			
 
				+
			
 
				+    public EnglishStemmer(EnglishLuceneMorphology englishLuceneMorphology) {
			
 
				+        this.englishLuceneMorphology = englishLuceneMorphology;
			
 
				+    }
			
 
				+
			
 
				+    public String getStemmedWord(String word){
			
 
				+        if(!englishLuceneMorphology.checkString(word)){
			
 
				+            return word;
			
 
				+        }
			
 
				+        List<String> normalForms = englishLuceneMorphology.getNormalForms(word);
			
 
				+        if(normalForms.size() == 1){
			
 
				+            return normalForms.get(0);
			
 
				+        }
			
 
				+        normalForms.remove(word);
			
 
				+        if(normalForms.size() == 1){
			
 
				+            return normalForms.get(0);
			
 
				+        }
			
 
				+        return word;
			
 
				+    }
			
 
				+
			
 
				+}
			
--- a/english/src/main/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerFilter.java
+++ b/english/src/main/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerFilter.java
@@ -0,0 +1,48 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.morphology.english.stemmer;
			
 
				+
			
 
				+
			
 
				+import org.apache.lucene.analysis.TokenFilter;
			
 
				+import org.apache.lucene.analysis.TokenStream;
			
 
				+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
			
 
				+
			
 
				+import java.io.IOException;
			
 
				+
			
 
				+public class EnglishStemmerFilter extends TokenFilter {
			
 
				+    private EnglishStemmer englishStemmer;
			
 
				+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
			
 
				+
			
 
				+    public EnglishStemmerFilter(TokenStream input, EnglishStemmer englishStemmer) {
			
 
				+        super(input);
			
 
				+        this.englishStemmer = englishStemmer;
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    final public boolean incrementToken() throws IOException {
			
 
				+
			
 
				+        boolean b = input.incrementToken();
			
 
				+        if (!b) {
			
 
				+            return false;
			
 
				+        }
			
 
				+        String s = new String(termAtt.buffer(), 0, termAtt.length());
			
 
				+        s = englishStemmer.getStemmedWord(s);
			
 
				+        termAtt.setEmpty();
			
 
				+        termAtt.append(s);
			
 
				+        return true;
			
 
				+    }
			
 
				+
			
 
				+}
			
--- a/english/src/main/resources/org/apache/lucene/morphology/english/exceptions.txt
+++ b/english/src/main/resources/org/apache/lucene/morphology/english/exceptions.txt
--- a/english/src/main/resources/org/apache/lucene/morphology/english/morph.info
+++ b/english/src/main/resources/org/apache/lucene/morphology/english/morph.info
--- a/english/src/test/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoderTest.java
+++ b/english/src/test/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoderTest.java
@@ -0,0 +1,40 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.morphology.english;
			
 
				+
			
 
				+import static org.hamcrest.core.IsEqual.equalTo;
			
 
				+import static org.junit.Assert.assertThat;
			
 
				+import org.junit.Before;
			
 
				+
			
 
				+
			
 
				+public class EnglishLetterDecoderEncoderTest {
			
 
				+    private EnglishLetterDecoderEncoder decoderEncoder;
			
 
				+
			
 
				+    @Before
			
 
				+    public void setUp() {
			
 
				+        decoderEncoder = new EnglishLetterDecoderEncoder();
			
 
				+    }
			
 
				+
			
 
				+    @org.junit.Test
			
 
				+    public void testDecodeEncodeToArray() {
			
 
				+        assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("abcdefghijklmnopqrstuvwxyz")), equalTo("abcdefghijklmnopqrstuvwxyz"));
			
 
				+        assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("xyz")), equalTo("xyz"));
			
 
				+        assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrty")), equalTo("ytrrty"));
			
 
				+        assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrtyz")), equalTo("ytrrtyz"));
			
 
				+        assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrtyzqwqwe")), equalTo("ytrrtyzqwqwe"));
			
 
				+
			
 
				+    }
			
 
				+}
			
--- a/english/src/test/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerTest.java
+++ b/english/src/test/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerTest.java
@@ -0,0 +1,49 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.morphology.english.stemmer;
			
 
				+
			
 
				+import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
			
 
				+import org.junit.Test;
			
 
				+import static org.hamcrest.core.IsEqual.equalTo;
			
 
				+import static org.junit.Assert.assertThat;
			
 
				+
			
 
				+
			
 
				+public class EnglishStemmerTest {
			
 
				+    @Test
			
 
				+    public void testGetStemmedWord() throws Exception {
			
 
				+        EnglishLuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();
			
 
				+        EnglishStemmer englishStemmer = new EnglishStemmer(englishLuceneMorphology);
			
 
				+        assertThat(englishStemmer.getStemmedWord("running"),equalTo("run"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("run"),equalTo("run"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("killed"),equalTo("kill"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("kill"),equalTo("kill"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("networking"),equalTo("network"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("network"),equalTo("network"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("statistics"),equalTo("statistic"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("statistic"),equalTo("statistic"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("stats"),equalTo("stat"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("stat"),equalTo("stat"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("countries"),equalTo("country"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("country"),equalTo("country"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("delete"),equalTo("delete"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("ended"),equalTo("end"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("end"),equalTo("end"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("ends"),equalTo("end"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("given"),equalTo("give"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("give"),equalTo("give"));
			
 
				+        assertThat(englishStemmer.getStemmedWord("log4j"),equalTo("log4j"));
			
 
				+    }
			
 
				+}
			
--- a/etc/header.txt
+++ b/etc/header.txt
@@ -0,0 +1,13 @@
 
				+Copyright 2009 Alexander Kuznetsov 
			
 
				+
			
 
				+Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+you may not use this file except in compliance with the License.
			
 
				+You may obtain a copy of the License at
			
 
				+
			
 
				+    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+
			
 
				+Unless required by applicable law or agreed to in writing, software
			
 
				+distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+See the License for the specific language governing permissions and
			
 
				+limitations under the License.
			
--- a/morph/pom.xml
+++ b/morph/pom.xml
@@ -0,0 +1,15 @@
 
				+<?xml version="1.0"?>
			
 
				+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
			
 
				+    <parent>
			
 
				+        <artifactId>morphology</artifactId>
			
 
				+        <groupId>org.apache.lucene.morphology</groupId>
			
 
				+        <version>1.5</version>
			
 
				+    </parent>
			
 
				+    <modelVersion>4.0.0</modelVersion>
			
 
				+    <groupId>org.apache.lucene.morphology</groupId>
			
 
				+    <artifactId>morph</artifactId>
			
 
				+    <name>morph</name>
			
 
				+    <version>1.5</version>
			
 
				+    <url>http://maven.apache.org</url>
			
 
				+
			
 
				+</project>
			
--- a/morph/src/main/java/org/apache/lucene/morphology/BaseLetterDecoderEncoder.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/BaseLetterDecoderEncoder.java
@@ -0,0 +1,55 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+package org.apache.lucene.morphology;
			
 
				+
			
 
				+import java.util.ArrayList;
			
 
				+
			
 
				+
			
 
				+public abstract class BaseLetterDecoderEncoder implements LetterDecoderEncoder {
			
 
				+    public int[] encodeToArray(String s) {
			
 
				+        ArrayList<Integer> integers = new ArrayList<Integer>();
			
 
				+        while (s.length() > 6) {
			
 
				+            integers.add(encode(s.substring(0, 6)));
			
 
				+            s = s.substring(6);
			
 
				+        }
			
 
				+        integers.add(encode(s));
			
 
				+        int[] ints = new int[integers.size()];
			
 
				+        int pos = 0;
			
 
				+        for (Integer i : integers) {
			
 
				+            ints[pos] = i;
			
 
				+            pos++;
			
 
				+        }
			
 
				+        return ints;
			
 
				+    }
			
 
				+
			
 
				+    public String decodeArray(int[] array) {
			
 
				+        String result = "";
			
 
				+        for (int i : array) {
			
 
				+            result += decode(i);
			
 
				+        }
			
 
				+        return result;
			
 
				+    }
			
 
				+
			
 
				+    public boolean checkString(String word) {
			
 
				+        for (int i = 0; i < word.length(); i++) {
			
 
				+            if (!checkCharacter(word.charAt(i))) {
			
 
				+                return false;
			
 
				+            }
			
 
				+        }
			
 
				+        return true;
			
 
				+    }
			
 
				+}
			
--- a/morph/src/main/java/org/apache/lucene/morphology/Heuristic.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/Heuristic.java
@@ -0,0 +1,92 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+package org.apache.lucene.morphology;

			
 
				+

			
 
				+import java.io.Serializable;

			
 
				+

			
 
				+

			
 
				/**
				 * One suffix-rewrite rule: strip a number of trailing characters from a word and
				 * append a replacement suffix. The textual form is four fields joined by {@code '|'}:
				 * {@code suffixLength|normalSuffix|formMorphInfo|normalFormMorphInfo}.
				 *
				 * <p>The two morph-info numbers are only stored and reported by this class.
				 * NOTE(review): presumably they index a morphological-info table kept elsewhere;
				 * confirm against the code that consumes them.
				 */
				public class Heuristic implements Serializable {
				    // Number of trailing characters removed by transformWord.
				    byte actualSuffixLength;
				    // Text appended by transformWord after the trailing characters are removed.
				    String actualNormalSuffix;
				    short formMorphInfo;
				    short normalFormMorphInfo;

				    /**
				     * Parses the {@code '|'}-separated form produced by {@link #toString()}.
				     *
				     * @param s rule text with at least four fields
				     * @throws NumberFormatException          if a numeric field cannot be parsed
				     * @throws ArrayIndexOutOfBoundsException if fewer than four fields are present
				     */
				    public Heuristic(String s) {
				        String[] fields = s.split("\\|");
				        this.actualSuffixLength = Byte.parseByte(fields[0]);
				        this.actualNormalSuffix = fields[1];
				        this.formMorphInfo = Short.parseShort(fields[2]);
				        this.normalFormMorphInfo = Short.parseShort(fields[3]);
				    }

				    /**
				     * Creates a rule from already-parsed components.
				     */
				    public Heuristic(byte actualSuffixLength, String actualNormalSuffix, short formMorphInfo, short normalFormMorphInfo) {
				        this.actualSuffixLength = actualSuffixLength;
				        this.actualNormalSuffix = actualNormalSuffix;
				        this.formMorphInfo = formMorphInfo;
				        this.normalFormMorphInfo = normalFormMorphInfo;
				    }

				    /**
				     * Applies the rule to {@code w}.
				     *
				     * @param w word to transform
				     * @return {@code w} minus its last {@code actualSuffixLength} characters plus
				     *         {@code actualNormalSuffix}; a copy of {@code w} if it is shorter than
				     *         the suffix length
				     */
				    public StringBuilder transformWord(String w) {
				        int stemLength = w.length() - actualSuffixLength;
				        if (stemLength < 0) {
				            return new StringBuilder(w);
				        }
				        StringBuilder transformed = new StringBuilder(w.substring(0, stemLength));
				        transformed.append(actualNormalSuffix);
				        return transformed;
				    }

				    public byte getActualSuffixLength() {
				        return actualSuffixLength;
				    }

				    public String getActualNormalSuffix() {
				        return actualNormalSuffix;
				    }

				    public short getFormMorphInfo() {
				        return formMorphInfo;
				    }

				    public short getNormalFormMorphInfo() {
				        return normalFormMorphInfo;
				    }

				    /**
				     * Two rules are equal when they are of the same class and all four fields match.
				     */
				    @Override
				    public boolean equals(Object o) {
				        if (o == this) {
				            return true;
				        }
				        if (o == null || o.getClass() != getClass()) {
				            return false;
				        }
				        Heuristic other = (Heuristic) o;
				        boolean sameSuffix = (actualNormalSuffix == null)
				                ? other.actualNormalSuffix == null
				                : actualNormalSuffix.equals(other.actualNormalSuffix);
				        return actualSuffixLength == other.actualSuffixLength
				                && formMorphInfo == other.formMorphInfo
				                && normalFormMorphInfo == other.normalFormMorphInfo
				                && sameSuffix;
				    }

				    /**
				     * Hash over the four fields, combined with the multiplier 31 in declaration order.
				     */
				    @Override
				    public int hashCode() {
				        int suffixHash = (actualNormalSuffix == null) ? 0 : actualNormalSuffix.hashCode();
				        int hash = actualSuffixLength;
				        hash = hash * 31 + suffixHash;
				        hash = hash * 31 + formMorphInfo;
				        hash = hash * 31 + normalFormMorphInfo;
				        return hash;
				    }

				    /**
				     * Renders the rule in the {@code '|'}-separated form accepted by
				     * {@link #Heuristic(String)}.
				     */
				    @Override
				    public String toString() {
				        StringBuilder text = new StringBuilder();
				        text.append(actualSuffixLength).append("|");
				        text.append(actualNormalSuffix).append("|");
				        text.append(formMorphInfo).append("|");
				        text.append(normalFormMorphInfo);
				        return text.toString();
				    }
				}

			
--- a/morph/src/main/java/org/apache/lucene/morphology/LetterDecoderEncoder.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/LetterDecoderEncoder.java
@@ -0,0 +1,33 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.morphology;
			
 
				+
			
 
				+
			
 
				/**
				 * Converts between text and the integer representation used by the morphology
				 * classes, and validates which characters can be converted.
				 */
				public interface LetterDecoderEncoder {

				    /**
				     * Encodes a single short string as one integer.
				     * NOTE(review): the maximum accepted length and alphabet are defined by each
				     * implementation.
				     *
				     * @param string text to encode
				     * @return the encoded value
				     */
				    Integer encode(String string);

				    /**
				     * Encodes a string of arbitrary length as a sequence of integers.
				     *
				     * @param s text to encode
				     * @return the encoded values
				     */
				    int[] encodeToArray(String s);

				    /**
				     * Decodes a sequence of integers back into text.
				     *
				     * @param array encoded values
				     * @return the decoded text
				     */
				    String decodeArray(int[] array);

				    /**
				     * Decodes a single integer back into text.
				     *
				     * @param suffixN encoded value
				     * @return the decoded text
				     */
				    String decode(Integer suffixN);

				    /**
				     * Tells whether a character belongs to the supported alphabet.
				     *
				     * @param c character to test
				     * @return {@code true} if the character is supported
				     */
				    boolean checkCharacter(char c);

				    /**
				     * Tells whether a whole word consists of supported characters.
				     *
				     * @param word text to test
				     * @return {@code true} if the word is supported
				     */
				    boolean checkString(String word);

				    /**
				     * Returns a cleaned-up version of the input.
				     * NOTE(review): what "cleaning" means is implementation-specific.
				     *
				     * @param s input text
				     * @return the cleaned text
				     */
				    String cleanString(String s);
				}
			
--- a/morph/src/main/java/org/apache/lucene/morphology/LuceneMorphology.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/LuceneMorphology.java
@@ -0,0 +1,70 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.morphology;
			
 
				+
			
 
				+
			
 
				+import java.io.BufferedReader;
			
 
				+import java.io.IOException;
			
 
				+import java.io.InputStream;
			
 
				+import java.util.ArrayList;
			
 
				+
			
 
				+
			
 
				+public class LuceneMorphology extends MorphologyImpl {
			
 
				+
			
 
				+    public LuceneMorphology(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {
			
 
				+        super(fileName, decoderEncoder);
			
 
				+    }
			
 
				+
			
 
				+    public LuceneMorphology(InputStream inputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
			
 
				+        super(inputStream, decoderEncoder);
			
 
				+    }
			
 
				+
			
 
				+    protected void readRules(BufferedReader bufferedReader) throws IOException {
			
 
				+        String s;
			
 
				+        Integer amount;
			
 
				+        s = bufferedReader.readLine();
			
 
				+        amount = Integer.valueOf(s);
			
 
				+        rules = new Heuristic[amount][];
			
 
				+        for (int i = 0; i < amount; i++) {
			
 
				+            String s1 = bufferedReader.readLine();
			
 
				+            Integer ruleLenght = Integer.valueOf(s1);
			
 
				+            Heuristic[] heuristics = new Heuristic[ruleLenght];
			
 
				+            for (int j = 0; j < ruleLenght; j++) {
			
 
				+                heuristics[j] = new Heuristic(bufferedReader.readLine());
			
 
				+            }
			
 
				+            rules[i] = modeifyHeuristic(heuristics);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    private Heuristic[] modeifyHeuristic(Heuristic[] heuristics) {
			
 
				+        ArrayList<Heuristic> result = new ArrayList<Heuristic>();
			
 
				+        for (Heuristic heuristic : heuristics) {
			
 
				+            boolean isAdded = true;
			
 
				+            for (Heuristic ch : result) {
			
 
				+                isAdded = isAdded && !(ch.getActualNormalSuffix().equals(heuristic.getActualNormalSuffix()) && (ch.getActualSuffixLength() == heuristic.getActualSuffixLength()));
			
 
				+            }
			
 
				+            if (isAdded) {
			
 
				+                result.add(heuristic);
			
 
				+            }
			
 
				+        }
			
 
				+        return result.toArray(new Heuristic[result.size()]);
			
 
				+    }
			
 
				+
			
 
				+    public boolean checkString(String s) {
			
 
				+        return decoderEncoder.checkString(s);
			
 
				+    }
			
 
				+}
			
--- a/morph/src/main/java/org/apache/lucene/morphology/Morphology.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/Morphology.java
@@ -0,0 +1,27 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.morphology;
			
 
				+
			
 
				+import java.util.List;
			
 
				+
			
 
				+
			
 
				/**
				 * Morphological lookup for single words.
				 */
				public interface Morphology {

				    /**
				     * Returns the normal forms of a word.
				     *
				     * @param s word to analyse
				     * @return the normal forms found for {@code s}
				     */
				    List<String> getNormalForms(String s);

				    /**
				     * Returns morphological descriptions of a word.
				     *
				     * <p>NOTE(review): {@code MorphologyImpl} formats each entry as
				     * {@code form|grammarInfo}; confirm whether other implementations must follow that.
				     *
				     * @param s word to analyse
				     * @return one description string per interpretation of {@code s}
				     */
				    List<String> getMorphInfo(String s);

				}
			
--- a/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java
@@ -0,0 +1,208 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+package org.apache.lucene.morphology;

			
 
				+

			
 
				+

			
 
				+import java.io.*;

			
 
				+import java.util.ArrayList;

			
 
				+import java.util.List;

			
 
				+

			
 
				+

			
 
				+public class MorphologyImpl implements Morphology {

			
 
				+    protected int[][] separators;

			
 
				+    protected short[] rulesId;

			
 
				+    protected Heuristic[][] rules;

			
 
				+    protected String[] grammarInfo;

			
 
				+    protected LetterDecoderEncoder decoderEncoder;

			
 
				+

			
 
				+

			
 
				+    public MorphologyImpl(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {

			
 
				+        readFromFile(fileName);

			
 
				+        this.decoderEncoder = decoderEncoder;

			
 
				+    }

			
 
				+

			
 
				+    public MorphologyImpl(InputStream inputStream, LetterDecoderEncoder decoderEncoder) throws IOException {

			
 
				+        readFromInputStream(inputStream);

			
 
				+        this.decoderEncoder = decoderEncoder;

			
 
				+    }

			
 
				+

			
 
				+    public MorphologyImpl(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammarInfo) {

			
 
				+        this.separators = separators;

			
 
				+        this.rulesId = rulesId;

			
 
				+        this.rules = rules;

			
 
				+        this.grammarInfo = grammarInfo;

			
 
				+    }

			
 
				+

			
 
				+    public List<String> getNormalForms(String s) {

			
 
				+        ArrayList<String> result = new ArrayList<String>();

			
 
				+        int[] ints = decoderEncoder.encodeToArray(revertWord(s));

			
 
				+        int ruleId = findRuleId(ints);

			
 
				+        boolean notSeenEmptyString = true;

			
 
				+        for (Heuristic h : rules[rulesId[ruleId]]) {

			
 
				+            String e = h.transformWord(s).toString();

			
 
				+            if (e.length() > 0) {

			
 
				+                result.add(e);

			
 
				+            } else if (notSeenEmptyString) {

			
 
				+                result.add(s);

			
 
				+                notSeenEmptyString = false;

			
 
				+            }

			
 
				+        }

			
 
				+        return result;

			
 
				+    }

			
 
				+

			
 
				+    public List<String> getMorphInfo(String s) {

			
 
				+        ArrayList<String> result = new ArrayList<String>();

			
 
				+        int[] ints = decoderEncoder.encodeToArray(revertWord(s));

			
 
				+        int ruleId = findRuleId(ints);

			
 
				+        for (Heuristic h : rules[rulesId[ruleId]]) {

			
 
				+            result.add(h.transformWord(s).append("|").append(grammarInfo[h.getFormMorphInfo()]).toString());

			
 
				+        }

			
 
				+        return result;

			
 
				+    }

			
 
				+

			
 
				+    protected int findRuleId(int[] ints) {

			
 
				+        int low = 0;

			
 
				+        int high = separators.length - 1;

			
 
				+        int mid = 0;

			
 
				+        while (low <= high) {

			
 
				+            mid = (low + high) >>> 1;

			
 
				+            int[] midVal = separators[mid];

			
 
				+

			
 
				+            int comResult = compareToInts(ints, midVal);

			
 
				+            if (comResult > 0)

			
 
				+                low = mid + 1;

			
 
				+            else if (comResult < 0)

			
 
				+                high = mid - 1;

			
 
				+            else

			
 
				+                break;

			
 
				+        }

			
 
				+        if (compareToInts(ints, separators[mid]) >= 0) {

			
 
				+            return mid;

			
 
				+        } else {

			
 
				+            return mid - 1;

			
 
				+        }

			
 
				+

			
 
				+    }

			
 
				+

			
 
				+    private int compareToInts(int[] i1, int[] i2) {

			
 
				+        int minLength = Math.min(i1.length, i2.length);

			
 
				+        for (int i = 0; i < minLength; i++) {

			
 
				+            int i3 = i1[i] < i2[i] ? -1 : (i1[i] == i2[i] ? 0 : 1);

			
 
				+            if (i3 != 0) return i3;

			
 
				+        }

			
 
				+        return i1.length - i2.length;

			
 
				+    }

			
 
				+

			
 
				+    public void writeToFile(String fileName) throws IOException {

			
 
				+        OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8");

			
 
				+        writer.write(separators.length + "\n");

			
 
				+        for (int[] i : separators) {

			
 
				+            writer.write(i.length + "\n");

			
 
				+            for (int j : i) {

			
 
				+                writer.write(j + "\n");

			
 
				+            }

			
 
				+        }

			
 
				+        for (short i : rulesId) {

			
 
				+            writer.write(i + "\n");

			
 
				+        }

			
 
				+        writer.write(rules.length + "\n");

			
 
				+        for (Heuristic[] heuristics : rules) {

			
 
				+            writer.write(heuristics.length + "\n");

			
 
				+            for (Heuristic heuristic : heuristics) {

			
 
				+                writer.write(heuristic.toString() + "\n");

			
 
				+            }

			
 
				+        }

			
 
				+        writer.write(grammarInfo.length + "\n");

			
 
				+        for (String s : grammarInfo) {

			
 
				+            writer.write(s + "\n");

			
 
				+        }

			
 
				+        writer.close();

			
 
				+    }

			
 
				+

			
 
				+    public void readFromFile(String fileName) throws IOException {

			
 
				+        FileInputStream inputStream = new FileInputStream(fileName);

			
 
				+        readFromInputStream(inputStream);

			
 
				+    }

			
 
				+

			
 
				+    private void readFromInputStream(InputStream inputStream) throws IOException {

			
 
				+        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));

			
 
				+        String s = bufferedReader.readLine();

			
 
				+        Integer amount = Integer.valueOf(s);

			
 
				+

			
 
				+        readSeparators(bufferedReader, amount);

			
 
				+

			
 
				+        readRulesId(bufferedReader, amount);

			
 
				+

			
 
				+        readRules(bufferedReader);

			
 
				+        readGrammaInfo(bufferedReader);

			
 
				+        bufferedReader.close();

			
 
				+    }

			
 
				+

			
 
				+    private void readGrammaInfo(BufferedReader bufferedReader) throws IOException {

			
 
				+        String s;

			
 
				+        Integer amount;

			
 
				+        s = bufferedReader.readLine();

			
 
				+        amount = Integer.valueOf(s);

			
 
				+        grammarInfo = new String[amount];

			
 
				+        for (int i = 0; i < amount; i++) {

			
 
				+            grammarInfo[i] = bufferedReader.readLine();

			
 
				+        }

			
 
				+    }

			
 
				+

			
 
				+    protected void readRules(BufferedReader bufferedReader) throws IOException {

			
 
				+        String s;

			
 
				+        Integer amount;

			
 
				+        s = bufferedReader.readLine();

			
 
				+        amount = Integer.valueOf(s);

			
 
				+        rules = new Heuristic[amount][];

			
 
				+        for (int i = 0; i < amount; i++) {

			
 
				+            String s1 = bufferedReader.readLine();

			
 
				+            Integer ruleLength = Integer.valueOf(s1);

			
 
				+            rules[i] = new Heuristic[ruleLength];

			
 
				+            for (int j = 0; j < ruleLength; j++) {

			
 
				+                rules[i][j] = new Heuristic(bufferedReader.readLine());

			
 
				+            }

			
 
				+        }

			
 
				+    }

			
 
				+

			
 
				+    private void readRulesId(BufferedReader bufferedReader, Integer amount) throws IOException {

			
 
				+        rulesId = new short[amount];

			
 
				+        for (int i = 0; i < amount; i++) {

			
 
				+            String s1 = bufferedReader.readLine();

			
 
				+            rulesId[i] = Short.valueOf(s1);

			
 
				+        }

			
 
				+    }

			
 
				+

			
 
				+    private void readSeparators(BufferedReader bufferedReader, Integer amount) throws IOException {

			
 
				+        separators = new int[amount][];

			
 
				+        for (int i = 0; i < amount; i++) {

			
 
				+            String s1 = bufferedReader.readLine();

			
 
				+            Integer wordLenght = Integer.valueOf(s1);

			
 
				+            separators[i] = new int[wordLenght];

			
 
				+            for (int j = 0; j < wordLenght; j++) {

			
 
				+                separators[i][j] = Integer.valueOf(bufferedReader.readLine());

			
 
				+            }

			
 
				+        }

			
 
				+    }

			
 
				+

			
 
				+    protected String revertWord(String s) {

			
 
				+        StringBuilder result = new StringBuilder();

			
 
				+        for (int i = 1; i <= s.length(); i++) {

			
 
				+            result.append(s.charAt(s.length() - i));

			
 
				+        }

			
 
				+        return result.toString();

			
 
				+    }

			
 
				+}

			
--- a/morph/src/main/java/org/apache/lucene/morphology/SuffixToLongException.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/SuffixToLongException.java
@@ -0,0 +1,28 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov 

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+

			
 
				+package org.apache.lucene.morphology;

			
 
				+

			
 
				+

			
 
				/**
				 * Unchecked exception of the morphology package.
				 *
				 * <p>NOTE(review): the name suggests it reports a suffix that is too long; confirm
				 * against the throw sites.
				 */
				public class SuffixToLongException extends RuntimeException {

				    /** Creates the exception without a detail message. */
				    public SuffixToLongException() {
				        super();
				    }

				    /**
				     * Creates the exception with a detail message.
				     *
				     * @param message text returned by {@link #getMessage()}
				     */
				    public SuffixToLongException(String message) {
				        super(message);
				    }
				}

			
--- a/morph/src/main/java/org/apache/lucene/morphology/WrongCharaterException.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/WrongCharaterException.java
@@ -0,0 +1,27 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov 

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+

			
 
				+package org.apache.lucene.morphology;

			
 
				+

			
 
				+

			
 
				/**
				 * Unchecked exception of the morphology package.
				 *
				 * <p>NOTE(review): the name suggests it reports a character the codec does not support;
				 * confirm against the throw sites.
				 */
				public class WrongCharaterException extends RuntimeException {

				    /** Creates the exception without a detail message. */
				    public WrongCharaterException() {
				        super();
				    }

				    /**
				     * Creates the exception with a detail message.
				     *
				     * @param message text returned by {@link #getMessage()}
				     */
				    public WrongCharaterException(String message) {
				        super(message);
				    }
				}

			
--- a/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java
@@ -0,0 +1,78 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov 

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+

			
 
				+package org.apache.lucene.morphology.analyzer;

			
 
				+

			
 
				+import org.apache.lucene.analysis.Analyzer;

			
 
				+import org.apache.lucene.analysis.LowerCaseFilter;

			
 
				+import org.apache.lucene.analysis.TokenFilter;

			
 
				+import org.apache.lucene.analysis.payloads.PayloadEncoder;

			
 
				+import org.apache.lucene.analysis.payloads.PayloadHelper;

			
 
				+import org.apache.lucene.analysis.standard.StandardTokenizer;

			
 
				+import org.apache.lucene.morphology.LetterDecoderEncoder;

			
 
				+import org.apache.lucene.morphology.LuceneMorphology;

			
 
				+import org.apache.lucene.util.BytesRef;

			
 
				+

			
 
				+import java.io.IOException;

			
 
				+import java.io.InputStream;

			
 
				+

			
 
				+

			
 
				+public class MorphologyAnalyzer extends Analyzer {

			
 
				+    private LuceneMorphology luceneMorph;

			
 
				+

			
 
				+    public MorphologyAnalyzer(LuceneMorphology luceneMorph) {

			
 
				+        this.luceneMorph = luceneMorph;

			
 
				+    }

			
 
				+

			
 
				+    public MorphologyAnalyzer(String pathToMorph, LetterDecoderEncoder letterDecoderEncoder) throws IOException {

			
 
				+        luceneMorph = new LuceneMorphology(pathToMorph, letterDecoderEncoder);

			
 
				+    }

			
 
				+

			
 
				+    public MorphologyAnalyzer(InputStream inputStream, LetterDecoderEncoder letterDecoderEncoder) throws IOException {

			
 
				+        luceneMorph = new LuceneMorphology(inputStream, letterDecoderEncoder);

			
 
				+    }

			
 
				+

			
 
				+

			
 
				+    @Override

			
 
				+    protected TokenStreamComponents createComponents(String s) {

			
 
				+

			
 
				+        StandardTokenizer src = new StandardTokenizer();

			
 
				+        final PayloadEncoder encoder = new PayloadEncoder() {

			
 
				+            @Override

			
 
				+            public BytesRef encode(char[] buffer) {

			
 
				+                final Float payload = Float.valueOf(new String(buffer));

			
 
				+                System.out.println(payload);

			
 
				+                final byte[] bytes = PayloadHelper.encodeFloat(payload);

			
 
				+                return new BytesRef(bytes, 0, bytes.length);

			
 
				+            }

			
 
				+

			
 
				+            @Override

			
 
				+            public BytesRef encode(char[] buffer, int offset, int length) {

			
 
				+

			
 
				+                final Float payload = Float.valueOf(new String(buffer, offset, length));

			
 
				+                System.out.println(payload);

			
 
				+                final byte[] bytes = PayloadHelper.encodeFloat(payload);

			
 
				+

			
 
				+                return new BytesRef(bytes, 0, bytes.length);

			
 
				+            }

			
 
				+        };

			
 
				+

			
 
				+        TokenFilter filter = new LowerCaseFilter(src);

			
 
				+        filter = new MorphologyFilter(filter, luceneMorph);

			
 
				+

			
 
				+        return new TokenStreamComponents(r -> src.setReader(r), filter);

			
 
				+    }

			
 
				+}

			
--- a/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyFilter.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyFilter.java
@@ -0,0 +1,87 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov 

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+

			
 
				+package org.apache.lucene.morphology.analyzer;

			
 
				+

			
 
				+import org.apache.lucene.analysis.TokenFilter;

			
 
				+import org.apache.lucene.analysis.TokenStream;

			
 
				+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

			
 
				+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;

			
 
				+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

			
 
				+import org.apache.lucene.morphology.LuceneMorphology;

			
 
				+

			
 
				+import java.io.IOException;

			
 
				+import java.util.Iterator;

			
 
				+import java.util.List;

			
 
				+

			
 
				+

			
 
				+public class MorphologyFilter extends TokenFilter {

			
 
				+    private LuceneMorphology luceneMorph;

			
 
				+    private Iterator<String> iterator;

			
 
				+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

			
 
				+    private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);

			
 
				+    private final PositionIncrementAttribute position = addAttribute(PositionIncrementAttribute.class);

			
 
				+    private State state = null;

			
 
				+

			
 
				+    public MorphologyFilter(TokenStream tokenStream, LuceneMorphology luceneMorph) {

			
 
				+        super(tokenStream);

			
 
				+        this.luceneMorph = luceneMorph;

			
 
				+    }

			
 
				+

			
 
				+

			
 
				+    final public boolean incrementToken() throws IOException {

			
 
				+        if (iterator != null) {

			
 
				+            if (iterator.hasNext()) {

			
 
				+                restoreState(state);

			
 
				+                position.setPositionIncrement(0);

			
 
				+                termAtt.setEmpty().append(iterator.next());

			
 
				+                return true;

			
 
				+            } else {

			
 
				+                state = null;

			
 
				+                iterator = null;

			
 
				+            }

			
 
				+        }

			
 
				+        while (true) {

			
 
				+            boolean b = input.incrementToken();

			
 
				+            if (!b) {

			
 
				+                return false;

			
 
				+            }

			
 
				+            if (!keywordAttr.isKeyword() && termAtt.length() > 0) {

			
 
				+                String s = new String(termAtt.buffer(), 0, termAtt.length());

			
 
				+                if (luceneMorph.checkString(s)) {

			
 
				+                    List<String> forms = luceneMorph.getNormalForms(s);

			
 
				+                    if (forms.isEmpty()) {

			
 
				+                        continue;

			
 
				+                    } else if (forms.size() == 1) {

			
 
				+                        termAtt.setEmpty().append(forms.get(0));

			
 
				+                    } else {

			
 
				+                        state = captureState();

			
 
				+                        iterator = forms.iterator();

			
 
				+                        termAtt.setEmpty().append(iterator.next());

			
 
				+                    }

			
 
				+                }

			
 
				+            }

			
 
				+            return true;

			
 
				+        }

			
 
				+    }

			
 
				+

			
 
				+    @Override

			
 
				+    public void reset() throws IOException {

			
 
				+        super.reset();

			
 
				+        state = null;

			
 
				+        iterator = null;

			
 
				+    }

			
 
				+}

			
--- a/pom.xml
+++ b/pom.xml
@@ -0,0 +1,183 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
			
 
				+    <modelVersion>4.0.0</modelVersion>
			
 
				+    <groupId>org.apache.lucene.morphology</groupId>
			
 
				+    <artifactId>morphology</artifactId>
			
 
				+    <packaging>pom</packaging>
			
 
				+    <version>1.5</version>
			
 
				+    <name>morphology</name>
			
 
				+    <url>http://maven.apache.org</url>
			
 
				+
			
 
				+    <scm>
			
 
				+        <connection>scm:git:https://github.com/AKuznetsov/russianmorphology.git</connection>
			
 
				+        <developerConnection>scm:git:git@github.com:AKuznetsov/russianmorphology.git</developerConnection>
			
 
				+        <url>https://github.com/AKuznetsov/russianmorphology</url>
			
 
				+        <tag>HEAD</tag>
			
 
				+    </scm>
			
 
				+
			
 
				+    <properties>
			
 
				+        <lucene.version>8.0.0</lucene.version>
			
 
				+        <morphology.version>1.5</morphology.version>
			
 
				+        <junit.version>4.12</junit.version>
			
 
				+    </properties>
			
 
				+
			
 
				+    <distributionManagement>
			
 
				+        <repository>
			
 
				+            <id>bintray</id>
			
 
				+            <url>https://api.bintray.com/maven/akuznetsov/russianmorphology/morphology</url>
			
 
				+        </repository>
			
 
				+    </distributionManagement>
			
 
				+
			
 
				+    <licenses>
			
 
				+        <license>
			
 
				+            <name>Apache License, Version 2.0</name>
			
 
				+            <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
			
 
				+            <distribution>repo</distribution>
			
 
				+        </license>
			
 
				+    </licenses>
			
 
				+
			
 
				+    <dependencies>
			
 
				+        <dependency>
			
 
				+            <groupId>org.apache.lucene</groupId>
			
 
				+            <artifactId>lucene-test-framework</artifactId>
			
 
				+            <version>${lucene.version}</version>
			
 
				+            <scope>test</scope>
			
 
				+        </dependency>
			
 
				+        <dependency>
			
 
				+            <groupId>org.hamcrest</groupId>
			
 
				+            <artifactId>hamcrest-all</artifactId>
			
 
				+            <version>1.1</version>
			
 
				+            <scope>test</scope>
			
 
				+        </dependency>
			
 
				+        <dependency>
			
 
				+            <groupId>org.apache.lucene</groupId>
			
 
				+            <artifactId>lucene-core</artifactId>
			
 
				+            <version>${lucene.version}</version>
			
 
				+        </dependency>
			
 
				+        <dependency>
			
 
				+            <groupId>org.apache.lucene</groupId>
			
 
				+            <artifactId>lucene-analyzers-common</artifactId>
			
 
				+            <version>${lucene.version}</version>
			
 
				+        </dependency>
			
 
				+    </dependencies>
			
 
				+
			
 
				+    <repositories>
			
 
				+        <repository>
			
 
				+            <id>maven2-repository.dev.java.net</id>
			
 
				+            <name>Java.net Repository for Maven</name>
			
 
				+            <url>http://download.java.net/maven/2/</url>
			
 
				+        </repository>
			
 
				+        <repository>
			
 
				+            <id>bintray</id>
			
 
				+            <url>http://dl.bintray.com/akuznetsov/russianmorphology</url>
			
 
				+            <releases>
			
 
				+                <enabled>true</enabled>
			
 
				+            </releases>
			
 
				+            <snapshots>
			
 
				+                <enabled>false</enabled>
			
 
				+            </snapshots>
			
 
				+        </repository>
			
 
				+    </repositories>
			
 
				+    
			
 
				+    <pluginRepositories>
			
 
				+        <pluginRepository>
			
 
				+            <id>mc-release</id>
			
 
				+            <name>maven-license-plugin repository of releases</name>
			
 
				+            <url>http://mc-repo.googlecode.com/svn/maven2/releases</url>
			
 
				+            <snapshots>
			
 
				+                <enabled>false</enabled>
			
 
				+            </snapshots>
			
 
				+            <releases>
			
 
				+                <enabled>true</enabled>
			
 
				+            </releases>
			
 
				+        </pluginRepository>
			
 
				+    </pluginRepositories>
			
 
				+    <build>
			
 
				+        <plugins>
			
 
				+            <plugin>
			
 
				+                <artifactId>maven-release-plugin</artifactId>
			
 
				+                <version>2.5.3</version>
			
 
				+                <configuration>
			
 
				+                    <useReleaseProfile>false</useReleaseProfile>
			
 
				+                    <releaseProfiles>release</releaseProfiles>
			
 
				+                    <autoVersionSubmodules>true</autoVersionSubmodules>
			
 
				+                </configuration>
			
 
				+            </plugin>
			
 
				+
			
 
				+            <plugin>
			
 
				+                <groupId>org.apache.maven.plugins</groupId>
			
 
				+                <artifactId>maven-compiler-plugin</artifactId>
			
 
				+                <version>3.7.0</version>
			
 
				+                <configuration>
			
 
				+                    <source>1.8</source>
			
 
				+                    <target>1.8</target>
			
 
				+                </configuration>
			
 
				+            </plugin>
			
 
				+            <plugin>                <!--                 usage: http://code.google.com/p/maven-license-plugin/wiki/HowTo                -->
			
 
				+                <artifactId>maven-license-plugin</artifactId>
			
 
				+                <groupId>com.google.code.maven-license-plugin</groupId>
			
 
				+                <version>1.4.0</version>
			
 
				+                <configuration>
			
 
				+                    <basedir>${project.parent.basedir}</basedir>
			
 
				+                    <header>etc/header.txt</header>
			
 
				+                    <excludes>
			
 
				+                        <exclude>**/*.txt</exclude>
			
 
				+                        <exclude>**/*.info</exclude>
			
 
				+                        <exclude>**/pom.xml</exclude>
			
 
				+                    </excludes>
			
 
				+                    <includes>
			
 
				+                        <include>**/src/**</include>
			
 
				+                    </includes>
			
 
				+                </configuration>
			
 
				+                <executions>
			
 
				+                    <execution>
			
 
				+                        <phase>test</phase>
			
 
				+                        <goals>
			
 
				+                            <goal>check</goal>
			
 
				+                        </goals>
			
 
				+                    </execution>
			
 
				+                </executions>
			
 
				+            </plugin>
			
 
				+        </plugins>
			
 
				+    </build>
			
 
				+    <profiles>
			
 
				+        <profile>
			
 
				+            <id>release</id>
			
 
				+            <build>
			
 
				+                <plugins>
			
 
				+                    <plugin>
			
 
				+                        <artifactId>maven-source-plugin</artifactId>
			
 
				+                        <version>3.0.1</version>
			
 
				+                        <executions>
			
 
				+                            <execution>
			
 
				+                                <id>attach-sources</id>
			
 
				+                                <goals>
			
 
				+                                    <goal>jar</goal>
			
 
				+                                </goals>
			
 
				+                            </execution>
			
 
				+                        </executions>
			
 
				+                    </plugin>
			
 
				+                    <plugin>
			
 
				+                        <artifactId>maven-javadoc-plugin</artifactId>
			
 
				+                        <version>2.10.4</version>
			
 
				+                        <executions>
			
 
				+                            <execution>
			
 
				+                                <id>attach-javadocs</id>
			
 
				+                                <goals>
			
 
				+                                    <goal>jar</goal>
			
 
				+                                </goals>
			
 
				+                            </execution>
			
 
				+                        </executions>
			
 
				+                    </plugin>
			
 
				+                </plugins>
			
 
				+            </build>
			
 
				+        </profile>
			
 
				+    </profiles>
			
 
				+    <modules>
			
 
				+        <module>morph</module>
			
 
				+        <module>dictionary-reader</module>
			
 
				+        <module>russian</module>
			
 
				+        <module>english</module>
			
 
				+        <module>solr-morphology-analysis</module>
			
 
				+    </modules>
			
 
				+</project>
			
--- a/russian/pom.xml
+++ b/russian/pom.xml
@@ -0,0 +1,31 @@
 
				+<?xml version="1.0"?>
			
 
				+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
			
 
				+    <parent>
			
 
				+        <artifactId>morphology</artifactId>
			
 
				+        <groupId>org.apache.lucene.morphology</groupId>
			
 
				+        <version>1.5</version>
			
 
				+    </parent>
			
 
				+    <modelVersion>4.0.0</modelVersion>
			
 
				+    <groupId>org.apache.lucene.morphology</groupId>
			
 
				+    <artifactId>russian</artifactId>
			
 
				+    <name>russian</name>
			
 
				+    <version>1.5</version>
			
 
				+    <url>http://maven.apache.org</url>
			
 
				+    <dependencies>
			
 
				+
			
 
				+
			
 
				+        <dependency>
			
 
				+            <groupId>org.apache.lucene.morphology</groupId>
			
 
				+            <artifactId>morph</artifactId>
			
 
				+            <version>1.5</version>
			
 
				+        </dependency>
			
 
				+
			
 
				+        <dependency>
			
 
				+            <groupId>junit</groupId>
			
 
				+            <artifactId>junit</artifactId>
			
 
				+            <version>4.8.2</version>
			
 
				+            <scope>test</scope>
			
 
				+        </dependency>
			
 
				+
			
 
				+    </dependencies>
			
 
				+</project>
			
--- a/russian/src/main/java/org/apache/lucene/morphology/russian/RussianAnalyzer.java
+++ b/russian/src/main/java/org/apache/lucene/morphology/russian/RussianAnalyzer.java
@@ -0,0 +1,27 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+package org.apache.lucene.morphology.russian;

			
 
				+

			
 
				+import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer;

			
 
				+

			
 
				+import java.io.IOException;

			
 
				+

			
 
				+

			
 
				+public class RussianAnalyzer extends MorphologyAnalyzer {

			
 
				+    public RussianAnalyzer() throws IOException {

			
 
				+        super(new RussianLuceneMorphology());

			
 
				+    }

			
 
				+}

			
--- a/russian/src/main/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoder.java
+++ b/russian/src/main/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoder.java
@@ -0,0 +1,123 @@
 
				+/**

			
 
				+ * Copyright 2009 Alexander Kuznetsov 

			
 
				+ *

			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");

			
 
				+ * you may not use this file except in compliance with the License.

			
 
				+ * You may obtain a copy of the License at

			
 
				+ *

			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0

			
 
				+ *

			
 
				+ * Unless required by applicable law or agreed to in writing, software

			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,

			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

			
 
				+ * See the License for the specific language governing permissions and

			
 
				+ * limitations under the License.

			
 
				+ */

			
 
				+

			
 
				+package org.apache.lucene.morphology.russian;

			
 
				+

			
 
				+import org.apache.lucene.morphology.LetterDecoderEncoder;

			
 
				+import org.apache.lucene.morphology.SuffixToLongException;

			
 
				+import org.apache.lucene.morphology.WrongCharaterException;

			
 
				+

			
 
				+import java.util.ArrayList;

			
 
				+import java.util.LinkedList;

			
 
				+

			
 
				+/**

			
 
				+ * This helper class allow encode suffix of russian word

			
 
				+ * to long value and decode from it.

			
 
				+ * Assumed that suffix contains only small russian letters and dash.

			
 
				+ * Also assumed that letter � and � coinsed.

			
 
				+ */

			
 
				+public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {

			
 
				+    public static final int RUSSIAN_SMALL_LETTER_OFFSET = 1071;

			
 
				+    public static final int WORD_PART_LENGHT = 6;

			
 
				+    public static final int EE_CHAR = 34;

			
 
				+    public static final int E_CHAR = 6;

			
 
				+    public static final int DASH_CHAR = 45;

			
 
				+    public static final int DASH_CODE = 33;

			
 
				+

			
 
				+    public Integer encode(String string) {

			
 
				+        if (string.length() > WORD_PART_LENGHT)

			
 
				+            throw new SuffixToLongException("Suffix length should not be greater then " + WORD_PART_LENGHT + " " + string);

			
 
				+        int result = 0;

			
 
				+        for (int i = 0; i < string.length(); i++) {

			
 
				+            int c = 0 + string.charAt(i) - RUSSIAN_SMALL_LETTER_OFFSET;

			
 
				+            if (c == 45 - RUSSIAN_SMALL_LETTER_OFFSET) {

			
 
				+                c = DASH_CODE;

			
 
				+            }

			
 
				+            if (c == EE_CHAR) c = E_CHAR;

			
 
				+            if (c < 0 || c > 33)

			
 
				+                throw new WrongCharaterException("Symbol " + string.charAt(i) + " is not small cirillic letter");

			
 
				+            result = result * 34 + c;

			
 
				+        }

			
 
				+        for (int i = string.length(); i < WORD_PART_LENGHT; i++) {

			
 
				+            result *= 34;

			
 
				+        }

			
 
				+        return result;

			
 
				+    }

			
 
				+

			
 
				+    public int[] encodeToArray(String s) {

			
 
				+        LinkedList<Integer> integers = new LinkedList<Integer>();

			
 
				+        while (s.length() > WORD_PART_LENGHT) {

			
 
				+            integers.add(encode(s.substring(0, WORD_PART_LENGHT)));

			
 
				+            s = s.substring(WORD_PART_LENGHT);

			
 
				+        }

			
 
				+        integers.add(encode(s));

			
 
				+        int[] ints = new int[integers.size()];

			
 
				+        int pos = 0;

			
 
				+        for (Integer i : integers) {

			
 
				+            ints[pos] = i;

			
 
				+            pos++;

			
 
				+        }

			
 
				+        return ints;

			
 
				+    }

			
 
				+

			
 
				+    public String decodeArray(int[] array) {

			
 
				+        String result = "";

			
 
				+        for (int i : array) {

			
 
				+            result += decode(i);

			
 
				+        }

			
 
				+        return result;

			
 
				+    }

			
 
				+

			
 
				+

			
 
				+    public String decode(Integer suffixN) {

			
 
				+        String result = "";

			
 
				+        while (suffixN > 33) {

			
 
				+            int c = suffixN % 34 + RUSSIAN_SMALL_LETTER_OFFSET;

			
 
				+            if (c == RUSSIAN_SMALL_LETTER_OFFSET) {

			
 
				+                suffixN /= 34;

			
 
				+                continue;

			
 
				+            }

			
 
				+            if (c == DASH_CODE + RUSSIAN_SMALL_LETTER_OFFSET) c = DASH_CHAR;

			
 
				+            result = (char) c + result;

			
 
				+            suffixN /= 34;

			
 
				+        }

			
 
				+        long c = suffixN + RUSSIAN_SMALL_LETTER_OFFSET;

			
 
				+        if (c == DASH_CODE + RUSSIAN_SMALL_LETTER_OFFSET) c = DASH_CHAR;

			
 
				+        result = (char) c + result;

			
 
				+        return result;

			
 
				+    }

			
 
				+

			
 
				+    public boolean checkCharacter(char c) {

			
 
				+        int code = 0 + c;

			
 
				+        if (code == 45) return true;

			
 
				+        code -= RUSSIAN_SMALL_LETTER_OFFSET;

			
 
				+        if (code > 0 && code < 33) return true;

			
 
				+        return false;

			
 
				+    }

			
 
				+

			
 
				+    public boolean checkString(String word) {

			
 
				+        for (int i = 0; i < word.length(); i++) {

			
 
				+            if (!checkCharacter(word.charAt(i))) {

			
 
				+                return false;

			
 
				+            }

			
 
				+        }

			
 
				+        return true;

			
 
				+    }

			
 
				+

			
 
				+    public String cleanString(String s) {

			
 
				+        return s.replace((char) (EE_CHAR + RussianLetterDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (E_CHAR + RussianLetterDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));

			
 
				+    }

			
 
				+}

			
--- a/russian/src/main/java/org/apache/lucene/morphology/russian/RussianLuceneMorphology.java
+++ b/russian/src/main/java/org/apache/lucene/morphology/russian/RussianLuceneMorphology.java
@@ -0,0 +1,27 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.morphology.russian;
			
 
				+
			
 
				+import org.apache.lucene.morphology.LuceneMorphology;
			
 
				+
			
 
				+import java.io.IOException;
			
 
				+
			
 
				+public class RussianLuceneMorphology extends LuceneMorphology {
			
 
				+
			
 
				+    public RussianLuceneMorphology() throws IOException {
			
 
				+        super(RussianLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
			
 
				+    }
			
 
				+}
			
--- a/russian/src/main/java/org/apache/lucene/morphology/russian/RussianMorphology.java
+++ b/russian/src/main/java/org/apache/lucene/morphology/russian/RussianMorphology.java
@@ -0,0 +1,27 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.morphology.russian;
			
 
				+
			
 
				+import org.apache.lucene.morphology.MorphologyImpl;
			
 
				+
			
 
				+import java.io.IOException;
			
 
				+
			
 
				+public class RussianMorphology extends MorphologyImpl {
			
 
				+
			
 
				+    public RussianMorphology() throws IOException {
			
 
				+        super(RussianMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
			
 
				+    }
			
 
				+}
			
--- a/russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info
+++ b/russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info
--- a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoderTest.java
+++ b/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoderTest.java
@@ -0,0 +1,92 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.morphology.russian;
			
 
				+
			
 
				+import org.apache.lucene.morphology.SuffixToLongException;
			
 
				+import org.apache.lucene.morphology.WrongCharaterException;
			
 
				+import org.junit.Before;
			
 
				+import org.junit.Test;
			
 
				+
			
 
				+import java.io.BufferedReader;
			
 
				+import java.io.IOException;
			
 
				+import java.io.InputStream;
			
 
				+import java.io.InputStreamReader;
			
 
				+
			
 
				+import static org.hamcrest.core.IsEqual.equalTo;
			
 
				+import static org.junit.Assert.assertThat;
			
 
				+
			
 
				+public class RussianLetterDecoderEncoderTest {
			
 
				+    private RussianLetterDecoderEncoder decoderEncoder;
			
 
				+
			
 
				+    @Before
			
 
				+    public void setUp() {
			
 
				+        decoderEncoder = new RussianLetterDecoderEncoder();
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    @Test
			
 
				+    public void testShouldPreserverStringComporision() throws IOException {
			
 
				+        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-monotonic.txt");
			
 
				+        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
			
 
				+        String s = bufferedReader.readLine();
			
 
				+        while (s != null) {
			
 
				+            String[] qa = s.trim().split(" ");
			
 
				+            if (qa[0].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT && qa[1].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT) {
			
 
				+                assertThat(decoderEncoder.encode(qa[1]) > decoderEncoder.encode(qa[0]), equalTo(true));
			
 
				+            }
			
 
				+            s = bufferedReader.readLine();
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+    @Test
			
 
				+    public void testShouldCorrectDecodeEncode() throws IOException {
			
 
				+        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-data.txt");
			
 
				+        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
			
 
				+        String s = bufferedReader.readLine();
			
 
				+        while (s != null) {
			
 
				+            String[] qa = s.trim().split(" ");
			
 
				+            if (qa[0].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT) {
			
 
				+                Integer encodedSuffix = decoderEncoder.encode(qa[0]);
			
 
				+                assertThat(decoderEncoder.decode(encodedSuffix), equalTo(qa[1]));
			
 
				+            }
			
 
				+            s = bufferedReader.readLine();
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    @Test
			
 
				+    public void testShouldCorrectDecodeEncodeStringToArray() throws IOException {
			
 
				+        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-data-for-array.txt");
			
 
				+        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
			
 
				+        String s = bufferedReader.readLine();
			
 
				+        while (s != null) {
			
 
				+            String[] qa = s.trim().split(" ");
			
 
				+            int[] ecodedSuffix = decoderEncoder.encodeToArray(qa[0]);
			
 
				+            assertThat(decoderEncoder.decodeArray(ecodedSuffix), equalTo(qa[1]));
			
 
				+            s = bufferedReader.readLine();
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    @Test(expected = SuffixToLongException.class)
			
 
				+    public void shouldThrownExeptionIfSuffixToLong() {
			
 
				+        decoderEncoder.encode("1234567890123");
			
 
				+    }
			
 
				+
			
 
				+    @Test(expected = WrongCharaterException.class)
			
 
				+    public void shouldThrownExceptionIfSuffixContainWrongCharater() {
			
 
				+        decoderEncoder.encode("1");
			
 
				+    }
			
 
				+}
			
--- a/russian/src/test/resources/org/apache/lucene/morphology/russian/decoder-test-data-for-array.txt
+++ b/russian/src/test/resources/org/apache/lucene/morphology/russian/decoder-test-data-for-array.txt
@@ -0,0 +1,15 @@
 
				+тест тест

			
 
				+ёж еж

			
 
				+естера естера

			
 
				+что-то что-то

			
 
				+а а

			
 
				+яяяяяя яяяяяя

			
 
				+яяяя яяяя

			
 
				+аа аа

			
 
				+аааааа аааааа

			
 
				+аааааааааааа аааааааааааа

			
 
				+аааааааааааааааааа аааааааааааааааааа

			
 
				+ааааааааааааааааа ааааааааааааааааа

			
 
				+йфячыцувс йфячыцувс

			
 
				+ёёё еее

			
 
				+ёёёе ееее
			
--- a/russian/src/test/resources/org/apache/lucene/morphology/russian/decoder-test-data.txt
+++ b/russian/src/test/resources/org/apache/lucene/morphology/russian/decoder-test-data.txt
@@ -0,0 +1,10 @@
 
				+яяя яяя

			
 
				+юяю юяю

			
 
				+тест тест

			
 
				+ёж еж

			
 
				+естера естера

			
 
				+что-то что-то

			
 
				+а а

			
 
				+яяяяяя яяяяяя

			
 
				+яяяя яяяя

			
 
				+аа аа
			
--- a/russian/src/test/resources/org/apache/lucene/morphology/russian/decoder-test-monotonic.txt
+++ b/russian/src/test/resources/org/apache/lucene/morphology/russian/decoder-test-monotonic.txt
@@ -0,0 +1,7 @@
 
				+а аа
			
 
				+ааа ббб
			
 
				+ммм нннн
			
 
				+ммм ммн
			
 
				+аа ба
			
 
				+ииа к
			
 
				+удд уде
			
--- a/solr-morphology-analysis/pom.xml
+++ b/solr-morphology-analysis/pom.xml
@@ -0,0 +1,40 @@
 
				+<?xml version="1.0" encoding="UTF-8"?>
			
 
				+<project xmlns="http://maven.apache.org/POM/4.0.0"
			
 
				+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
			
 
				+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
			
 
				+    <parent>
			
 
				+        <artifactId>morphology</artifactId>
			
 
				+        <groupId>org.apache.lucene.morphology</groupId>
			
 
				+        <version>1.5</version>
			
 
				+    </parent>
			
 
				+    <modelVersion>4.0.0</modelVersion>
			
 
				+
			
 
				+    <groupId>org.apache.lucene.analysis</groupId>
			
 
				+    <artifactId>morphology</artifactId>
			
 
				+    <name>solr-morphology-analysis</name>
			
 
				+    <version>${morphology.version}</version>
			
 
				+    <url>http://maven.apache.org</url>
			
 
				+
			
 
				+    <dependencies>
			
 
				+
			
 
				+        <dependency>
			
 
				+            <groupId>org.apache.lucene.morphology</groupId>
			
 
				+            <artifactId>russian</artifactId>
			
 
				+            <version>${morphology.version}</version>
			
 
				+        </dependency>
			
 
				+        <dependency>
			
 
				+            <groupId>org.apache.lucene.morphology</groupId>
			
 
				+            <artifactId>english</artifactId>
			
 
				+            <version>${morphology.version}</version>
			
 
				+        </dependency>
			
 
				+
			
 
				+        <dependency>
			
 
				+            <groupId>junit</groupId>
			
 
				+            <artifactId>junit</artifactId>
			
 
				+            <version>${junit.version}</version>
			
 
				+            <scope>test</scope>
			
 
				+        </dependency>
			
 
				+
			
 
				+    </dependencies>
			
 
				+
			
 
				+</project>
			
--- a/solr-morphology-analysis/src/main/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactory.java
+++ b/solr-morphology-analysis/src/main/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactory.java
@@ -0,0 +1,69 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.analysis.morphology;
			
 
				+
			
 
				+import org.apache.lucene.analysis.TokenStream;
			
 
				+import org.apache.lucene.analysis.util.ResourceLoader;
			
 
				+import org.apache.lucene.analysis.util.ResourceLoaderAware;
			
 
				+import org.apache.lucene.analysis.util.TokenFilterFactory;
			
 
				+import org.apache.lucene.morphology.LuceneMorphology;
			
 
				+import org.apache.lucene.morphology.analyzer.MorphologyFilter;
			
 
				+
			
 
				+import java.util.Map;
			
 
				+
			
 
				+/**
			
 
				+ * Factory for {@link MorphologyFilter}, with configurable language
			
 
				+ * <p>
			
 
				+ * <b>Note:</b> Two languages are available now: English (default value) and Russian.
			
 
				+ * <pre class="prettyprint">
			
 
				+ * &lt;fieldType name="content" class="solr.TextField" positionIncrementGap="100"&gt;
			
 
				+ *   &lt;analyzer&gt;
			
 
				+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
			
 
				+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
			
 
				+ *     &lt;filter class="solr.MorphologyFilterFactory" language="English"/&gt;
			
 
				+ *   &lt;/analyzer&gt;
			
 
				+ * &lt;/fieldType&gt;</pre>
			
 
				+ */
			
 
				+public class MorphologyFilterFactory extends TokenFilterFactory implements ResourceLoaderAware{
			
 
				+
			
 
				+    private static final String LANGUAGE_KEY = "language";
			
 
				+
			
 
				+    private String language;
			
 
				+    private LuceneMorphology luceneMorphology;
			
 
				+
			
 
				+    public MorphologyFilterFactory(Map<String, String> args) {
			
 
				+        super(args);
			
 
				+
			
 
				+        language = get(args, LANGUAGE_KEY, "English");
			
 
				+        if (!args.isEmpty()) {
			
 
				+            throw new IllegalArgumentException("Unknown parameters: " + args);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    public TokenStream create(TokenStream input) {
			
 
				+        return new MorphologyFilter(input, luceneMorphology);
			
 
				+    }
			
 
				+
			
 
				+    public void inform(ResourceLoader loader) {
			
 
				+
			
 
				+        String className = "org.apache.lucene.morphology." + language.toLowerCase() + "." + language + "LuceneMorphology";
			
 
				+        luceneMorphology = loader.newInstance(className, LuceneMorphology.class);
			
 
				+    }
			
 
				+
			
 
				+    public LuceneMorphology getLuceneMorphology() {
			
 
				+        return luceneMorphology;
			
 
				+    }
			
 
				+}
			
--- a/solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java
+++ b/solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java
@@ -0,0 +1,75 @@
 
				+/**
			
 
				+ * Copyright 2009 Alexander Kuznetsov
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+package org.apache.lucene.analysis.morphology;
			
 
				+
			
 
				+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
			
 
				+import org.apache.lucene.analysis.util.ResourceLoader;
			
 
				+import org.apache.lucene.morphology.LuceneMorphology;
			
 
				+import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
			
 
				+import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
			
 
				+import org.junit.Assert;
			
 
				+import org.junit.Before;
			
 
				+import org.junit.Test;
			
 
				+
			
 
				+import java.util.HashMap;
			
 
				+import java.util.Map;
			
 
				+
			
 
				+public class MorphologyFilterFactoryTest {
			
 
				+
			
 
				+    private static final String LANGUAGE_KEY = "language";
			
 
				+    private ResourceLoader loader = new ClasspathResourceLoader();
			
 
				+    private Map<String, String> args;
			
 
				+
			
 
				+    @Before
			
 
				+    public void setUp(){
			
 
				+        args = new HashMap<>();
			
 
				+    }
			
 
				+
			
 
				+    @Test
			
 
				+    public void if_RussianLanguageKey_then_CreateRussianMorphologyFilter(){
			
 
				+
			
 
				+        args.put(LANGUAGE_KEY, "Russian");
			
 
				+        MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
			
 
				+        morphologyFilterFactory.inform(loader);
			
 
				+
			
 
				+        LuceneMorphology luceneMorphology = morphologyFilterFactory.getLuceneMorphology();
			
 
				+
			
 
				+        Assert.assertTrue("Creation the MorphologyFilterFactory with a Russian language key", luceneMorphology instanceof RussianLuceneMorphology);
			
 
				+    }
			
 
				+
			
 
				+    @Test
			
 
				+    public void if_EnglishLanguageKey_then_CreateEnglishMorphologyFilter(){
			
 
				+
			
 
				+        args.put(LANGUAGE_KEY, "English");
			
 
				+        MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
			
 
				+        morphologyFilterFactory.inform(loader);
			
 
				+
			
 
				+        LuceneMorphology luceneMorphology = morphologyFilterFactory.getLuceneMorphology();
			
 
				+
			
 
				+        Assert.assertTrue("Creation the MorphologyFilterFactory with a English language key", luceneMorphology instanceof EnglishLuceneMorphology);
			
 
				+    }
			
 
				+
			
 
				+    @Test
			
 
				+    public void if_NoLanguageKey_then_CreateEnglishMorphologyFilter(){
			
 
				+
			
 
				+        MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
			
 
				+        morphologyFilterFactory.inform(loader);
			
 
				+
			
 
				+        LuceneMorphology luceneMorphology = morphologyFilterFactory.getLuceneMorphology();
			
 
				+
			
 
				+        Assert.assertTrue("Creation the MorphologyFilterFactory without any language keys", luceneMorphology instanceof EnglishLuceneMorphology);
			
 
				+    }
			
 
				+}
		`@@ -0,0 +1 @@`
		`+following follow the instruction exactly will be help ensure the best well good result`
		`@@ -0,0 +1 @@`
		`+Following the instructions exactly will help ensure the best results`