Browse Source

Initial commit

Oleg Kulikov 4 years ago
parent
commit
e820682a65
83 changed files with 2547942 additions and 0 deletions
  1. 31 0
      dictionary-reader/pom.xml
  2. 137 0
      dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java
  3. 91 0
      dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/FlexiaModel.java
  4. 72 0
      dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammarReader.java
  5. 44 0
      dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RemoveFlexiaWithPrefixes.java
  6. 61 0
      dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RussianAdvSplitterFilter.java
  7. 154 0
      dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java
  8. 86 0
      dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCard.java
  9. 53 0
      dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCleaner.java
  10. 37 0
      dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordFilter.java
  11. 27 0
      dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProcessor.java
  12. 52 0
      dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordStringCleaner.java
  13. 42 0
      dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java
  14. 42 0
      dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java
  15. 1 0
      dictionary-reader/src/main/resources/russian-adv-main-code.txt
  16. 150 0
      dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java
  17. 163 0
      dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java
  18. 66 0
      dictionary-reader/src/test/java/org/apache/lucene/morphology/LuceneMorphTest.java
  19. 1 0
      dictionary-reader/src/test/resources/english/english-analyzer-answer.txt
  20. 1 0
      dictionary-reader/src/test/resources/english/english-analyzer-data.txt
  21. 8 0
      dictionary-reader/src/test/resources/english/english-morphology-test.txt
  22. 1 0
      dictionary-reader/src/test/resources/russian/russian-analyzer-answer.txt
  23. 1 0
      dictionary-reader/src/test/resources/russian/russian-analyzer-data.txt
  24. 20 0
      dictionary-reader/src/test/resources/russian/russian-morphology-test.txt
  25. 153 0
      dictonary/Dicts/GraphAn/abbr.eng
  26. 3555 0
      dictonary/Dicts/GraphAn/abbr.ger
  27. 34 0
      dictonary/Dicts/GraphAn/abbr.rus
  28. 1708 0
      dictonary/Dicts/GraphAn/abbrev.log
  29. 2185 0
      dictonary/Dicts/GraphAn/enames.lem
  30. 9877 0
      dictonary/Dicts/GraphAn/enames.txt
  31. 36 0
      dictonary/Dicts/GraphAn/extensions.txt
  32. 28 0
      dictonary/Dicts/GraphAn/idents.txt
  33. 29 0
      dictonary/Dicts/GraphAn/keyboard.txt
  34. 651 0
      dictonary/Dicts/GraphAn/keyword
  35. 402 0
      dictonary/Dicts/GraphAn/ross.txt
  36. 15 0
      dictonary/Dicts/GraphAn/space.dic
  37. 0 0
      dictonary/Dicts/Morph/Eng/morph.options
  38. 0 0
      dictonary/Dicts/Morph/Rus/morph.options
  39. 123 0
      dictonary/Dicts/Morph/egramtab.tab
  40. 878 0
      dictonary/Dicts/Morph/rgramtab.tab
  41. 3 0
      dictonary/Dicts/SrcMorph/Eng.mwz
  42. 105124 0
      dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd
  43. 3 0
      dictonary/Dicts/SrcMorph/Rus.mwz
  44. 179826 0
      dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd
  45. 258 0
      dictonary/Docs/Morph_UNIX.txt
  46. 144 0
      dictonary/copying
  47. 23 0
      english/pom.xml
  48. 29 0
      english/src/main/java/org/apache/lucene/morphology/english/EnglishAnalyzer.java
  49. 115 0
      english/src/main/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoder.java
  50. 28 0
      english/src/main/java/org/apache/lucene/morphology/english/EnglishLuceneMorphology.java
  51. 28 0
      english/src/main/java/org/apache/lucene/morphology/english/EnglishMorphology.java
  52. 45 0
      english/src/main/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmer.java
  53. 48 0
      english/src/main/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerFilter.java
  54. 0 0
      english/src/main/resources/org/apache/lucene/morphology/english/exceptions.txt
  55. 208467 0
      english/src/main/resources/org/apache/lucene/morphology/english/morph.info
  56. 40 0
      english/src/test/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoderTest.java
  57. 49 0
      english/src/test/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerTest.java
  58. 13 0
      etc/header.txt
  59. 15 0
      morph/pom.xml
  60. 55 0
      morph/src/main/java/org/apache/lucene/morphology/BaseLetterDecoderEncoder.java
  61. 92 0
      morph/src/main/java/org/apache/lucene/morphology/Heuristic.java
  62. 33 0
      morph/src/main/java/org/apache/lucene/morphology/LetterDecoderEncoder.java
  63. 70 0
      morph/src/main/java/org/apache/lucene/morphology/LuceneMorphology.java
  64. 27 0
      morph/src/main/java/org/apache/lucene/morphology/Morphology.java
  65. 208 0
      morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java
  66. 28 0
      morph/src/main/java/org/apache/lucene/morphology/SuffixToLongException.java
  67. 27 0
      morph/src/main/java/org/apache/lucene/morphology/WrongCharaterException.java
  68. 78 0
      morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java
  69. 87 0
      morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyFilter.java
  70. 183 0
      pom.xml
  71. 31 0
      russian/pom.xml
  72. 27 0
      russian/src/main/java/org/apache/lucene/morphology/russian/RussianAnalyzer.java
  73. 123 0
      russian/src/main/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoder.java
  74. 27 0
      russian/src/main/java/org/apache/lucene/morphology/russian/RussianLuceneMorphology.java
  75. 27 0
      russian/src/main/java/org/apache/lucene/morphology/russian/RussianMorphology.java
  76. 2031238 0
      russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info
  77. 92 0
      russian/src/test/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoderTest.java
  78. 15 0
      russian/src/test/resources/org/apache/lucene/morphology/russian/decoder-test-data-for-array.txt
  79. 10 0
      russian/src/test/resources/org/apache/lucene/morphology/russian/decoder-test-data.txt
  80. 7 0
      russian/src/test/resources/org/apache/lucene/morphology/russian/decoder-test-monotonic.txt
  81. 40 0
      solr-morphology-analysis/pom.xml
  82. 69 0
      solr-morphology-analysis/src/main/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactory.java
  83. 75 0
      solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java

+ 31 - 0
dictionary-reader/pom.xml

@@ -0,0 +1,31 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+    <parent>
+        <artifactId>morphology</artifactId>
+        <groupId>org.apache.lucene.morphology</groupId>
+        <version>1.5</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>org.apache.lucene.morphology</groupId>
+    <artifactId>dictionary-reader</artifactId>
+    <name>dictionary-reader</name>
+    <version>1.5</version>
+    <url>http://maven.apache.org</url>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.lucene.morphology</groupId>
+            <artifactId>russian</artifactId>
+            <version>1.5</version>
+        </dependency>
+
+
+        <dependency>
+            <groupId>org.apache.lucene.morphology</groupId>
+            <artifactId>english</artifactId>
+            <version>1.5</version>
+        </dependency>
+    </dependencies>
+
+
+</project>

+ 137 - 0
dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java

@@ -0,0 +1,137 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology.dictionary;
+
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+
+/**
+ * Reads a morphology dictionary file and hands every word, together with all of
+ * its forms, to a {@link WordProcessor}.
+ * <p>
+ * As consumed by {@link #process(WordProcessor)}, the file is a sequence of blocks.
+ * Each block starts with a line holding the number of lines that follow it. The
+ * blocks are, in order: the flexia models, two blocks that are skipped, the prefix
+ * block (read and discarded) and the word bases.
+ */
+public class DictionaryReader {
+    private String fileName;
+    private String fileEncoding = "windows-1251";
+    private List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
+    private Set<String> ignoredForm = new HashSet<String>();
+
+    /**
+     * @param fileName    path of the dictionary file
+     * @param ignoredForm codes to ignore: a word whose first flexia model carries one
+     *                    of these codes is not passed to the processor
+     */
+    public DictionaryReader(String fileName, Set<String> ignoredForm) {
+        this.fileName = fileName;
+        this.ignoredForm = ignoredForm;
+    }
+
+
+    /**
+     * Reads the dictionary file and passes every accepted word to the processor.
+     *
+     * @param wordProcessor receiver of the word cards built from the file
+     * @throws IOException if the file cannot be opened or read, or if the processor fails
+     */
+    public void process(WordProcessor wordProcessor) throws IOException {
+        FileInputStream input = new FileInputStream(fileName);
+        try {
+            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(input, fileEncoding));
+            readFlexias(bufferedReader);
+            skipBlock(bufferedReader);
+            skipBlock(bufferedReader);
+            readPrefix(bufferedReader);
+            readWords(bufferedReader, wordProcessor);
+        } finally {
+            // Release the file handle on every path, including failures while parsing.
+            input.close();
+        }
+    }
+
+
+    /**
+     * Reads the word block: a count line followed by that many word lines. Lines for
+     * which {@link #buildForm(String)} returns {@code null} are skipped.
+     */
+    private void readWords(BufferedReader reader, WordProcessor wordProcessor) throws IOException {
+        String s = reader.readLine();
+        int count = Integer.parseInt(s);
+        int actual = 0;
+        for (int i = 0; i < count; i++) {
+            s = reader.readLine();
+            if (i % 10000 == 0) System.out.println("Proccess " + i + " wordBase of " + count);
+
+            WordCard card = buildForm(s);
+
+            if (card == null) {
+                continue;
+            }
+
+            wordProcessor.process(card);
+            actual++;
+
+        }
+        System.out.println("Finished word processing actual words " + actual);
+    }
+
+    /**
+     * Builds a word card from one line of the word block.
+     * <p>
+     * The line is split on spaces: the first token is the word base ("#" stands for an
+     * empty base) and the second token is the index of its flexia model list.
+     *
+     * @return the card, or {@code null} when the base starts with "-", the model list
+     *         is empty, or the code of the first model is in the ignored set
+     */
+    private WordCard buildForm(String s) {
+        String[] wd = s.split(" ");
+        String wordBase = wd[0].toLowerCase();
+        if (wordBase.startsWith("-")) return null;
+        wordBase = "#".equals(wordBase) ? "" : wordBase;
+        List<FlexiaModel> models = wordsFlexias.get(Integer.parseInt(wd[1]));
+        // Check for emptiness BEFORE touching the first element; the previous order
+        // threw IndexOutOfBoundsException for an empty list.
+        if (models.isEmpty()) {
+            return null;
+        }
+        FlexiaModel flexiaModel = models.get(0);
+        if (ignoredForm.contains(flexiaModel.getCode())) {
+            return null;
+        }
+
+        WordCard card = new WordCard(flexiaModel.create(wordBase), wordBase, flexiaModel.getSuffix());
+
+        for (FlexiaModel fm : models) {
+            card.addFlexia(fm);
+        }
+        return card;
+    }
+
+
+    /**
+     * Skips one block: reads the count line and then discards that many lines.
+     */
+    private void skipBlock(BufferedReader reader) throws IOException {
+        String s = reader.readLine();
+        int count = Integer.parseInt(s);
+        for (int i = 0; i < count; i++) {
+            reader.readLine();
+        }
+    }
+
+
+    /**
+     * Consumes the prefix block. Its content is not used, so it is skipped like any
+     * other ignored block.
+     */
+    private void readPrefix(BufferedReader reader) throws IOException {
+        skipBlock(reader);
+    }
+
+    /**
+     * Reads the flexia block. Every line holds one list of models separated by "%";
+     * the list is stored at the position of its line, which is the index word lines
+     * refer to.
+     */
+    private void readFlexias(BufferedReader reader) throws IOException {
+        String s = reader.readLine();
+        int count = Integer.parseInt(s);
+        for (int i = 0; i < count; i++) {
+            s = reader.readLine();
+            ArrayList<FlexiaModel> flexiaModelArrayList = new ArrayList<FlexiaModel>();
+            wordsFlexias.add(flexiaModelArrayList);
+            for (String line : s.split("%")) {
+                addFlexia(flexiaModelArrayList, line);
+            }
+        }
+    }
+
+    /**
+     * Parses one "suffix*code" or "suffix*code*prefix" entry and appends it to the list.
+     * Entries with any other number of "*"-separated parts are ignored.
+     */
+    private void addFlexia(ArrayList<FlexiaModel> flexiaModelArrayList, String line) {
+        String[] fl = line.split("\\*");
+        if (fl.length == 3) {
+            flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), fl[2].toLowerCase()));
+        }
+        if (fl.length == 2) flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), ""));
+    }
+
+}

+ 91 - 0
dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/FlexiaModel.java

@@ -0,0 +1,91 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology.dictionary;
+
+/**
+ * Describes how one word form is built from the immutable part (base) of a word:
+ * the form is {@code prefix + base + suffix}, and it is tagged with a code.
+ */
+public class FlexiaModel {
+    private String code;
+    private String suffix;
+    private String prefix;
+
+    /**
+     * @param code   code attached to the form
+     * @param suffix text appended after the base
+     * @param prefix text placed in front of the base
+     */
+    public FlexiaModel(String code, String suffix, String prefix) {
+        this.code = code;
+        this.suffix = suffix;
+        this.prefix = prefix;
+    }
+
+    /**
+     * Builds the word form for the given base.
+     *
+     * @param s the word base
+     * @return prefix, base and suffix concatenated in that order
+     */
+    public String create(String s) {
+        return prefix + s + suffix;
+    }
+
+    public String getCode() {
+        return code;
+    }
+
+    public void setCode(String code) {
+        this.code = code;
+    }
+
+    public String getSuffix() {
+        return suffix;
+    }
+
+    public void setSuffix(String suffix) {
+        this.suffix = suffix;
+    }
+
+    public String getPrefix() {
+        return prefix;
+    }
+
+    public void setPrefix(String prefix) {
+        this.prefix = prefix;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder text = new StringBuilder("FlexiaModel{");
+        text.append("code='").append(code).append('\'');
+        text.append(", suffix='").append(suffix).append('\'');
+        text.append(", prefix='").append(prefix).append('\'');
+        text.append('}');
+        return text.toString();
+    }
+
+    /**
+     * Two models are equal when they are of the same class and their code, prefix and
+     * suffix are pairwise equal (null matches only null).
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        FlexiaModel other = (FlexiaModel) o;
+        return sameText(code, other.code)
+                && sameText(prefix, other.prefix)
+                && sameText(suffix, other.suffix);
+    }
+
+    @Override
+    public int hashCode() {
+        // Field order (code, suffix, prefix) and the multiplier 31 define the hash value.
+        int hash = hashOf(code);
+        hash = 31 * hash + hashOf(suffix);
+        hash = 31 * hash + hashOf(prefix);
+        return hash;
+    }
+
+    /** Null-safe string equality. */
+    private static boolean sameText(String left, String right) {
+        return left == null ? right == null : left.equals(right);
+    }
+
+    /** Null-safe string hash; null hashes to 0. */
+    private static int hashOf(String text) {
+        return text == null ? 0 : text.hashCode();
+    }
+}

+ 72 - 0
dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammarReader.java

@@ -0,0 +1,72 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology.dictionary;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
+/**
+ * Loads a grammar table file into an ordered list of grammar descriptions and an
+ * index from grammar code to the position of its description in that list.
+ * <p>
+ * Every non-empty line that does not start with "//" is split at its first space:
+ * the part before the space is the code, the rest is the description.
+ */
+public class GrammarReader {
+    private String fileName;
+    private String fileEncoding = "windows-1251";
+    private List<String> grammarInfo = new ArrayList<String>();
+    private Map<String, Integer> inverseIndex = new HashMap<String, Integer>();
+
+    /**
+     * Reads the given file using the default "windows-1251" encoding.
+     *
+     * @param fileName path of the grammar table file
+     * @throws IOException if the file cannot be opened or read
+     */
+    public GrammarReader(String fileName) throws IOException {
+        this.fileName = fileName;
+        setUp();
+    }
+
+    /**
+     * Reads the given file using the given encoding.
+     *
+     * @param fileName     path of the grammar table file
+     * @param fileEncoding name of the charset the file is stored in
+     * @throws IOException if the file cannot be opened or read, or the charset is unsupported
+     */
+    public GrammarReader(String fileName, String fileEncoding) throws IOException {
+        this.fileName = fileName;
+        this.fileEncoding = fileEncoding;
+        setUp();
+    }
+
+    /**
+     * Parses the file into {@link #grammarInfo} and {@link #inverseIndex}.
+     * A data line without a space has no description part and fails with
+     * {@link ArrayIndexOutOfBoundsException}.
+     */
+    private void setUp() throws IOException {
+        FileInputStream input = new FileInputStream(fileName);
+        try {
+            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(input, fileEncoding));
+            String line = bufferedReader.readLine();
+            while (line != null) {
+                line = line.trim();
+                if (!line.startsWith("//") && line.length() > 0) {
+                    String[] strings = line.split(" ", 2);
+                    // The description is appended, so its index is the current list size.
+                    Integer i = grammarInfo.size();
+                    inverseIndex.put(strings[0], i);
+                    grammarInfo.add(strings[1]);
+                }
+                line = bufferedReader.readLine();
+            }
+        } finally {
+            // The stream used to stay open; close it whether or not parsing succeeded.
+            input.close();
+        }
+    }
+
+    /**
+     * @return the descriptions in file order (the live internal list)
+     */
+    public List<String> getGrammarInfo() {
+        return grammarInfo;
+    }
+
+    /**
+     * @return a new array holding the descriptions in file order
+     */
+    public String[] getGrammarInfoAsArray() {
+        return grammarInfo.toArray(new String[grammarInfo.size()]);
+    }
+
+    /**
+     * @return map from grammar code to the index of its description (the live internal map)
+     */
+    public Map<String, Integer> getGrammarInverseIndex() {
+        return inverseIndex;
+    }
+}

+ 44 - 0
dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RemoveFlexiaWithPrefixes.java

@@ -0,0 +1,44 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.dictionary;
+
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
+
+
+/**
+ * Word filter that strips from a word card every form whose prefix is not empty.
+ */
+public class RemoveFlexiaWithPrefixes extends WordFilter {
+
+    public RemoveFlexiaWithPrefixes(WordProcessor wordProcessor) {
+        super(wordProcessor);
+    }
+
+    /**
+     * Removes all prefixed forms from the given card, modifying it in place.
+     *
+     * @param wordCard the card to clean
+     * @return a new list containing only the given (now modified) card
+     */
+    @Override
+    public List<WordCard> transform(WordCard wordCard) {
+        // Collect first and remove afterwards, so the list of forms is not modified
+        // while it is being iterated.
+        List<FlexiaModel> prefixedForms = new LinkedList<FlexiaModel>();
+        for (FlexiaModel form : wordCard.getWordsForms()) {
+            boolean hasPrefix = form.getPrefix().length() != 0;
+            if (hasPrefix) {
+                prefixedForms.add(form);
+            }
+        }
+        for (FlexiaModel form : prefixedForms) {
+            wordCard.removeFlexia(form);
+        }
+
+        List<WordCard> result = new LinkedList<WordCard>();
+        result.add(wordCard);
+        return result;
+    }
+}

+ 61 - 0
dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RussianAdvSplitterFilter.java

@@ -0,0 +1,61 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.dictionary;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.LinkedList;
+import java.util.List;
+
+
+/**
+ * Word filter that, next to the original card, emits a second card built from the
+ * prefixed forms of a word. The second card is produced only when one of the prefixed
+ * forms carries the code read from the classpath resource
+ * {@code /russian-adv-main-code.txt}.
+ */
+public class RussianAdvSplitterFilter extends WordFilter {
+    private String code;
+
+    /**
+     * @param wordProcessor processor handed to the {@link WordFilter} superclass
+     * @throws IOException if the code resource is missing or cannot be read
+     */
+    public RussianAdvSplitterFilter(WordProcessor wordProcessor) throws IOException {
+        super(wordProcessor);
+        java.io.InputStream resource = this.getClass().getResourceAsStream("/russian-adv-main-code.txt");
+        if (resource == null) {
+            // getResourceAsStream signals a missing resource with null; fail with a clear
+            // message instead of a NullPointerException inside InputStreamReader.
+            throw new IOException("Resource /russian-adv-main-code.txt not found on the classpath");
+        }
+        try {
+            // Only the first line of the resource is used.
+            code = new BufferedReader(new InputStreamReader(resource, "windows-1251")).readLine();
+        } finally {
+            resource.close();
+        }
+    }
+
+    /**
+     * Returns the given card and, when applicable, an extra card for the prefixed word.
+     * <p>
+     * Every form with a non-empty prefix is copied, with the prefix cleared, into the
+     * form list of the extra card. The extra card is created only if one of those forms
+     * has the configured code; its base is that form's prefix followed by the original base.
+     *
+     * @param wordCard the card to inspect; it is returned unchanged as the first element
+     * @return a list holding the original card, optionally followed by the extra card
+     */
+    @Override
+    public List<WordCard> transform(WordCard wordCard) {
+        LinkedList<WordCard> result = new LinkedList<WordCard>();
+        result.add(wordCard);
+
+        String baseWord = "";
+        String canonicalForm = "";
+        String canonicalSuffix = "";
+        List<FlexiaModel> flexiaModels = new LinkedList<FlexiaModel>();
+        for (FlexiaModel flexiaModel : wordCard.getWordsForms()) {
+            if (flexiaModel.getPrefix().length() == 0) {
+                continue;
+            }
+            flexiaModels.add(new FlexiaModel(flexiaModel.getCode(), flexiaModel.getSuffix(), ""));
+            if (flexiaModel.getCode().equals(code)) {
+                baseWord = flexiaModel.getPrefix() + wordCard.getBase();
+                // NOTE(review): the canonical form is set to the code, not to a word
+                // string; kept as in the original, confirm this is intended.
+                canonicalForm = flexiaModel.getCode();
+                canonicalSuffix = flexiaModel.getSuffix();
+            }
+        }
+
+        if (baseWord.length() > 0) {
+            WordCard wc = new WordCard(canonicalForm, baseWord, canonicalSuffix);
+            wc.setWordsForms(flexiaModels);
+            result.add(wc);
+        }
+
+        return result;
+    }
+}

+ 154 - 0
dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java

@@ -0,0 +1,154 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology.dictionary;
+
+
+import org.apache.lucene.morphology.Heuristic;
+import org.apache.lucene.morphology.LetterDecoderEncoder;
+import org.apache.lucene.morphology.MorphologyImpl;
+
+import java.io.IOException;
+import java.util.*;
+
+
+//todo refactor this class
+/**
+ * Word processor that collects, for every word form it sees, the set of
+ * {@link Heuristic}s describing that form, and can then write the collected data out
+ * through {@link MorphologyImpl}.
+ * <p>
+ * Forms are stored reversed (last character first) as keys of a sorted map.
+ */
+public class StatisticsCollector implements WordProcessor {
+    private TreeMap<String, Set<Heuristic>> inverseIndex = new TreeMap<String, Set<Heuristic>>();
+    private Map<Set<Heuristic>, Integer> ruleInverseIndex = new HashMap<Set<Heuristic>, Integer>();
+    private List<Set<Heuristic>> rules = new ArrayList<Set<Heuristic>>();
+    private GrammarReader grammarReader;
+    private LetterDecoderEncoder decoderEncoder;
+
+
+    /**
+     * @param grammarReader  source of the code-to-index mapping and the grammar descriptions
+     * @param decoderEncoder used to clean strings and to encode the reversed forms
+     */
+    public StatisticsCollector(GrammarReader grammarReader, LetterDecoderEncoder decoderEncoder) {
+        this.grammarReader = grammarReader;
+        this.decoderEncoder = decoderEncoder;
+    }
+
+    /**
+     * Cleans the card and records one heuristic per form under the reversed form.
+     * A card without forms contributes nothing and is skipped.
+     *
+     * @param wordCard the card to record; its strings are cleaned in place
+     */
+    public void process(WordCard wordCard) throws IOException {
+        cleanWordCard(wordCard);
+        List<FlexiaModel> forms = wordCard.getWordsForms();
+        if (forms.isEmpty()) {
+            // Earlier filters may have removed every form; get(0) below would throw.
+            return;
+        }
+        // The code of the first form is passed to every heuristic as the normal-form code.
+        String normalStringMorph = forms.get(0).getCode();
+
+        for (FlexiaModel fm : forms) {
+            Heuristic heuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm, normalStringMorph);
+            String form = revertWord(fm.create(wordCard.getBase()));
+            Set<Heuristic> suffixHeuristics = inverseIndex.get(form);
+            if (suffixHeuristics == null) {
+                suffixHeuristics = new HashSet<Heuristic>();
+                inverseIndex.put(form, suffixHeuristics);
+            }
+            suffixHeuristics.add(heuristic);
+        }
+    }
+
+    /**
+     * Runs every string of the card (base, canonical form, canonical suffix and the
+     * suffix and prefix of each form) through {@link #cleanString(String)}.
+     */
+    private void cleanWordCard(WordCard wordCard) {
+        wordCard.setBase(cleanString(wordCard.getBase()));
+        wordCard.setCanonicalForm(cleanString(wordCard.getCanonicalForm()));
+        wordCard.setCanonicalSuffix(cleanString(wordCard.getCanonicalSuffix()));
+        List<FlexiaModel> models = wordCard.getWordsForms();
+        for (FlexiaModel m : models) {
+            m.setSuffix(cleanString(m.getSuffix()));
+            m.setPrefix(cleanString(m.getPrefix()));
+        }
+    }
+
+
+    /**
+     * Builds a {@link MorphologyImpl} from the collected data and writes it to a file.
+     * <p>
+     * The reversed forms are visited in the sorted order of the map. A form is kept
+     * only when its heuristic set differs from the set of the form visited just before
+     * it. Each distinct set becomes a rule and gets an index.
+     *
+     * @param fileName path of the file to write
+     * @throws IOException if writing fails
+     */
+    public void saveHeuristic(String fileName) throws IOException {
+
+        // First pass: count the kept forms, gather a length histogram and number the rules.
+        Map<Integer, Integer> dist = new TreeMap<Integer, Integer>();
+        Set<Heuristic> prevSet = null;
+        int count = 0;
+        for (String key : inverseIndex.keySet()) {
+            Set<Heuristic> currentSet = inverseIndex.get(key);
+            if (!currentSet.equals(prevSet)) {
+                Integer d = dist.get(key.length());
+                dist.put(key.length(), 1 + (d == null ? 0 : d));
+                prevSet = currentSet;
+                count++;
+                if (!ruleInverseIndex.containsKey(currentSet)) {
+                    ruleInverseIndex.put(currentSet, rules.size());
+                    rules.add(currentSet);
+                }
+            }
+        }
+        System.out.println("Word with diffirent rules " + count);
+        System.out.println("All ivers words " + inverseIndex.size());
+        System.out.println(dist);
+        System.out.println("diffirent rule count " + ruleInverseIndex.size());
+
+        // Flatten every rule set into an array, in rule-index order.
+        Heuristic[][] heuristics = new Heuristic[ruleInverseIndex.size()][];
+        int index = 0;
+        for (Set<Heuristic> hs : rules) {
+            heuristics[index] = new Heuristic[hs.size()];
+            int indexj = 0;
+            for (Heuristic h : hs) {
+                heuristics[index][indexj] = h;
+                indexj++;
+            }
+            index++;
+        }
+
+        // Second pass: encode the kept forms and remember the rule index of each.
+        int[][] ints = new int[count][];
+        short[] rulesId = new short[count];
+        count = 0;
+        prevSet = null;
+        for (String key : inverseIndex.keySet()) {
+            Set<Heuristic> currentSet = inverseIndex.get(key);
+            if (!currentSet.equals(prevSet)) {
+                int[] word = decoderEncoder.encodeToArray(key);
+                ints[count] = word;
+                // NOTE(review): the rule index is narrowed to short; more than
+                // Short.MAX_VALUE distinct rules would wrap around silently.
+                rulesId[count] = (short) ruleInverseIndex.get(currentSet).intValue();
+                count++;
+                prevSet = currentSet;
+            }
+        }
+        MorphologyImpl morphology = new MorphologyImpl(ints, rulesId, heuristics, grammarReader.getGrammarInfoAsArray());
+        morphology.writeToFile(fileName);
+    }
+
+    /**
+     * Returns the characters of the string in reverse order.
+     */
+    private String revertWord(String s) {
+        // Appends char by char (not StringBuilder.reverse()), so the result is the plain
+        // char-wise reversal the previous string concatenation produced.
+        StringBuilder reversed = new StringBuilder(s.length());
+        for (int i = s.length() - 1; i >= 0; i--) {
+            reversed.append(s.charAt(i));
+        }
+        return reversed.toString();
+    }
+
+
+    /**
+     * Builds the heuristic for one form of a word.
+     * <p>
+     * The form and the normal form ({@code wordBase + canonicalSuffix}) are compared from
+     * the start. The heuristic stores how many trailing characters of the form lie
+     * beyond the common prefix, and which ending of the normal form follows it.
+     *
+     * @param wordBase         base of the word
+     * @param canonicalSuffix  suffix that turns the base into the normal form
+     * @param fm               the form being described
+     * @param normalSuffixForm code of the normal form
+     */
+    private Heuristic createEvristic(String wordBase, String canonicalSuffix, FlexiaModel fm, String normalSuffixForm) {
+        String form = fm.create(wordBase);
+        String normalForm = wordBase + canonicalSuffix;
+        int length = getCommonLength(form, normalForm);
+        int actualSuffixLengh = form.length() - length;
+        String actualNormalSuffix = normalForm.substring(length);
+        // Both lookups fail with NullPointerException when a code is missing from the index.
+        Integer integer = grammarReader.getGrammarInverseIndex().get(fm.getCode());
+        Integer nf = grammarReader.getGrammarInverseIndex().get(normalSuffixForm);
+        return new Heuristic((byte) actualSuffixLengh, actualNormalSuffix, (short) integer.intValue(), (short) nf.intValue());
+    }
+
+    /**
+     * Returns the length of the longest common prefix of the two strings.
+     *
+     * @param s1 first string
+     * @param s2 second string
+     * @return number of leading characters the strings share
+     */
+    public static Integer getCommonLength(String s1, String s2) {
+        int limit = Math.min(s1.length(), s2.length());
+        for (int i = 0; i < limit; i++) {
+            if (s1.charAt(i) != s2.charAt(i)) return i;
+        }
+        return limit;
+    }
+
+    /** Delegates to the cleaning routine of the letter encoder. */
+    private String cleanString(String s) {
+        return decoderEncoder.cleanString(s);
+    }
+
+}

+ 86 - 0
dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCard.java

@@ -0,0 +1,86 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology.dictionary;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Represents a word and all of its forms: the canonical form, the base shared by the
+ * forms, the suffix of the canonical form and the list of flexia models.
+ */
+public class WordCard {
+    private String canonicalForm;
+    private String base;
+    private String canonicalSuffix;
+    private List<FlexiaModel> wordsForms = new ArrayList<FlexiaModel>();
+
+    /**
+     * Creates a card with an empty list of forms.
+     *
+     * @param canonicalForm   canonical form of the word
+     * @param base            part of the word shared by its forms
+     * @param canonicalSuffix suffix of the canonical form
+     */
+    public WordCard(String canonicalForm, String base, String canonicalSuffix) {
+        this.canonicalForm = canonicalForm;
+        this.base = base;
+        this.canonicalSuffix = canonicalSuffix;
+    }
+
+    /** Appends a form to the end of the list of forms. */
+    public void addFlexia(FlexiaModel flexiaModel) {
+        wordsForms.add(flexiaModel);
+    }
+
+    /** Removes the first form equal to the given one, if there is any. */
+    public void removeFlexia(FlexiaModel flexiaModel) {
+        wordsForms.remove(flexiaModel);
+    }
+
+    public String getCanonicalForm() {
+        return canonicalForm;
+    }
+
+    public void setCanonicalForm(String canonicalForm) {
+        this.canonicalForm = canonicalForm;
+    }
+
+    public String getBase() {
+        return base;
+    }
+
+    public void setBase(String base) {
+        this.base = base;
+    }
+
+    public String getCanonicalSuffix() {
+        return canonicalSuffix;
+    }
+
+    public void setCanonicalSuffix(String canonicalSuffix) {
+        this.canonicalSuffix = canonicalSuffix;
+    }
+
+    /**
+     * @return the live list of forms; changes to it change this card
+     */
+    public List<FlexiaModel> getWordsForms() {
+        return wordsForms;
+    }
+
+    /** Replaces the list of forms with the given list (stored as is, not copied). */
+    public void setWordsForms(List<FlexiaModel> wordsForms) {
+        this.wordsForms = wordsForms;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder text = new StringBuilder("WordCard{");
+        text.append("canonicalForm='").append(canonicalForm).append('\'');
+        text.append(", base='").append(base).append('\'');
+        text.append(", canonicalSuffix='").append(canonicalSuffix).append('\'');
+        text.append(", wordsForms=").append(wordsForms);
+        text.append('}');
+        return text.toString();
+    }
+}

+ 53 - 0
dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCleaner.java

@@ -0,0 +1,53 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.dictionary;
+
+import org.apache.lucene.morphology.LetterDecoderEncoder;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+
+
+/**
+ * Filter that rejects words, and strips individual word forms, which contain a
+ * hyphen or fail {@link LetterDecoderEncoder#checkString(String)}.
+ */
+public class WordCleaner extends WordFilter {
+
+    private final LetterDecoderEncoder decoderEncoder;
+
+    /**
+     * @param decoderEncoder used to validate every word and word form
+     * @param wordProcessor  next processor in the chain; receives the cards that survive
+     */
+    public WordCleaner(LetterDecoderEncoder decoderEncoder, WordProcessor wordProcessor) {
+        super(wordProcessor);
+        this.decoderEncoder = decoderEncoder;
+    }
+
+    /**
+     * Validates the card and removes its unacceptable forms in place.
+     *
+     * @param wordCard card to check; its list of word forms is modified
+     * @return an empty list when {@code base + canonicalSuffix} is rejected,
+     *         otherwise a one-element list holding the (possibly trimmed) card
+     */
+    @Override
+    public List<WordCard> transform(WordCard wordCard) {
+        String base = wordCard.getBase();
+        if (!isAcceptable(base + wordCard.getCanonicalSuffix())) {
+            return Collections.emptyList();
+        }
+
+        // Collect first and remove afterwards: removing while iterating over the
+        // card's own list would fail with ConcurrentModificationException.
+        List<FlexiaModel> flexiaModelsToRemove = new LinkedList<FlexiaModel>();
+        for (FlexiaModel fm : wordCard.getWordsForms()) {
+            // Build the form once; it used to be created twice per iteration.
+            String form = fm.create(base);
+            if (!isAcceptable(form)) {
+                flexiaModelsToRemove.add(fm);
+            }
+        }
+        for (FlexiaModel fm : flexiaModelsToRemove) {
+            wordCard.removeFlexia(fm);
+        }
+
+        List<WordCard> result = new LinkedList<WordCard>();
+        result.add(wordCard);
+        return result;
+    }
+
+    /** A string is acceptable when it has no hyphen and passes the encoder's check. */
+    private boolean isAcceptable(String s) {
+        return !s.contains("-") && decoderEncoder.checkString(s);
+    }
+}

+ 37 - 0
dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordFilter.java

@@ -0,0 +1,37 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology.dictionary;
+
+import java.io.IOException;
+import java.util.List;
+
+
+/**
+ * A link in a chain of {@link WordProcessor}s: every incoming card is passed to
+ * {@link #transform(WordCard)} and each resulting card is forwarded to the
+ * processor given at construction time.
+ */
+public abstract class WordFilter implements WordProcessor {
+    private final WordProcessor wordProcessor;
+
+    /**
+     * @param wordProcessor processor that receives the transformed cards
+     */
+    public WordFilter(WordProcessor wordProcessor) {
+        this.wordProcessor = wordProcessor;
+    }
+
+    /**
+     * Maps one card to zero or more cards.
+     *
+     * @param wordCard incoming card
+     * @return cards to forward; an empty list drops the word
+     */
+    public abstract List<WordCard> transform(WordCard wordCard);
+
+    /**
+     * Transforms the card and hands every result to the next processor, in order.
+     *
+     * @throws IOException if the next processor fails
+     */
+    public void process(WordCard wordCard) throws IOException {
+        List<WordCard> transformed = transform(wordCard);
+        for (WordCard card : transformed) {
+            wordProcessor.process(card);
+        }
+    }
+}

+ 27 - 0
dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProcessor.java

@@ -0,0 +1,27 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology.dictionary;
+
+import java.io.IOException;
+
+/**
+ * Callback that receives word cards one at a time, for example from a
+ * dictionary reader or from a preceding {@link WordFilter} in a chain.
+ */
+public interface WordProcessor {
+
+    /**
+     * Handles a single word card.
+     *
+     * @param wordCard the card to handle
+     * @throws IOException if handling the card fails
+     */
+    void process(WordCard wordCard) throws IOException;
+}

+ 52 - 0
dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordStringCleaner.java

@@ -0,0 +1,52 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.dictionary;
+
+import org.apache.lucene.morphology.LetterDecoderEncoder;
+
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
+
+
+public class WordStringCleaner extends WordFilter {
+
+    private LetterDecoderEncoder decoderEncoder;
+
+    public WordStringCleaner(LetterDecoderEncoder decoderEncoder, WordProcessor wordProcessor) {
+        super(wordProcessor);
+        this.decoderEncoder = decoderEncoder;
+    }
+
+    public List<WordCard> transform(WordCard wordCard) {
+        wordCard.setBase(cleanString(wordCard.getBase()));
+        wordCard.setCanonicalForm(cleanString(wordCard.getCanonicalForm()));
+        wordCard.setCanonicalSuffix(cleanString(wordCard.getCanonicalSuffix()));
+        List<FlexiaModel> models = wordCard.getWordsForms();
+        for (FlexiaModel m : models) {
+            m.setSuffix(cleanString(m.getSuffix()));
+            m.setPrefix(cleanString(m.getPrefix()));
+            //made correct code
+            m.setCode(m.getCode().substring(0, 2));
+        }
+        return new LinkedList<WordCard>(Arrays.asList(wordCard));
+    }
+
+
+    private String cleanString(String s) {
+        return decoderEncoder.cleanString(s);
+    }
+}

+ 42 - 0
dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java

@@ -0,0 +1,42 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology.generator;
+
+import org.apache.lucene.morphology.english.EnglishLetterDecoderEncoder;
+import org.apache.lucene.morphology.dictionary.*;
+
+import java.io.IOException;
+import java.util.HashSet;
+
+
+/**
+ * Command-line entry point that reads the English grammar table and source
+ * dictionary, runs every word through the filter chain into a
+ * {@link StatisticsCollector} and saves the collected heuristic to the English
+ * module's {@code morph.info} resource. All paths are relative to the working directory.
+ */
+public class EnglishHeuristicBuilder {
+    public static void main(String[] args) throws IOException {
+        GrammarReader grammarInfo = new GrammarReader("dictonary/Dicts/Morph/egramtab.tab");
+        EnglishLetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder();
+        DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet<String>());
+        StatisticsCollector statisticsCollector = new StatisticsCollector(grammarInfo, decoderEncoder);
+
+        // Words flow outermost-first:
+        // RemoveFlexiaWithPrefixes -> WordStringCleaner -> WordCleaner -> StatisticsCollector.
+        WordProcessor chain = new RemoveFlexiaWithPrefixes(
+                new WordStringCleaner(decoderEncoder,
+                        new WordCleaner(decoderEncoder, statisticsCollector)));
+
+        dictionaryReader.process(chain);
+        statisticsCollector.saveHeuristic("english/src/main/resources/org/apache/lucene/morphology/english/morph.info");
+    }
+}

+ 42 - 0
dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java

@@ -0,0 +1,42 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology.generator;
+
+import org.apache.lucene.morphology.dictionary.*;
+import org.apache.lucene.morphology.russian.RussianLetterDecoderEncoder;
+
+import java.io.IOException;
+import java.util.HashSet;
+
+
+/**
+ * Command-line entry point that reads the Russian grammar table and source
+ * dictionary, runs every word through the filter chain into a
+ * {@link StatisticsCollector} and saves the collected heuristic to the Russian
+ * module's {@code morph.info} resource. All paths are relative to the working directory.
+ */
+public class RussianHeuristicBuilder {
+    public static void main(String[] args) throws IOException {
+        GrammarReader grammarInfo = new GrammarReader("dictonary/Dicts/Morph/rgramtab.tab");
+        RussianLetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();
+        DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<String>());
+        StatisticsCollector statisticsCollector = new StatisticsCollector(grammarInfo, decoderEncoder);
+
+        // Words flow outermost-first:
+        // RussianAdvSplitterFilter -> RemoveFlexiaWithPrefixes -> WordStringCleaner
+        // -> WordCleaner -> StatisticsCollector.
+        WordProcessor chain = new RussianAdvSplitterFilter(
+                new RemoveFlexiaWithPrefixes(
+                        new WordStringCleaner(decoderEncoder,
+                                new WordCleaner(decoderEncoder, statisticsCollector))));
+
+        dictionaryReader.process(chain);
+        statisticsCollector.saveHeuristic("russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info");
+    }
+}

+ 1 - 0
dictionary-reader/src/main/resources/russian-adv-main-code.txt

@@ -0,0 +1 @@
+葯

+ 150 - 0
dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java

@@ -0,0 +1,150 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene;
+
+import org.apache.lucene.morphology.*;
+import org.apache.lucene.morphology.dictionary.*;
+import org.apache.lucene.morphology.english.EnglishLetterDecoderEncoder;
+import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
+import org.apache.lucene.morphology.english.EnglishMorphology;
+import org.apache.lucene.morphology.russian.RussianLetterDecoderEncoder;
+import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
+import org.apache.lucene.morphology.russian.RussianMorphology;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static org.hamcrest.Matchers.hasItem;
+import static org.junit.Assert.assertThat;
+
+
+public class TestAllWords {
+
+    String prefix = "";
+
+    @Before
+    public void setUp() {
+        System.out.println(System.getProperty("user.dir"));
+        prefix = System.getProperty("user.dir").endsWith("dictionary-reader") ? "../" : "";
+
+    }
+
+    @Test
+    public void shouldEnglishMorphologyIncludeAllWordsFormsWithMorphInfo() throws IOException {
+        final MorphologyImpl morphology = new EnglishMorphology();
+        LetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder();
+        String pathToGramma = prefix + "dictonary/Dicts/Morph/egramtab.tab";
+        String pathToDict = prefix + "dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd";
+
+        testFullGramma(morphology, decoderEncoder, pathToGramma, pathToDict);
+
+    }
+
+    @Test
+    public void shouldRussianMorphologyIncludeAllWordsFormsWithMorphInfo() throws IOException {
+        final MorphologyImpl morphology = new RussianMorphology();
+        LetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();
+        String pathToGramma = prefix + "dictonary/Dicts/Morph/rgramtab.tab";
+        String pathToDict = prefix + "dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd";
+
+        testFullGramma(morphology, decoderEncoder, pathToGramma, pathToDict);
+    }
+
+    private void testFullGramma(final MorphologyImpl morphology, LetterDecoderEncoder decoderEncoder, String pathToGramma, String pathToDict) throws IOException {
+        GrammarReader grammarInfo = new GrammarReader(pathToGramma);
+        final List<String> morphInfo = grammarInfo.getGrammarInfo();
+        final Map<String, Integer> inversIndex = grammarInfo.getGrammarInverseIndex();
+
+        DictionaryReader dictionaryReader = new DictionaryReader(pathToDict, new HashSet<String>());
+
+        final AtomicLong wordCount = new AtomicLong(0);
+        Long startTime = System.currentTimeMillis();
+
+        WordProcessor wordProcessor = new WordProcessor() {
+            public void process(WordCard wordCard) throws IOException {
+                String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
+                for (FlexiaModel fm : wordCard.getWordsForms()) {
+                    String wordForm = wordCard.getBase() + fm.getSuffix();
+                    String morph = morphInfo.get(inversIndex.get(fm.getCode()));
+                    assertThat(morphology.getMorphInfo(wordForm), hasItem(word + "|" + morph));
+                    assertThat(morphology.getNormalForms(wordForm), hasItem(word));
+                    wordCount.set(2L + wordCount.get());
+                }
+            }
+        };
+
+        WordCleaner wordCleaner = new WordCleaner(decoderEncoder, wordProcessor);
+        WordStringCleaner wordStringCleaner = new WordStringCleaner(decoderEncoder, wordCleaner);
+        RemoveFlexiaWithPrefixes removeFlexiaWithPrefixes = new RemoveFlexiaWithPrefixes(wordStringCleaner);
+        dictionaryReader.process(removeFlexiaWithPrefixes);
+        long time = System.currentTimeMillis() - startTime;
+        System.out.println("Done " + wordCount.get() + " in " + time + " ms. " + wordCount.get() / (time / 1000.0) + " word per second");
+    }
+
+    @Test
+    public void shouldEnglishLuceneMorphologyIncludeAllWords() throws IOException {
+        final LuceneMorphology morphology = new EnglishLuceneMorphology();
+
+        LetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder();
+        String pathToDic = prefix + "dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd";
+
+        testAllWordForLucene(morphology, decoderEncoder, pathToDic);
+    }
+
+    @Test
+    public void shouldIncludeAllWordsRussianInLuceneMorophology() throws IOException {
+        final LuceneMorphology morphology = new RussianLuceneMorphology();
+
+        LetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();
+
+        String pathToDic = prefix + "dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd";
+
+        testAllWordForLucene(morphology, decoderEncoder, pathToDic);
+
+    }
+
+    private void testAllWordForLucene(final LuceneMorphology morphology, LetterDecoderEncoder decoderEncoder, String pathToDic) throws IOException {
+        final AtomicLong wordCount = new AtomicLong(0);
+        Long startTime = System.currentTimeMillis();
+
+        DictionaryReader dictionaryReader = new DictionaryReader(pathToDic, new HashSet<String>());
+        WordProcessor wordProcessor = new WordProcessor() {
+            public void process(WordCard wordCard) throws IOException {
+                String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
+                for (FlexiaModel fm : wordCard.getWordsForms()) {
+                    String wordForm = wordCard.getBase() + fm.getSuffix();
+                    assertThat(morphology.getNormalForms(wordForm), hasItem(word));
+                    wordCount.set(1L + wordCount.get());
+                }
+            }
+        };
+
+        WordCleaner wordCleaner = new WordCleaner(decoderEncoder, wordProcessor);
+        WordStringCleaner wordStringCleaner = new WordStringCleaner(decoderEncoder, wordCleaner);
+        RemoveFlexiaWithPrefixes removeFlexiaWithPrefixes = new RemoveFlexiaWithPrefixes(wordStringCleaner);
+        dictionaryReader.process(removeFlexiaWithPrefixes);
+
+        long time = System.currentTimeMillis() - startTime;
+        System.out.println("Done " + wordCount.get() + " in " + time + " ms. " + wordCount.get() / (time / 1000.0) + " word per second");
+    }
+
+
+}

+ 163 - 0
dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java

@@ -0,0 +1,163 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer;
+import org.apache.lucene.morphology.analyzer.MorphologyFilter;
+import org.apache.lucene.morphology.english.EnglishAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
+import org.apache.lucene.morphology.russian.RussianAnalyzer;
+import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
+import org.junit.Test;
+
+import java.io.*;
+import java.util.*;
+
+import static org.hamcrest.Matchers.equalTo;
+
+
+public class AnalyzersTest extends BaseTokenStreamTestCase {
+
+    @Test
+    public void shouldGiveCorrectWordsForEnglish() throws IOException {
+        Analyzer morphlogyAnalyzer = new EnglishAnalyzer();
+        String answerPath = "/english/english-analyzer-answer.txt";
+        String testPath = "/english/english-analyzer-data.txt";
+
+        testAnalayzer(morphlogyAnalyzer, answerPath, testPath);
+    }
+
+    @Test
+    public void shouldGiveCorrectWordsForRussian() throws IOException {
+        Analyzer morphlogyAnalyzer = new RussianAnalyzer();
+        String answerPath = "/russian/russian-analyzer-answer.txt";
+        String testPath = "/russian/russian-analyzer-data.txt";
+
+        testAnalayzer(morphlogyAnalyzer, answerPath, testPath);
+    }
+
+    @Test
+    public void emptyStringTest() throws IOException {
+        LuceneMorphology russianLuceneMorphology = new RussianLuceneMorphology();
+        LuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();
+
+        MorphologyAnalyzer russianAnalyzer = new MorphologyAnalyzer(russianLuceneMorphology);
+        InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("тест пм тест".getBytes()), "UTF-8");
+        TokenStream stream = russianAnalyzer.tokenStream(null, reader);
+        MorphologyFilter englishFilter = new MorphologyFilter(stream, englishLuceneMorphology);
+
+        englishFilter.reset();
+        while (englishFilter.incrementToken()) {
+            System.out.println(englishFilter.toString());
+        }
+    }
+
+    @Test
+    public void shouldProvideCorrectIndentForWordWithMelitaForm() throws IOException {
+        Analyzer morphlogyAnalyzer = new RussianAnalyzer();
+        InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("принеси мне вина на новый год".getBytes()), "UTF-8");
+
+        TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
+        tokenStream.reset();
+        Set<String> foromsOfWine = new HashSet<String>();
+        foromsOfWine.add("вина");
+        foromsOfWine.add("винo");
+        boolean wordSeen = false;
+        while (tokenStream.incrementToken()) {
+            CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class);
+            PositionIncrementAttribute position = tokenStream.getAttribute(PositionIncrementAttribute.class);
+            if(foromsOfWine.contains(charTerm.toString()) && wordSeen){
+                assertThat(position.getPositionIncrement(),equalTo(0));
+            }
+            if(foromsOfWine.contains(charTerm.toString())){
+                wordSeen = true;
+            }
+        }
+    }
+
+    private void testAnalayzer(Analyzer morphlogyAnalyzer, String answerPath, String testPath) throws IOException {
+        InputStream stream = this.getClass().getResourceAsStream(answerPath);
+        BufferedReader breader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+        String[] strings = breader.readLine().replaceAll(" +", " ").trim().split(" ");
+        HashSet<String> answer = new HashSet<String>(Arrays.asList(strings));
+        stream.close();
+
+        stream = this.getClass().getResourceAsStream(testPath);
+
+        InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
+
+        TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
+        tokenStream.reset();
+        HashSet<String> result = new HashSet<String>();
+        while (tokenStream.incrementToken()) {
+            CharTermAttribute attribute1 = tokenStream.getAttribute(CharTermAttribute.class);
+            result.add(attribute1.toString());
+        }
+
+        stream.close();
+
+        assertThat(result, equalTo(answer));
+    }
+
+    @Test
+    public void testPositionIncrement() throws IOException {
+        EnglishAnalyzer englishAnalyzer = new EnglishAnalyzer();
+        assertTokenStreamContents(
+                englishAnalyzer.tokenStream("test", "There are tests!"),
+                new String[]{"there", "are", "be", "test"},
+                new int[]{0, 6, 6, 10},
+                new int[]{5, 9, 9, 15},
+                new String[]{"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>"},
+                new int[]{1, 1, 0, 1}
+        );
+    }
+
+    @Test
+    public void testKeywordHandling() throws IOException {
+        Analyzer analyzer = new EnglishKeywordTestAnalyzer();
+        assertTokenStreamContents(
+                analyzer.tokenStream("test", "Tests shouldn't be stemmed, but tests should!"),
+                new String[]{"tests", "shouldn't", "be", "stem", "but", "test", "shall"}
+        );
+    }
+
+    private static class EnglishKeywordTestAnalyzer extends Analyzer {
+        @Override
+        protected TokenStreamComponents createComponents(String s) {
+            StandardTokenizer src = new StandardTokenizer();
+            CharArraySet dontStem = new CharArraySet(1, false);
+            dontStem.add("Tests");
+            TokenFilter filter = new SetKeywordMarkerFilter(src, dontStem);
+            filter = new LowerCaseFilter(filter);
+            try {
+                filter = new MorphologyFilter(filter, new EnglishLuceneMorphology());
+            } catch (IOException ex) {
+                throw new RuntimeException("cannot create EnglishLuceneMorphology", ex);
+            }
+            return new TokenStreamComponents(src, filter);
+        }
+    }
+}

+ 66 - 0
dictionary-reader/src/test/java/org/apache/lucene/morphology/LuceneMorphTest.java

@@ -0,0 +1,66 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology;
+
+import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
+import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
+import org.junit.Test;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.junit.Assert.assertThat;
+
+
+/**
+ * Checks the normal forms returned by the English and Russian Lucene
+ * morphologies against the expectations stored in the test resource files.
+ */
+public class LuceneMorphTest {
+
+    @Test
+    public void englishMorphologyShouldGetCorrectNormalForm() throws IOException {
+        LuceneMorphology luceneMorph = new EnglishLuceneMorphology();
+        String pathToTestData = "/english/english-morphology-test.txt";
+        testMorphology(luceneMorph, pathToTestData);
+    }
+
+    @Test
+    public void russianMorphologyShouldGetCorrectNormalForm() throws IOException {
+        LuceneMorphology luceneMorph = new RussianLuceneMorphology();
+        List<String> v = luceneMorph.getMorphInfo("вина");
+        System.out.println(v);
+        String pathToTestData = "/russian/russian-morphology-test.txt";
+        testMorphology(luceneMorph, pathToTestData);
+    }
+
+    /**
+     * Verifies every line of the given resource. Each line has the form
+     * {@code word normalForm1 normalForm2 ...} (space separated); the set of
+     * normal forms returned for {@code word} must equal the listed set.
+     *
+     * @param luceneMorph    morphology under test
+     * @param pathToTestData classpath location of the expectations file (UTF-8)
+     * @throws IOException if the resource is missing or cannot be read
+     */
+    private void testMorphology(LuceneMorphology luceneMorph, String pathToTestData) throws IOException {
+        InputStream stream = this.getClass().getResourceAsStream(pathToTestData);
+        if (stream == null) {
+            // Fail with the path instead of a bare NullPointerException from the reader.
+            throw new IOException("Test resource not found on classpath: " + pathToTestData);
+        }
+        // Closing the reader also closes the underlying stream, even when an assertion fails.
+        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"))) {
+            String s = bufferedReader.readLine();
+            while (s != null) {
+                String[] qa = s.trim().split(" ");
+                Set<String> result = new HashSet<String>(Arrays.asList(qa).subList(1, qa.length));
+                Set<String> stringList = new HashSet<String>(luceneMorph.getNormalForms(qa[0]));
+                assertThat(stringList, equalTo(result));
+                s = bufferedReader.readLine();
+            }
+        }
+    }
+}

+ 1 - 0
dictionary-reader/src/test/resources/english/english-analyzer-answer.txt

@@ -0,0 +1 @@
+following follow the instruction exactly will be help ensure the best well good result

+ 1 - 0
dictionary-reader/src/test/resources/english/english-analyzer-data.txt

@@ -0,0 +1 @@
+Following the instructions exactly will help ensure the best results

+ 8 - 0
dictionary-reader/src/test/resources/english/english-morphology-test.txt

@@ -0,0 +1,8 @@
+purchases purchase
+existing exist
+was be
+men man
+bore bore bear
+grown grow grown
+came come
+md md

+ 1 - 0
dictionary-reader/src/test/resources/russian/russian-analyzer-answer.txt

@@ -0,0 +1 @@
+в результат крушение погибнуть командир отряд специальный назначение пря при переть гувд ростовский область полковник милиция михаил перов и предприниматель

+ 1 - 0
dictionary-reader/src/test/resources/russian/russian-analyzer-data.txt

@@ -0,0 +1 @@
+В результате крушения погибли командир отряда специального назначения при ГУВД Ростовской области полковник милиции Михаил Перов и предприниматель

+ 20 - 0
dictionary-reader/src/test/resources/russian/russian-morphology-test.txt

@@ -0,0 +1,20 @@
+еду еда ехать
+тестов тест
+вина вино вина
+вино вино
+ехать ехать
+ананасов ананас ананасовый
+сухой сухой
+дураков дурак
+пушка пушка пушок
+пушок пушок
+пушек пушка
+козлов козлов козловый козел
+жуков жуков жук
+красив красить красивый
+красивая красивый
+тосклив тоскливый
+лучший хороший
+на на
+тест тест тесто
+наибольшую наибольший

+ 153 - 0
dictonary/Dicts/GraphAn/abbr.eng

@@ -0,0 +1,153 @@
+//  Each token in this file should be  separated by a space. Otherwise it 
+// would not be recognized as an abbreviation. For example:
+// Mr. 
+// would  be treated as one token, while Graphan module divides it into
+// two tokens. So in this file it should be written as follows:
+// Mr . 
+
+ANON . 
+APPROX . 
+ASSN . 
+ASSOC . 
+AVG . 
+CAPT . 
+CC . 
+CH . 
+CIRC . 
+CM . 
+CO . 
+COL . 
+COMDR . 
+CONT . 
+CONTD . 
+COR . 
+CORP . 
+CPL . 
+CU . 
+DD . 
+DEC . 
+DEF . 
+DEP . 
+DIST . 
+DIV . 
+DLR . 
+DPT . /:u
+DR . /:u
+ED . /:u
+EG . 
+ELEV . 
+EQ . 
+ESQ . 
+EST . 
+EXC . 
+EXCH . 
+EXEC . 
+EXP . 
+EXT . 
+FF . 
+FIG . 
+FL . 
+FLD . 
+FR . 
+FT . 
+GA . 
+GAL . 
+GEN . 
+GL . 
+GM . 
+GR . 
+HRS . 
+IB . 
+IBID . 
+ID . 
+ILL . 
+ILLUS . 
+INC . 
+INCL . 
+INCOG . 
+IV . 
+JR . 
+KM . 
+LAT . 
+LB . 
+LG . 
+LGE . 
+LT . 
+LTD . 
+MA . 
+MAJ . 
+MI . 
+MIL . 
+MO . 
+MR . /:u
+MRS . /:u
+MS . /:u
+NO . 
+PCT . 
+PG . 
+PP . 
+PROF . 
+PT . 
+QT . 
+RE . 
+REF . 
+REP . 
+REV . 
+RF . 
+SGT . 
+SP . 
+SQ . 
+ST . 
+SW . 
+TN . 
+U . S . /:u
+U . S . 
+WK . 
+YR . 
+( ? )
+( ! )
+Ala . 
+Ariz . 
+Ark . 
+Calif . 
+Colo . 
+Conn . 
+Del . 
+Fla . 
+Ga . 
+Iowa Kan . 
+Ky . 
+La . 
+Md . 
+Ma . 
+Mass . 
+Me . 
+Mich . 
+Minn . 
+Miss . 
+Mo . 
+Mont . 
+Neb . 
+Nev . 
+N . H . 
+N . J . 
+N . M . 
+N . Y . 
+N . C . 
+N . D . 
+Okla . 
+Ore . 
+Pa . 
+RI 
+R . I . 
+S . C . 
+SD S . D . 
+Tenn . 
+Tex . 
+Texas
+Va . 
+Vt . 
+Wash . 
+W . V . 
+Wis . 
+Wy . 

File diff suppressed because it is too large
+ 3555 - 0
dictonary/Dicts/GraphAn/abbr.ger


+ 34 - 0
dictonary/Dicts/GraphAn/abbr.rus

@@ -0,0 +1,34 @@
+//  Each token in this file should be  separated by a space. Otherwise it 
+// would not be recognized as an abbreviation. For example:
+// Mr. 
+// would  be treated as one token, while Graphan module divides it into
+// two tokens. So in this file it should be written as follows:
+// Mr . 
+
+СМ . /:u
+УЛ . 
+КВ . 
+ПП . 
+ГГ . 
+ПОС . 
+ТОВ . 
+ТАБЛ . 
+УЧ . 
+ТАБ . 
+РИС . 
+ГЛ . 
+РУК . 
+ТЕЛ . 
+ПОД . 
+ЭТ . 
+СТР . 
+ТЫС . 
+МЛН . 
+МЛРД . 
+ТРЛН . 
+ШТ . 
+И Т . П . 
+И Т . Д . 
+P . Хр . 
+Т . Е . /:a
+Г . /:a

File diff suppressed because it is too large
+ 1708 - 0
dictonary/Dicts/GraphAn/abbrev.log


File diff suppressed because it is too large
+ 2185 - 0
dictonary/Dicts/GraphAn/enames.lem


File diff suppressed because it is too large
+ 9877 - 0
dictonary/Dicts/GraphAn/enames.txt


+ 36 - 0
dictonary/Dicts/GraphAn/extensions.txt

@@ -0,0 +1,36 @@
+arc
+arj
+bat
+bin
+bmp
+cmd
+com
+dbf
+dll
+doc
+dvi
+exe
+gz
+Hqx
+htm
+html
+lhz
+mrw
+nsf
+pdb
+ps
+rcf
+rtf
+shar
+sit
+slf
+syn
+sys
+tar
+tex
+txt
+wav
+win
+z
+zip
+zoo

+ 28 - 0
dictonary/Dicts/GraphAn/idents.txt

@@ -0,0 +1,28 @@
+3.1
+3.11
+a:
+b:
+c:
+C++
+CLIENT/SERVER
+d:
+FAT16
+FAT32
+I/O
+input/output
+M/B
+moderator/demoderator
+OS/2
+peer-to-peer
+POP1
+POP2
+POP3
+PS/2
+TCP/IP
+à:
+ñ:
+ñè++
+ÑÈ++
+áèò/ñ
+Unix-to-Unix
+terminate-and-stay

+ 29 - 0
dictonary/Dicts/GraphAn/keyboard.txt

@@ -0,0 +1,29 @@
+[modifiers]
+shift
+ctrl
+alt
+control
+Shift
+Ctrl
+Alt
+Control
+SHIFT
+CTRL
+ALT
+CONTROL
+
+[keys]
+PgUp
+F1
+F2
+@
+Page Down
+ScrLock 
+Scroll Lock
+Caps Lock
+CapsLock
+Num Lock
+NumLock
+Del
+Delete
+

+ 651 - 0
dictonary/Dicts/GraphAn/keyword

@@ -0,0 +1,651 @@
+%Z10  Правила заполнения  зон
+      cм. в файле  header.des
+
+  Правило 1
+  Помни! Если один <Вариант ТО> (левая часть распознающего предложения), на-
+  чинается с тех же слов, что и другой, то НУЖНО ставить более длинный вариант
+  перед более коротким!
+
+  Правило 2 
+  В переменных заглавные и строчные буквы различаются.
+
+	    
+  Минимальный незанятый номер зоны  :  23
+  Дата изменения : 03.10.95
+  Зарезервированные символы  # $ +
+
+
+%Z1  грифы приложений в тексте    03.07.95
+// 
+
+#10 утвержд     #n       = $A1   // комментарии
+#10 проект      #n       = $A2
+#10 одобрен     #n       = $A1
+#10 типовой проект #n    = $A2
+#10 выписка     #n       = $A3
+#10 форма N #   #n       = $A4
+#10 перевод с # #n       = $A5
+#10 приложени N  # #n    = $A1
+#10 приложени # #n       = $A1
+#10 приложени   #n       = $A1
+#10 приложен к # #n      = $A1
+.+         #n      = $A1
+
+
+%Z16   выходные данные для типа приложений в header (ТИП_ПРИЛ) 03.07.95
+
+ $A1  =  Приложение
+ $A2  =  Проект
+ $A3  =  Выписка
+ $A4  =  Форма
+ $A5  =  Перевод
+
+%Z2  инстанции  в тексте  03.07.95
+
+верховн совет РСФСР                           = $I12   
+вице - президент РФ                 = $I4
+вице - президент Российск Федераци  = $I4
+государственн советник РСФСР # # # # #  = $I5
+государственн секретар РСФСР # # # # #  = $I6
+законодательн собран санкт - петерб           = $IB 
+заместител председат совета министров  # # #  = $I8
+засед централь изб комисс росс фед        = $ID
+конституц суд росс федер                  = $IJ 
+минис связи росс федер                        = $IF
+минис внутр дел росс федер   =  $IL
+председател совет министров                   = $I3
+пpедседател пpавите pосс федеpа               = $I7
+президент РФ                        = $I1
+президент Российск Федераци         = $I1
+президент РСФСР                     = $I2
+президент Российск советск # # #    = $I2
+российск  федерац                             = $I9  
+российск  советс федерат социал респуб         = $I13   
+совет министров - # # #             = $I3
+совет федера федер собр росс федер            = $I10
+cовет федерации                               = $I10
+государ дум федер собр росс федер              = $I11
+государ дум                                   = $I11
+государ налогов служб росс федер             = $IE
+губернатор санкт - петербурга                 = $IA
+пленум верх суд росс федер   =  $IG
+правительств москв     =  $IK
+правительств РСФСР                         = $I7
+правительств РФ                     = $I3
+правительств Российск Федераци      = $I3
+РСФСР                                         = $I13  
+союз советс социал респ                     = $IH
+центральный банк россии                       = $IC
+централь изб комисс росс фед                  = $ID
+централь изб ком по выбор в гос дум фед соб рос фед  = $ID
+централь изб ком по выбор в сов фед и по выб в гос дум фед соб рос фед  = $ID
+централь изб ком по        = $ID
+централь изб ком            = $ID
+
+// $IL
+
+%Z13  выходные инстанции в header (ИНСТ) 02.08.95
+
+$I1  = Президент РФ
+$I2  = Президент  РСФСР
+$I3  = Правительство РФ
+$I4  = Вице-президент РФ
+$I5  = Государственный советник РСФСР
+$I6  = Государственный секретарь РСФСР
+$I7  = Правительство РСФСР
+$I8  = Заместитель Председателя Совета Министров
+$I9  = нет
+$I10 = Совет Федерации
+$I11 = Государственная Дума
+$I12 = Верховный Совет  РСФСР
+$I13 = РСФСР
+$IA  = Губернатор Санкт-Петербурга
+$IB  = Законодательное Собрание Санкт-Петербурга
+$IC  = Центральный Банк России 
+$ID  = Центральная Избирательная Комиссия
+$IE  = Государственная налоговая служба РФ
+$IF  = Министерство связи РФ
+$IG  = Пленум Верховного Суда РФ
+$IH  = Cоюз Cоветских Социалистических Республик
+$IJ  = Конституционный Суд РФ
+$IK  = Правительство Москвы 
+$IL  = Министерство Внутренних Дел РФ
+ 
+%Z3  классы документов в тексте    03.07.95
+
+распоряжение             = $C1
+указ                     = $C2
+постановление            = $C3
+послание                 = $C4
+
+федеральный конст закон  = $C5
+федеральный закон        = $C5
+закон санкт - петербург  = $C7 
+закон                    = $C5
+# кодекс росс федер      = $C5
+#     кодекс             = $C5
+приказ                   = $C6
+соглашение               = $C8
+договор                  = $C9
+письмо                   = $CA    
+телеграмма               = $CB
+протокол                 = $CC
+
+Выписка из протокол N # засед =  $CD
+
+%Z14  выходные классы документов в header (ТИП_ДОК) 03.07.95
+
+$C1   =  Распоряжение
+$C2   =  Указ
+$C3   =  Постановление
+$C4   =  Послание
+$C5   =  Закон
+$C6   =  Приказ
+$C7   =  Закон Санкт-Петербурга
+$C8   =  Соглашение
+$C9   =  Договор
+$CA   =  Письмо
+$CB   =  Телеграмма
+$CC   =  Протокол
+$CD   =  Выписка из протокола
+
+%Z8  построение типа документа 06.07.95
+
+$I1 $C1  =   $T1
+$I2 $C1  =   $T1
+
+$I1 $C2  =   $T2
+$I2 $C2  =   $T2
+$I1 $C3  =   $T2
+$I2  $C3  =   $T2
+
+$I4  $C1  =   $T3
+$I3  $C1  =   $T4
+$I7  $C1  =   $T4
+$I3  $C3  =   $T5
+$I7  $C3  =   $T5
+$I5  $C1  =   $T6
+$I6  $C1  =   $T9
+$I8  $C1  =   $T7
+$IA $C1  = $TA
+$IA $C6  = $TB
+
+$IB $C3  = $TC
+$IB $C7  = $TC
+
+$I9  $C5  =   $T8
+$IH  $C5  =   $TO
+#    $C5  =   $T8
+
+$T10 $T10 =   $T10
+
+$I3  $C8  =   $TD
+$I1  $C9  =   $TE
+$I1  $C9  =   $TF
+$I11 $C3  =   $T11
+$IC  $CA  =   $TG
+$IC  $CB  =   $TF
+
+$ID  $C3  =   $TH
+$ID  $CC  =   $TN
+
+
+$IE  $C3  =   $TJ
+$IE  $CA  =   $TI
+
+$IF  $C6  =   $TK
+
+$I10  $C3  =   $TL
+
+$IG   $C3  =  $TM
+
+$IJ   $C3  =  $TР
+
+$IL   $C6  =  $TS
+
+$ID $C3 $IK =  $TR
+$IK $C3 $ID =  $TR
+
+$ID $CD = $TQ
+
+%Z18  выходной тип документа для имени файла и header 06.07.95
+
+$T1  =    {"Распоряжение Президента РФ",ar}
+$T2  =    {"Указ Президента РФ",au}
+$T3  =    {"Распоряжение Вице-президента РФ",av}
+$T4  =    {"Распоряжение Правительства РФ",as}
+$T5  =    {"Постановление Правительства РФ",ap}
+$T6  =    {"Распоряжение Государственного секретаря",ab}
+$T7  =    {"Распоряжение Заместителя Председателя Правительства РФ",az}
+$T8  =    {"Закон РФ",al}
+$T9  =    {"Распоряжение Государственного советника",aа}
+$T10 =    {"Двухсторонний документ",ai}
+$TA  =    {"Распоряжение Губернатора Санкт-Петербурга",an}
+$TB  =    {"Приказ Губернатора Санкт-Петербурга",ak}
+$TC  =    {"Законы Санкт-Петербурга",ad}
+$TD  =    {"Соглашение правительства РФ",ac}
+$TE  =    {"Договор Президента РФ",ae}
+$TF  =    {"Соглашение Президента РФ",af}
+$T11  =    {"Постановление Государственной Думы",ag}
+$TG   =    {"Письмо Центрального Банка России",ao}
+$TF   =    {"Телеграмма Центрального Банка России",aj}
+$TH   =    {"Постановление Центральной Избирательной Комиссии РФ",at}
+$TJ   =    {"Постановление Государственной налоговой службы РФ",ax}
+$TI   =    {"Письмо Государственной налоговой службы РФ",ay}
+$TK   =    {"Приказ Министерства связи РФ",aw}
+$TL   =    {"Постановление Совета Федерации РФ",am}
+$TM   =    {"Постановление Пленума Верховного Суда РФ",aq}
+$TN   =    {"Протокол заседания Центральной Избирательной Комиссии РФ",ah}
+$TO   =    {"Закон СССР",ba}
+$TР   =    {"Постановление Конституционного Суда РФ",bb}
+$TR   =    {"Совместное постановление Правительства Москвы и Центризбиркома РФ",bс}
+$TS   =	   {"Приказ Министерства Внутренних Дел РФ",bd}	
+$TQ   =    {"Выписка",bе}
+
+%Z4  Подпись и в тексте и в header (АВТОР)  02.08.95
+
+белобородов  =  А.Белобородов  
+булгак       =  В.Булгак
+бурбулис     =  Г.Бурбулис
+веденеев     =  Ю.Веденеев
+вешняков     =  А.Вешняков
+гайдар       =  Е.Гайдар
+галушко      =  И.Галушко
+горбачев     =  М.Горбачев
+демидов      =  В.Демидов
+ельцин       =  Б.Ельцин
+заверюха     =  А.Заверюха
+загуляев     =  В.Загуляев   
+иванченко    =  A.Иванченко
+исаев        =  Б.Исаев
+лобов        =  О.Лобов
+лебедев      =  В.Лебедев
+куликов      =  А.Куликов
+лужков       =  Ю.Лужков
+махарадзе    =  В.Махарадзе
+павлов       =  В.ПАВЛОВ
+парамонова   =  Т.Парамонова 
+полторанин   =  М.Полторанин
+руцкой       =  А.Руцкой
+рябов        =  Н.Рябов
+рыбкин       =  И.Рыбкин
+салтыков     =  Б.Салтыков
+сосковец     =  О.Сосковец
+кравцов      =  Ю.Кравцов
+некрасов     =  В.Некрасов
+селезнев     =  Г.Селезнев
+станкевич    =  С.Станкевич
+федоров      =  Б.Федоров
+хижа         =  Г.Хижа
+хандруев     =  А.Хандруев
+хасбулат     =  Р.Хасбулатов
+черномырдин  =  В.Черномырдин
+чубайс       =  А.Чубайс
+шахрай       =  С.Шахрай
+шохин        =  А.Шохин
+шумейко      =  В.Шумейко
+яковлев      =  В.Яковлев
+яров         =  Ю.Яров
+
+
+
+
+
+
+
+
+%Z5  первые слова в заголовках    29.09.95
+
+ввод = ВВОД
+ведомость = ВЕДОМОСТЬ
+вкладыш = ВКЛАДЫШ
+выделение = ВЫДЕЛЕНИЕ
+границы = ГРАНИЦЫ
+график = ГРАФИК
+декларация = ДЕКЛАРАЦИЯ
+договоры = ДОГОВОРЫ
+договор = ДОГОВОР
+дополнения = ДОПОЛНЕНИЯ
+доходы = ДОХОДЫ
+задание = ЗАДАНИЕ
+изменения = ИЗМЕНЕНИЯ
+инструкция = ИНСТРУКЦИЯ
+количество = КОЛИЧЕСТВО
+комитет = КОМИТЕТ
+комплекс = КОМПЛЕКС
+конвенция = КОНВЕНЦИЯ
+концепция = КОНЦЕПЦИЯ
+координаты = КООРДИНАТЫ
+критерии = КРИТЕРИИ
+лицензия = ЛИЦЕНЗИЯ
+меморандум = МЕМОРАНДУМ
+мероприятия = МЕРОПРИЯТИЯ
+номенклатура = НОМЕНКЛАТУРА
+нормативы отчислений = НОРМАТИВЫ ОТЧИСЛЕНИЙ
+нормативы = НОРМАТИВЫ
+норма = НОРМА
+нормы = НОРМЫ
+нота = НОТА
+образцы документов = ОБРАЗЦЫ ДОКУМЕНТОВ
+обращение = ОБРАЩЕНИЕ
+объемы = ОБЪЕМЫ
+объем = ОБЪЕМ
+описание = ОПИСАНИЕ
+оргкомитет = ОРГКОМИТЕТ
+перечень = ПЕРЕЧЕНЬ
+перечни = ПЕРЕЧНИ
+персональный состав = ПЕРСОНАЛЬНЫЙ СОСТАВ
+план мероприятий   = ПЛАН МЕРОПРИЯТИЙ
+план = ПЛАН
+положение = ПОЛОЖЕНИЕ
+порядок = ПОРЯДОК
+послание = ПОСЛАНИЕ
+постановление = ПОСТАНОВЛЕНИЕ
+правила = ПРАВИЛА
+предложения = ПРЕДЛОЖЕНИЯ
+предписание = ПРЕДПИСАНИЕ
+приложение = ПРИЛОЖЕНИЕ
+прогноз = ПРОГНОЗ
+программа = ПРОГРАММА
+производство = ПРОИЗВОДСТВО
+протокол = ПРОТОКОЛ
+размеры = РАЗМЕРЫ
+разъяснения = РАЗЪЯСНЕНИЯ
+распределение = РАСПРЕДЕЛЕНИЕ
+расчет = РАСЧЕТ
+регламент = РЕГЛАМЕНТ
+режим = РЕЖИМ
+сведения = СВЕДЕНИЯ
+совместное соглашение = СОВМЕСТНОЕ СОГЛАШЕНИЕ
+соглашение = СОГЛАШЕНИЕ
+сообщение = СООБЩЕНИЕ
+состав = СОСТАВ
+списки = СПИСКИ
+список = СПИСОК
+справка = СПРАВКА
+сроки = СРОКИ
+ставки = СТАВКИ
+статус = СТАТУС
+структура = СТРУКТУРА
+схема = СХЕМА
+текст = ТЕКСТ
+указания = УКАЗАНИЯ
+условия = УСЛОВИЯ
+устав = УСТАВ
+функции = ФУНКЦИИ
+штрафы = ШТРАФЫ
+экспликация = ЭКСПЛИКАЦИЯ
+
+
+%Z6   не используется  03.07.95
+
+о = $FW
+об = $FW
+вопросы = $FW
+
+%Z7  Место подписания в тексте 03.07.95
+
+москва , кремль                  = $SP1
+г .  Москва , кремль             = $SP1
+г . Москва                       = $SP2
+Москва , Дом Советов России      = $SP3
+москва                           = $SP2
+$Dig2 часов $Dig2 минут москва , кремль  = $SP1
+Санкт - Петербург , Мариин двор  = $SP4
+Санкт - Петербург               = $SP5
+
+%Z15  Место подписания в header (МЕСТО)  03.07.95
+
+$SP1  = Москва,Кремль
+$SP2  = Москва
+$SP3  = Москва, Дом Советов России
+$SP4  = Санкт-Петербург, Мариинский  дворец
+$SP5  = Санкт-Петербург
+
+%Z9  Расширение выходного номера в тексте    03.07.95
+
+- рп  = -рп 
+- рв  = -рв 
+- рз  = -рз 
+- р   = -р 
+- ргс = -ргс 
+- фз  = -фз 
+- I   = -I  
+- 1   = -1
+- к   = -к  
+- гд  = -гд 
+- сф  = -сф 
+- фкз = -фкз 
+- ФКЗ = -фкз 
+  сф  = _сф  
+  гд  = _гд  
+
+%Z11 Год подписания документа в тексте  03.07.95
+
+ #y года . = $Y|0
+ #y года   = $Y|0
+ #y г .    = $Y|0
+ #y г      = $Y|0
+ #у        = $Y|0
+ . #y года = $Y|1
+ . #y г .  = $Y|1
+ . #y      = $Y|1
+
+
+%Z12  должности в тексте    30.09.95
+
+президент РФ                        = $P1
+президент Российск Федераци         = $P1
+президент РСФСР                     = $P2
+президент Российск советск # # #    = $P2
+вице - президент РФ                   = $P3
+вице - президент Российск Федераци    = $P3
+государственн советник РСФСР # # # # #  = $P4
+государственн секретар РСФСР # # # # #  = $P5
+первый заместитель Председателя Центрального банка # # = $PD
+перв заместитель председател # # #      = $P6
+за Секретаря,член Центральной избирательной комиссии # # = $PT
+зам . председат  правительств #        = $P6
+замест председ верховн совета РСФСР              = $P10
+замест председ совета министров  # # #  = $P8
+замест председ правительств #  = $P6
+замест председ централь изб ком росс фед  = $PL
+замест руков госуд налоговой служ росс фед - гос сов налог служб 1 ранга =  $PG
+замест руков госуд налоговой служ росс фед - гос сов налог служб II ранга =  $PG
+замест руков госналог РФ - гос сов налог служб II ранга =  $PG
+министр генерал - полковник = $PS
+председ верхов совет РСФСР                    = $P9
+председ верхов совет СССР                    = $PP
+председ верхов суда росс федер                = $PN
+председ государ дум федер собран росс федер =  $PJ
+председ законод собр санкт - петер = $PA
+пpедсед пpавите pосс федеpа               = $P7
+председ совет министров - прав росс федер  = $P7
+председ совет министров                    = $P7
+председ совет федер федер собран росс федер = $PK
+председ центральной избирательной комиссии # # = $PF
+премьер правите москвы              =  $PR
+губернатор санкт - петер              = $P11
+И . о . Председателя Центрального банка # # =  $PB
+И . о . Секрет Центральной избирательной комиссии # # =  $PM
+Секретарь Центральной избирательной комиссии # # =  $PE
+Секретарь Плен , судья Верхов Суд росс  федер  = $PO
+федерал министр связи росс фед     =   $PI
+
+//  Max Number  = $PT
+
+
+						      
+%Z17  должности в header (ДОЛЖ_АВТ) 30.09.95
+
+$P1   = Президент РФ
+$P2   = Президент РСФСР
+$P3   = Вице-президент РФ
+$P4   = Государственный советник
+$P5   = Государственный секретарь
+$P6   = Заместитель Председателя Правительства РФ
+$P7   = Председатель Совета Министров РФ
+$P8   = Заместитель Председателя Совета Министров РФ
+$P9   = Председатель Верховного Совета РСФСР
+$P10  = Заместитель Председателя Верховного Совета РСФСР
+$P11  = Губернатор Санкт-Петербурга
+$PA   = Председатель Законодательного Собрания Санкт-Петербурга
+$PB   = И. о. Председателя Центрального банка РФ
+$PD   = Первый заместитель Председателя Центрального банка РФ
+$PE   = Секретарь Центральной избирательной комиссии РФ
+$PF   = Председатель Центральной избирательной комиссии РФ
+$PG   = Заместитель Руководителя Госналогслужбы РФ
+$PI   = Федеральный министр связи РФ
+$PJ   = Председатель Государственной Думы Федерального Собрания РФ
+$PK   = Председатель Совета Федерации Федерального Собрания РФ
+$PL   = Заместитель Председателя Центральной избирательной комиссии РФ                                          
+$PM   = И. о. Секретаря Центральной избирательной комиссии РФ
+$PN   = Председатель Верховного Суда РФ
+$PO   = Секретарь Пленума, судья Верховного Суда РФ
+$PP   = Председатель Верховного Совета СССР
+$PR   = Премьер Правительства Москвы
+$PS   = Министр генерал-полковник
+$PT   = За Секретаря,член Центральной избирательной комиссии РФ
+
+%Z19  стандарты  оформления приложений (пока не анализир.)  17.07.95
+
+ЛЕВ_ОТСТУП ТИП_ПРИЛ [АБЗАЦ] [ЗГЛ] ТЕКСТ = $AO
+
+%Z20  стандарты  оформления документов в graphmat.cfg (DocOrder)  17.07.95
+
+// Автомат, DocOrder 0
+АВТОМАТ  = $DA                {ESC}
+
+// Дата вверху, DocOrder  1
+[НАДПИСКИ] ТИП_ДОК ДАТА N_ТЕКСТ [МЕСТО] [ЗГЛ] ТЕКСТ (ДОЛЖ_АВТ АВТОР)* = $DO0
+
+// Дата внизу,  DocOrder  2
+[НАДПИСКИ] ТИП_ДОК [ЗГЛ] ТЕКСТ (ДОЛЖ_АВТ АВТОР)* [МЕСТО] ДАТА N_ТЕКСТ = $DO1
+
+// Закон со статусом, DocOrder  3
+ТИП_ДОК ДАТА N_ТЕКСТ [МЕСТО] [ЗГЛ] [ПРИНЯТ] [ОДОБР] ТЕКСТ (ДОЛЖ_АВТ АВТОР)* = $DO2
+
+// советско-американские документы, DocOrder  4
+N_ТЕКСТ [ЗГЛ] ДАТА ТЕКСТ = $DO3
+
+// свободный текст, DocOrder 5
+[ЗГЛ] ТЕКСТ   = $DO5
+
+// Закон "со статусом" + "дата внизу"  , DocOrder  6
+ТИП_ДОК [ЗГЛ]  [ПРИНЯТ]  [ОДОБР] ТЕКСТ (ДОЛЖ_АВТ АВТОР)* [МЕСТО] ДАТА N_ТЕКСТ = $DO2
+
+// Распоряжения Губернатора Санкт-Петербурга 
+// "дата вверху" + "с псевдографикой", DocOrder 7
+КЛАСС_ДОК СТРОКА("””””") ИНСТ ДАТА N_ТЕКСТ [ЗГЛ_НЕЦЕНТ] ТЕКСТ [ДОЛЖ_АВТ] [АВТОР] [АБЗАЦ("текст доку")] = $DO0
+	
+// Дата cверху, ссылка в шапке DocOrder  8
+КЛАСС_ССЫЛКИ КЛАСС_ДОК N_ТЕКСТ ИНСТ ДАТА [ЗГЛ] ТЕКСТ (ДОЛЖ_АВТ АВТОР)* = $DO4
+
+
+%Z21
+
+$DO0 $AO   = $DT0
+$DO1 $AO   = $DT0
+$DO2 $AO   = $DT1
+// выписки
+$DO4 #     = $DT4
+
+// свободный текст
+$DO5 #     = $DT3
+#    #     = $DT3
+
+%Z22 Порядок и факультативность полей в выходном файле 
+// Header-file
+// Error-file
+// Pss-file
+
+$DT0 = {"Собрание законодательных актов РФ",
+	 "НАЧАЛО ИНСТ КЛАСС_ДОК ТИП_ДОК ДАТА_ПОДП ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД МЕСТО_ПОДП ПОДПИСЬ ДОЛЖ_ПОДП КОЛ_ПРИЛ МАССИВ РАЗМЕР КОНЕЦ",
+	 "ИНСТ КЛАСС_ДОК [ЗГЛ] ТИП_ДОК ДАТА_ПОДП N_ТЕКСТ ПОДПИСЬ",
+	 "ДАТА_ПОДП ИНСТ КЛАСС_ДОК ЗГЛ N_ТЕКСТ ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП НАДПИСКИ",
+  // приложение
+	 "НАЧАЛО ИНСТ КЛАСС_ДОК ТИП_ДОК ТИП_ПРИЛ ГЛАВ_ДОК ДАТА_ПОДП ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП КОЛ_ПРИЛ  МАССИВ РАЗМЕР КОНЕЦ",
+	 "[ЗГЛ]",
+	 "ТИП_ПРИЛ ШАПКА_ПРИЛ ЗГЛ"}
+
+$DT1 = {"Собрание законодательных актов РФ",
+	 "НАЧАЛО КЛАСС_ДОК ТИП_ДОК [СТАТУС] ДАТА_ПОДП ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП КОЛ_ПРИЛ МАССИВ РАЗМЕР КОНЕЦ",
+	 "КЛАСС_ДОК ТИП_ДОК ДАТА_ПОДП [ЗГЛ] N_ТЕКСТ ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП",
+	 "ИНСТ КЛАСС_ДОК [CТАТУС] ДАТА_ПОДП ЗГЛ N_ТЕКСТ ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП",
+  // приложение
+	 "НАЧАЛО КЛАСС_ДОК ТИП_ДОК ТИП_ПРИЛ ГЛАВ_ДОК [СТАТУС] ДАТА_ПОДП ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП КОЛ_ПРИЛ МАССИВ РАЗМЕР КОНЕЦ",
+	 "[ЗГЛ]",
+	 "ТИП_ПРИЛ ШАПКА_ПРИЛ ЗГЛ"}
+
+$DT2 = {"Российско-американские документы",
+	 "НАЧАЛО КВА_ТИП ТИП_ДОК ДАТА_ПОДП ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД МАССИВ РАЗМЕР КОНЕЦ",
+	 "ТИП_ДОК ДАТА_ПОДП ЗГЛ N_ТЕКСТ",
+	 "ДАТА_ПОДП ЗГЛ N_ТЕКСТ ПОДПИСЬ",
+	// приложение
+	 "НАЧАЛО КВА_ТИП ТИП_ПРИЛ ТИП_ДОК ГЛАВ_ДОК ДАТА_ПОДП ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД МАССИВ РАЗМЕР КОНЕЦ",
+	 "[ЗГЛ]",
+	 "ТИП_ПРИЛ ЗГЛ"}
+
+$DT3 = {"Свободный текст",
+	 "НАЧАЛО ЗГЛ N_ВХОД N_ВЫХОД МАССИВ РАЗМЕР КОНЕЦ",
+	 "[ЗГЛ]",
+	 "ЗГЛ",
+	// приложение
+	 "НАЧАЛО ТИП_ПРИЛ ГЛАВ_ДОК ЗГЛ N_ВХОД N_ВЫХОД МАССИВ РАЗМЕР КОНЕЦ",
+	 "",
+	 ""}
+
+$DT4 = {"Выписки",
+	 "НАЧАЛО КЛАСС_ДОК ТИП_ДОК ДАТА_ПОДП ДАТА_ДУБЛЬ ЗГЛ N_ВХОД N_ТЕКСТ N_ВЫХОД ПОДПИСЬ [МЕСТО_ПОДП] ДОЛЖ_ПОДП МАССИВ РАЗМЕР КОНЕЦ",
+	 "КЛАСС_ДОК ТИП_ДОК ДАТА_ДУБЛЬ ДАТА_ПОДП [ЗГЛ] N_ТЕКСТ ПОДПИСЬ [МЕСТО_ПОДП] ДОЛЖ_ПОДП",
+	 "ИНСТ КЛАСС_ДОК ДАТА_ДУБЛЬ ДАТА_ПОДП ЗГЛ N_ТЕКСТ ПОДПИСЬ МЕСТО_ПОДП ДОЛЖ_ПОДП",
+  // приложение
+	 "",
+	 "",
+	 ""}
+
+
+%Z23
+
+ принят   =  Принят
+ одобрен  =  Одобрен
+
+%Z24   // разделители приложений
+
+ _____   = $Del1
+ #       = $Del2
+
+%Z0
+// Объявление  ограниченных отрезков. Для того, чтобы объявить ограниченный 
+// отрезок, необходимо указать из каких символов он должен состоять,  
+// и какой максимальной длины он может быть. В символы нельзя включать пробелы и признаки 
+// конца строки.
+ 1234567890         3  = $Dig3
+ 1234567890         4  = $Dig4
+ 1234567890-        7  = $Dig6Hyp
+ 1234567890/_-ПВнНI 20  = $DNumRich 
+
+
+%Z25   Числовая часть выходного номера документа.
+// В Zone 9 описано расширение выходного номера документа.
+// Здесь используются ограниченные отрезки (Zone 0).
+// Самые свободные - наверх, в противном случае номер будет распознаваться
+// не полностью.
+
+N $DNumRich = $DNum|1
+ј $DNumRich = $DNum|1
+N $Dig6Hyp  = $DNum|1
+N $Dig4     = $DNum|1
+за N $Dig4  = $DNum|2
+$Dig3       = $DNum|0
+
+
+%Z26  День даты подписания 
+ 
+ от #d      = $D|1
+ #d .       = $D|0
+ " #d "     = $D|1
+ от q #d "     = $D|2
+ от " #d "  = $D|2
+ #d         = $D|0
+ 

+ 402 - 0
dictonary/Dicts/GraphAn/ross.txt

@@ -0,0 +1,402 @@
+abraham
+achim
+adam
+adolf
+agnes
+albert
+albrecht
+alex
+alexander
+alexandra
+alfons
+alfred
+ali
+alice
+alma
+andi
+andrea
+andreas
+andrew
+angela
+angelika
+anita
+anja
+anke
+ann
+anna
+anne
+anneliese
+annette
+antje
+antoine
+anton
+antonio
+aprilia
+armin
+arndt
+arnold
+arthur
+astrid
+axel
+barbara
+bartholomäus
+bastian
+beate
+ben
+benjamin
+benno
+bernd
+bernhard
+bernie
+bertha
+berthold
+berti
+bertold
+bettina
+bill
+birgit
+björn
+bob
+bobby
+bodo
+bogdan
+bonifatius
+boris
+brigitte
+bruno
+burkhard
+butros
+bärbel
+carina
+carl
+carlos
+carmen
+carsten
+charles
+charlie
+charlotte
+chris
+christian
+christiane
+christine
+christoph
+christopher
+clara
+claudia
+claus
+clemens
+constantin
+constanze
+cornelia
+dagmar
+daisy
+daniel
+daniela
+david
+dennis
+denny
+detlef
+dieter
+dietmar
+dietrich
+dirk
+doris
+dragomir
+eberhard
+eberhardt
+ebert
+eckard
+eckhard
+edgar
+edith
+edmund
+eduard
+egon
+elisabeth
+elke
+elmar
+eloise
+else
+elvis
+emil
+emma
+ercan
+erhard
+eric
+erich
+erika
+erwin
+eugen
+eva
+felipiano
+felix
+ferdinand
+fernando
+florian
+frank
+franz
+fred
+friedrich
+fritz
+gabi
+gabriele
+gallus
+georg
+george
+gerd
+gerhard
+gerhardt
+gerhold
+gernhardt
+gert
+gertrud
+gisela
+giuseppe
+gottfried
+gregor
+gudrun
+guido
+gustav
+götz
+günter
+günther
+hanna
+hannelore
+hannes
+hans
+harald
+harry
+hartmut
+heidemarie
+heidi
+heike
+heiko
+heiner
+heinrich
+heinz
+helga
+helmut
+henriette
+henry
+herbert
+heribert
+hermann
+hilde
+hildegard
+holger
+horst
+hubert
+hugo
+ibrahim
+igor
+ilse
+inge
+ingeborg
+ingo
+ingrid
+iris
+irmgard
+isabel
+ivan
+izabella
+jack
+jacques
+jakob
+james
+jan
+jean
+jeanne
+jens
+jerry
+jim
+joachim
+jochen
+joe
+johann
+johanna
+johannes
+john
+jon
+joschka
+josef
+josefine
+joseph
+juan
+julia
+jutta
+jörg
+jörn
+jürgen
+kai
+karin
+karina
+karl
+karlheinz
+karolin
+karoline
+karsten
+katharina
+katja
+katrin
+kerstin
+kevin
+kilian
+kim
+kirsten
+klaus
+konrad
+kurt
+käthe
+lars
+lech
+lee
+leo
+leonhard
+linda
+lisa
+lorenz
+lothar
+louis
+ludwig
+lulu
+lutz
+manfred
+manuela
+marc
+marcel
+marco
+margaret
+margarete
+margaretha
+margarethe
+margot
+margret
+margritte
+maria
+marianne
+marie
+mario
+marion
+markus
+martin
+mary
+mathias
+matthias
+matthäus
+maurice
+max
+mechthild
+michael
+michail
+michel
+mike
+mirjam
+mohammed
+monica
+monika
+moritz
+naxos
+neidhard
+nelson
+nicolas
+nicole
+norbert
+olaf
+olga
+oliver
+oscar
+oskar
+otto
+patrick
+paul
+pauline
+peter
+petra
+philip
+philipp
+pierre
+rainer
+ralf
+ralph
+regina
+reinhard
+reinhardt
+reinhold
+renate
+richard
+rita
+robert
+robin
+roger
+roland
+rolf
+romeo
+ronald
+roy
+rudi
+rudolf
+ruth
+rüdiger
+sabine
+saddam
+salman
+sandra
+sascha
+sebastian
+siegfried
+sigrid
+silke
+simon
+simone
+sonja
+stefan
+steffen
+steffi
+stephan
+steve
+susanne
+sven
+sylvia
+theo
+theodor
+thomas
+thorsten
+tim
+titus
+tom
+toni
+tony
+torsten
+udo
+ulf
+uli
+ulrich
+ulrike
+ursula
+uta
+ute
+uwe
+vera
+verena
+vicki
+viktoria
+viola
+vladimir
+volker
+walter
+waltraud
+werner
+whitney
+wilfried
+wilhelm
+willi
+william
+willie
+willy
+winfried
+wladimir
+wladyslaw
+wolfgang
+wolfram
+wynalda
+yvonne

+ 15 - 0
dictonary/Dicts/GraphAn/space.dic

@@ -0,0 +1,15 @@
+закон
+ПЕРЕЧЕНЬ
+СХЕМА
+СОСТАВ
+ПОЛОЖЕНИЕ
+СПИСОК
+ВЕДОМОСТЬ
+ОБЪЕМ
+ПОСТАНОВЛЯЮ
+ПОСТАНОВЛЯЕТ
+УКАЗ
+постановляет
+ПОСТАНОВЛЕНИЕ
+РАСПОРЯЖЕНИЕ
+РЕШЕНИЕ

+ 0 - 0
dictonary/Dicts/Morph/Eng/morph.options


+ 0 - 0
dictonary/Dicts/Morph/Rus/morph.options


+ 123 - 0
dictonary/Dicts/Morph/egramtab.tab

@@ -0,0 +1,123 @@
+aa 1 ADJECTIVE 
+ab 1 ADJECTIVE comp        
+ac 1 ADJECTIVE sup 
+
+// many, more  most
+xi 1 NUMERAL
+cb 1 NUMERAL comp
+cc 1 NUMERAL sup
+
+         
+//  for adjectives like "English", "Russian"
+ad 1 ADJECTIVE prop
+ba 1 ADVERB
+bb 1 ADVERB comp
+bc 1 ADVERB sup          
+va 1 VERB inf            
+vb 1 VERB prsa,sg,3    
+vc 1 VERB pasa          
+vd 1 VERB pp             
+ve 1 VERB ing            
+vf 1 MOD inf          
+vh 1 MOD pasa         
+ta 1 VBE inf          
+tb 1 VBE prsa,sg,1    
+td 1 VBE prsa,sg,3     
+te 1 VBE prsa,pl      
+tf 1 VBE ing          
+tg 1 VBE pasa,sg      
+ti 1 VBE pasa,pl      
+tj 1 VBE pp           
+tk 1 VBE fut,1,sg
+tl 1 VBE fut,sg,pl,1,2,3
+tm 1 VBE if,sg,1,2
+tn 1 VBE if,sg,3      
+to 1 VBE if,pl       
+pa 1 PN pers,nom      
+pb 1 PN pers,obj
+pc 1 PN pers,nom,sg,1
+pd 1 PN pers,obj,sg,1
+pe 1 PN pers,nom,2      
+pf 1 PN pers,obj,2
+pg 1 PN pers,nom,sg,3      
+ph 1 PN pers,obj,sg,3
+pi 1 PN pers,nom,pl,1
+pk 1 PN pers,obj,pl,1
+pl 1 PN pers,nom,pl,3      
+pm 1 PN pers,obj,pl,3
+da 1 PN ref,sg
+db 1 PN ref,pl       
+ea 1 PN_ADJ poss     
+eb 1 PN_ADJ poss,pred
+ec 1 PN_ADJ dem,sg
+ed 1 PN_ADJ dem,pl
+ee 1 PN_ADJ 
+ef 1 PRON 
+
+// "table", "town"
+na 1 NOUN narr,sg        
+nb 1 NOUN narr,pl
+
+//  analytical possessive
+fa 1 NOUN narr,poss
+
+//  nouns which can be mass  and uncount
+// "silk", "clay"
+nc 1 NOUN narr,mass,uncount,sg
+//  analytical possessive
+fb 1 NOUN narr,mass,uncount,poss
+
+
+//  mass nouns 
+// "water", "butter"
+ne 1 NOUN narr,mass,sg
+ng 1 NOUN narr,mass,pl
+//  analytical possessive
+fc 1 NOUN narr,mass,poss
+ 
+
+//  uncount nouns 
+// "acceleration", "activism"
+ni 1 NOUN narr,uncount,sg
+
+
+// "John", "James"
+oa 1 NOUN prop,m,sg   
+ob 1 NOUN prop,m,pl      
+
+//  analytical possessive
+fd 1 NOUN prop,m,poss
+
+// "Mary", "Jane"
+oc 1 NOUN prop,f,sg      
+od 1 NOUN prop,f,pl      
+//  analytical possessive
+fe 1 NOUN prop,f,poss
+
+// "Glen" "Lee" "Jerry"
+oe 1 NOUN prop,m,f,sg    
+of 1 NOUN prop,m,f,pl
+//  analytical possessive
+ff 1 NOUN prop,m,f,poss
+
+// general geographical names
+ga 1 NOUN prop
+//  analytical possessive
+fg 1 NOUN prop,poss
+
+xa 1 CONJ               
+xb 1 INT              
+xc 1 PREP             
+xd 1 PART             
+xf 1 ARTICLE
+xi 1 NUMERAL
+xp 1 ORDNUM              
+yc 1 POSS plsq
+yd 1 POSS plsgs
+ //Специальное существительное заглушка, номер кода используется!
+xx 1 NOUN prop sg pl
+
+// type ancodes 
+za 1 * geo        
+zb 1 * name
+zc 1 * org

+ 878 - 0
dictonary/Dicts/Morph/rgramtab.tab

@@ -0,0 +1,878 @@
+//  ======  ���������������   ========
+
+// סףשוסעגטעוכ�ם�ו לףזסךמדמ נמהא
+אא A � לנ,וה,טל
+אב A � לנ,וה,נה
+�פ A � לנ,וה,נה,2
+אג A � לנ,וה,הע
+אד A � לנ,וה,גם
+אה A � לנ,וה,עג
+או A � לנ,וה,ןנ
+�ץ A � לנ,וה,ןנ,2
+אס A � לנ,וה,חג,
+אז A � לנ,לם,טל
+אח A � לנ,לם,נה
+אט A � לנ,לם,הע
+אי A � לנ,לם,גם
+אך A � לנ,לם,עג
+אכ A � לנ,לם,ןנ
+אל B � לנ,0
+אם B � לנ,וה,0
+
+// =============  נאחדמגמנם�י  ================
+�מ A � לנ,וה,טל,נאחד 
+�ן A � לנ,וה,נה,נאחד 
+�נ A � לנ,וה,הע,נאחד 
+�ס A � לנ,וה,גם,נאחד 
+�ע A � לנ,וה,עג,נאחד 
+�פ A � לנ,וה,ןנ,נאחד 
+�ץ A � לנ,וה,חג,נאחד 
+‗ב A � לנ,לם,טל,נאחד 
+‗א A � לנ,לם,נה,נאחד
+‗ג A � לנ,לם,הע,נאחד
+‗ד A � לנ,לם,גם,נאחד
+‗ה A � לנ,לם,עג,נאחד
+‗ז A � לנ,לם,ןנ,נאחד
+
+
+// =============  אנץאטחל  ================
+דמ A � לנ,וה,טל,אנץ 
+דן A � לנ,וה,נה,אנץ 
+דנ A � לנ,וה,הע,אנץ 
+דס A � לנ,וה,גם,אנץ 
+דע A � לנ,וה,עג,אנץ 
+דף A � לנ,וה,ןנ,אנץ 
+דפ A � לנ,לם,טל,אנץ 
+דץ A � לנ,לם,נה,אנץ
+דצ A � לנ,לם,הע,אנץ
+דק A � לנ,לם,גם,אנץ
+דר A � לנ,לם,עג,אנץ
+דש A � לנ,לם,ןנ,אנץ
+
+
+// סףשוסעגטעוכ�ם�ו לףזסךמדמ-זוםסךמדמ נמהא
+
+גא E � לנ-זנ,וה,טל
+גב E � לנ-זנ,וה,נה
+גג E � לנ-זנ,וה,הע
+גד E � לנ-זנ,וה,גם
+גה E � לנ-זנ,וה,עג
+גו E � לנ-זנ,וה,ןנ
+גז E � לנ-זנ,לם,טל
+גח E � לנ-זנ,לם,נה
+גט E � לנ-זנ,לם,הע
+גי E � לנ-זנ,לם,גם
+גך E � לנ-זנ,לם,עג
+גכ E � לנ-זנ,לם,ןנ
+גל F � לנ-זנ,0
+גם F � לנ-זנ,וה,0
+
+
+// סףשוסעגטעוכ�ם�ו לףזסךמדמ-זוםסךמדמ נמהא (אנץאטחל)
+גמ E � אנץ,לנ-זנ,וה,טל
+גן E � אנץ,לנ-זנ,וה,נה
+גנ E � אנץ,לנ-זנ,וה,הע
+גס E � אנץ,לנ-זנ,וה,גם
+גע E � אנץ,לנ-זנ,וה,עג
+גף E � אנץ,לנ-זנ,וה,ןנ
+גפ E � אנץ,לנ-זנ,לם,טל
+גץ E � אנץ,לנ-זנ,לם,נה
+גצ E � אנץ,לנ-זנ,לם,הע
+גק E � אנץ,לנ-זנ,לם,גם
+גר E � אנץ,לנ-זנ,לם,עג
+גש E � אנץ,לנ-זנ,לם,ןנ
+
+// סףשוסעגטעוכ�ם�ו זוםסךמדמ נמהא
+דא G � זנ,וה,טל
+דב G � זנ,וה,נה
+דג G � זנ,וה,הע
+דד G � זנ,וה,גם
+דה G � זנ,וה,עג
+דו G � זנ,וה,ןנ
+�ק G � זנ,וה,ןנ,2
+�ר G � זנ,וה,חג
+דז G � זנ,לם,טל
+דח G � זנ,לם,נה
+דט G � זנ,לם,הע
+די G � זנ,לם,גם
+דך G � זנ,לם,עג
+דכ G � זנ,לם,ןנ
+דל H � זנ,0
+דם H � זנ,וה,0
+
+// סףשוסעגטעוכ�ם�ו זוםסךמדמ (אנץאטחל)
+�א G � אנץ,זנ,וה,טל
+�ב G � אנץ,זנ,וה,נה
+�ג G � אנץ,זנ,וה,הע
+�ד G � אנץ,זנ,וה,גם
+�ה G � אנץ,זנ,וה,עג
+�ו G � אנץ,זנ,וה,ןנ
+�ז G � אנץ,זנ,לם,טל
+�ח G � אנץ,זנ,לם,נה
+�ט G � אנץ,זנ,לם,הע
+�י G � אנץ,זנ,לם,גם
+�ך G � אנץ,זנ,לם,עג
+�כ G � אנץ,זנ,לם,ןנ
+
+
+// סףשוסעגטעוכ�ם�ו זוםסךמדמ (נאחדמגמנם�י)
+�ל G � נאחד,זנ,וה,טל
+�ם G � נאחד,זנ,וה,נה
+�מ G � נאחד,זנ,וה,הע
+�ן G � נאחד,זנ,וה,גם
+�נ G � נאחד,זנ,וה,עג
+�ס G � נאחד,זנ,וה,ןנ
+�ע G � נאחד,זנ,לם,טל
+�ף G � נאחד,זנ,לם,נה
+�פ G � נאחד,זנ,לם,הע
+�ץ G � נאחד,זנ,לם,גם
+�צ G � נאחד,זנ,לם,עג
+�ק G � נאחד,זנ,לם,ןנ
+
+
+
+// סףשוסעגטעוכ�ם�ו סנוהםודמ נמהא
+
+וא K � סנ,וה,טל
+וב K � סנ,וה,נה
+וג K � סנ,וה,הע
+וד K � סנ,וה,גם
+וה K � סנ,וה,עג
+וו K � סנ,וה,ןנ
+וז K � סנ,לם,טל
+וח K � סנ,לם,נה
+וט K � סנ,לם,הע
+וי K � סנ,לם,גם
+וך K � סנ,לם,עג
+וכ K � סנ,לם,ןנ
+ול L � סנ,0
+ום L � סנ,וה,0
+
+// טל. �וםטםא
+�� K � סנ,וה,נה,אבבנ
+
+// סףשוסעגטעוכ�ם�ו סנוהםודמ נמהא (נאחדמגמנם�י)
+‗ח K � נאחד,סנ,וה,טל
+‗ט K � נאחד,סנ,וה,נה
+‗ך K � נאחד,סנ,וה,הע
+‗כ K � נאחד,סנ,וה,גם
+‗ל K � נאחד,סנ,וה,עג
+‗ם K � נאחד,סנ,וה,ןנ
+‗מ K � נאחד,סנ,לם,טל
+‗ן K � נאחד,סנ,לם,נה
+‗נ K � נאחד,סנ,לם,הע
+‗ס K � נאחד,סנ,לם,גם
+‗ע K � נאחד,סנ,לם,עג
+‗ף K � נאחד,סנ,לם,ןנ
+
+// pluralia tantum
+טז Q � לם,לם,טל
+טח Q � לם,לם,נה
+טט Q � לם,לם,הע
+טי Q � לם,לם,גם
+טך Q � לם,לם,עג
+טכ Q � לם,לם,ןנ
+טל R � לם,0
+
+//  אבבנוגטאעףנ�
+אמ B � לנ,אבבנ,0,
+אן B � לנ,וה,אבבנ,0
+אע H � זנ,אבבנ,0
+אף H � זנ,וה,אבבנ,0
+אצ H � סנ,אבבנ,0
+אק H � סנ,וה,אבבנ,0
+את R � לם,אבבנ,0
+
+// טלוםא
+
+במ C � לנ,טל�,וה,טל
+בן C � לנ,טל�,וה,נה
+בנ C � לנ,טל�,וה,הע
+בס C � לנ,טל�,וה,גם
+בע C � לנ,טל�,וה,עג
+בף C � לנ,טל�,וה,ןנ
+ב� C � לנ,טל�,וה,חג,נאחד
+בפ C � לנ,טל�,לם,טל
+בץ C � לנ,טל�,לם,נה
+בצ C � לנ,טל�,לם,הע
+בק C � לנ,טל�,לם,גם
+בר C � לנ,טל�,לם,עג
+בש C � לנ,טל�,לם,ןנ
+
+ב� I � לנ,טל�,0
+
+
+ג� E � לנ-זנ,טל�,0
+ג� E � לנ-זנ,טל�,וה,טל
+ג� E � לנ-זנ,טל�,וה,נה
+ג� E � לנ-זנ,טל�,וה,הע
+ג� E � לנ-זנ,טל�,וה,גם
+ג� E � לנ-זנ,טל�,וה,עג
+ג� E � לנ-זנ,טל�,וה,ןנ
+ג� E � לנ-זנ,טל�,וה,חג,נאחד
+ג� E � לנ-זנ,טל�,לם,טל
+ג� E � לנ-זנ,טל�,לם,נה
+ג� E � לנ-זנ,טל�,לם,הע
+ג� E � לנ-זנ,טל�,לם,גם
+ג� E � לנ-זנ,טל�,לם,עג
+ג� E � לנ-זנ,טל�,לם,ןנ
+
+
+המ I � זנ,טל�,וה,טל
+הן I � זנ,טל�,וה,נה
+הנ I � זנ,טל�,וה,הע
+הס I � זנ,טל�,וה,גם
+הע I � זנ,טל�,וה,עג
+הף I � זנ,טל�,וה,ןנ
+ה� I � זנ,טל�,וה,חג,נאחד
+הפ I � זנ,טל�,לם,טל
+הץ I � זנ,טל�,לם,נה
+הצ I � זנ,טל�,לם,הע
+הק I � זנ,טל�,לם,גם
+הר I � זנ,טל�,לם,עג
+הש I � זנ,טל�,לם,ןנ
+
+
+ה� I � זנ,טל�,0
+
+
+
+// לףזסךטו מעקוסעגא 
+
+�א Q � לנ,מעק,וה,טל,
+�ב Q � לנ,מעק,וה,נה,
+�ג Q � לנ,מעק,וה,הע,
+�ד Q � לנ,מעק,וה,גם,
+�ה Q � לנ,מעק,וה,עג,
+�ו Q � לנ,מעק,וה,ןנ,
+�ם Q � לנ,מעק,לם,טל,
+�מ Q � לנ,מעק,לם,נה,
+�ן Q � לנ,מעק,לם,הע,
+�נ Q � לנ,מעק,לם,גם,
+�ס Q � לנ,מעק,לם,עג,
+�ע Q � לנ,מעק,לם,ןנ,
+
+// זוםסךטו מעקוסעגא 
+
+�ז Q � זנ,מעק,וה,טל,
+�ח Q � זנ,מעק,וה,נה,
+�ט Q � זנ,מעק,וה,הע,
+�ך Q � זנ,מעק,וה,גם,
+�כ Q � זנ,מעק,וה,עג,
+�ל Q � זנ,מעק,וה,ןנ,
+�ף Q � זנ,מעק,לם,טל,
+�פ Q � זנ,מעק,לם,נה,
+�ץ Q � זנ,מעק,לם,הע,
+�צ Q � זנ,מעק,לם,גם,
+�ק Q � זנ,מעק,לם,עג,
+�ר Q � זנ,מעק,לם,ןנ,
+
+
+
+// לףזסךטו מעקוסעגא  (נאחד.)
+
+�א Q � לנ,מעק,נאחד,וה,טל,
+�ב Q � לנ,מעק,נאחד,וה,נה,
+�ג Q � לנ,מעק,נאחד,וה,הע,
+�ד Q � לנ,מעק,נאחד,וה,גם,
+�ה Q � לנ,מעק,נאחד,וה,עג,
+�ו Q � לנ,מעק,נאחד,וה,ןנ,
+�ם Q � לנ,מעק,נאחד,לם,טל,
+�מ Q � לנ,מעק,נאחד,לם,נה,
+�ן Q � לנ,מעק,נאחד,לם,הע,
+�נ Q � לנ,מעק,נאחד,לם,גם,
+�ס Q � לנ,מעק,נאחד,לם,עג,
+�ע Q � לנ,מעק,נאחד,לם,ןנ,
+
+// זוםסךטו מעקוסעגא  (נאחד.)
+
+�ז Q � זנ,מעק,נאחד,וה,טל,
+�ח Q � זנ,מעק,נאחד,וה,נה,
+�ט Q � זנ,מעק,נאחד,וה,הע,
+�ך Q � זנ,מעק,נאחד,וה,גם,
+�כ Q � זנ,מעק,נאחד,וה,עג,
+�ל Q � זנ,מעק,נאחד,וה,ןנ,
+�ף Q � זנ,מעק,נאחד,לם,טל,
+�פ Q � זנ,מעק,נאחד,לם,נה,
+�ץ Q � זנ,מעק,נאחד,לם,הע,
+�צ Q � זנ,מעק,נאחד,לם,גם,
+�ק Q � זנ,מעק,נאחד,לם,עג,
+�ר Q � זנ,מעק,נאחד,לם,ןנ,
+
+
+
+//  ======  ��������������   ========
+
+יא Y � לנ,וה,טל,מה,םמ
+יב Y � לנ,וה,נה,מה,םמ
+יג Y � לנ,וה,הע,מה,םמ
+יד Y � לנ,וה,גם,מה
+�ש Y � לנ,וה,גם,םמ
+יה Y � לנ,וה,עג,מה,םמ
+יו Y � לנ,וה,ןנ,מה,םמ
+יז Y � זנ,וה,טל,מה,םמ
+יח Y � זנ,וה,נה,מה,םמ
+יט Y � זנ,וה,הע,מה,םמ
+יי Y � זנ,וה,גם,מה,םמ
+יך Y � זנ,וה,עג,מה,םמ
+יכ Y � זנ,וה,ןנ,מה,םמ
+יל Y � סנ,וה,טל,מה,םמ
+ים Y � סנ,וה,נה,מה,םמ
+ימ Y � סנ,וה,הע,מה,םמ
+ין Y � סנ,וה,גם,מה,םמ
+ינ Y � סנ,וה,עג,מה,םמ
+יס Y � סנ,וה,ןנ,מה,םמ
+יע Y � לם,טל,מה,םמ
+יף Y � לם,נה,מה,םמ
+יפ Y � לם,הע,מה,םמ
+יץ Y � לם,גם,מה
+�� Y � לם,גם,םמ
+יצ Y � לם,עג,מה,םמ
+יק Y � לם,ןנ,מה,םמ
+יר Y ��_���� לנ,וה,מה,םמ
+יש Y ��_���� זנ,וה,מה,םמ
+י� Y ��_���� סנ,וה,מה,םמ
+י‎ Y ��_���� לם,מה,םמ
+י‏ Y � סנאגם,מה,םמ
+ית Y � סנאגם,2,מה,םמ
+י� Y � סנאגם,מה,םמ,נאחד
+י� Z � 0,מה,םמ
+
+//== ןנוגמסץמהםא� סעוןום� ןנטכאדאעוכ�םמדמ
+טא Y � ןנוג,לנ,וה,טל,מה,םמ
+טב Y � ןנוג,לנ,וה,נה,מה,םמ
+טג Y � ןנוג,לנ,וה,הע,מה,םמ
+טד Y � ןנוג,לנ,וה,גם,מה
+טה Y � ןנוג,לנ,וה,גם,םמ
+טו Y � ןנוג,לנ,וה,עג,מה,םמ
+�ב Y � ןנוג,לנ,וה,ןנ,מה,םמ
+�ג Y � ןנוג,זנ,וה,טל,מה,םמ
+�ד Y � ןנוג,זנ,וה,נה,מה,םמ
+�ה Y � ןנוג,זנ,וה,הע,מה,םמ
+�ו Y � ןנוג,זנ,וה,גם,מה,םמ
+�ז Y � ןנוג,זנ,וה,עג,מה,םמ
+�ח Y � ןנוג,זנ,וה,ןנ,מה,םמ
+טם Y � ןנוג,סנ,וה,טל,מה,םמ
+טמ Y � ןנוג,סנ,וה,נה,מה,םמ
+טן Y � ןנוג,סנ,וה,הע,מה,םמ
+טנ Y � ןנוג,סנ,וה,גם,מה,םמ
+טס Y � ןנוג,סנ,וה,עג,מה,םמ
+טע Y � ןנוג,סנ,וה,ןנ,מה,םמ
+טף Y � ןנוג,לם,טל,מה,םמ
+טפ Y � ןנוג,לם,נה,מה,םמ
+טץ Y � ןנוג,לם,הע,מה,םמ
+טצ Y � ןנוג,לם,גם,מה
+טק Y � ןנוג,לם,גם,םמ
+טר Y � ןנוג,לם,עג,מה,םמ
+טש Y � ןנוג,לם,ןנ,מה,םמ
+
+
+
+
+//  ========   בוחכטקם�ו דכאדמכ�  ============
+// ןמהףלאע�ס�,סגועאע�
+םנ a ��������� בוחכ
+// ןמהףלאועס�
+םס a � בוחכ,בףה 
+// ןמהףלאכמס�,סגועאכמ
+םע a � בוחכ,ןנר
+// סגועאוע
+םף a � בוחכ,םסע
+
+
+
+ךא a ��������� הסע
+
+//  ========================================
+//  ========   כטקם�ו פמנל� דכאדמכא  ============
+
+ךב a � הסע,םסע,1כ,וה
+ךג a � הסע,םסע,1כ,לם
+ךד a � הסע,םסע,2כ,וה
+ךה a � הסע,םסע,2כ,לם
+ךו a � הסע,םסע,3כ,וה
+ךז a � הסע,םסע,3כ,לם
+ךח a � הסע,ןנר,לנ,וה
+ךט a � הסע,ןנר,זנ,וה
+ךי a � הסע,ןנר,סנ,וה
+ךך a � הסע,ןנר,לם
+ךן a � הסע,בףה,1כ,וה
+ךנ a � הסע,בףה,1כ,לם
+ךס a � הסע,בףה,2כ,וה
+ךע a � הסע,בףה,2כ,לם
+ךף a � הסע,בףה,3כ,וה
+ךפ a � הסע,בףה,3כ,לם
+
+
+// נאחדמגמנם�ו פמנל�: "כמזף", "כמזטל","ה�רףע", "כמזףע", "כמזאע", "המזהףס�"
+�ת a � הסע,םסע,1כ,וה,נאחד
+�� a � הסע,םסע,1כ,לם,נאחד
+�‎ a � הסע,םסע,2כ,וה,נאחד
+�‏ a � הסע,םסע,2כ,לם,נאחד
+�� a � הסע,םסע,3כ,וה,נאחד
+ך‏ a � הסע,םסע,3כ,לם,נאחד
+ך� a � הסע,ןנר,לם,נאחד
+
+ך‎ a � הסע,בףה,1כ,וה,נאחד
+�א a � הסע,בףה,1כ,לם,נאחד
+�ב a � הסע,בףה,2כ,וה,נאחד
+�ג a � הסע,בףה,2כ,לם,נאחד
+�ד a � הסע,בףה,3כ,וה,נאחד
+�ה a � הסע,בףה,3כ,לם,נאחד
+
+
+
+// אנץאטקם�ו פמנל�: "ףךאחףוע", 
+�ו a � הסע,םסע,1כ,וה,אנץ
+�ז a � הסע,םסע,1כ,לם,אנץ
+�ח a � הסע,םסע,2כ,וה,אנץ
+�ט a � הסע,םסע,2כ,לם,אנץ
+�י a � הסע,םסע,3כ,וה,אנץ
+�ך a � הסע,םסע,3כ,לם,אנץ
+�כ a � הסע,ןנר,לם,אנץ
+
+�ל a � הסע,בףה,1כ,וה,אנץ
+�ם a � הסע,בףה,1כ,לם,אנץ
+�מ a � הסע,בףה,2כ,וה,אנץ
+�ן a � הסע,בףה,2כ,לם,אנץ
+�נ a � הסע,בףה,3כ,וה,אנץ
+�ס a � הסע,בףה,3כ,לם,אנץ
+
+
+// ===================================================
+// ==============   ������������ ====================
+// ===================================================
+ךם a ������������ הסע,םסע
+ךמ a ������������ הסע,ןנר
+
+// ===================================================
+// ==============   ������������ (אנץאטחל)  ==========
+// ===================================================
+�ע a ������������ הסע,םסע,אנץ
+�ף a ������������ הסע,ןנר,אנץ
+
+//===================================================
+// ==============   ���������    ====================
+// "םו בףהול זו חאב�גאע� םארטץ ןנוהךמג!"
+
+םן a � הסע,ןגכ,1כ,לם
+ךת a � הסע,ןגכ,1כ,וה
+ךכ a � הסע,ןגכ,2כ,וה
+ךל a � הסע,ןגכ,2כ,לם
+
+
+//  "ןנמשוגאיעו"
+כ� a � הסע,ןגכ,2כ,וה,נאחד
+ך� a � הסע,ןגכ,2כ,לם,נאחד
+
+// סל.
+�‏ a � הסע,ןגכ,2כ,וה,אבבנ 
+
+//  "טח�הט", "טח�הטעו"
+פת a � הסע,ןגכ,2כ,וה,אנץ
+פ‏ a � הסע,ןגכ,2כ,לם,אנץ
+
+
+//  הויסעגטעוכ�םמו ןנטקאסעטו םאסעמ�שודמ גנולוםט
+כא a ��������� מה,םמ,םסע,הסע,וה,לנ,טל
+כב a ��������� מה,םמ,םסע,הסע,וה,לנ,נה
+כג a ��������� מה,םמ,םסע,הסע,וה,לנ,הע
+כד a ��������� מה,םסע,הסע,וה,לנ,גם
+�א a ��������� םמ,םסע,הסע,וה,לנ,גם
+כה a ��������� מה,םמ,םסע,הסע,וה,לנ,עג
+כו a ��������� מה,םמ,םסע,הסע,וה,לנ,ןנ
+כח a ��������� מה,םמ,םסע,הסע,וה,זנ,טל
+כט a ��������� מה,םמ,םסע,הסע,וה,זנ,נה
+כי a ��������� מה,םמ,םסע,הסע,וה,זנ,הע
+כך a ��������� מה,םמ,םסע,הסע,וה,זנ,גם
+ככ a ��������� מה,םמ,םסע,הסע,וה,זנ,עג
+כל a ��������� מה,םמ,םסע,הסע,וה,זנ,ןנ
+כמ a ��������� מה,םמ,םסע,הסע,וה,סנ,טל
+כן a ��������� מה,םמ,םסע,הסע,וה,סנ,נה
+כנ a ��������� מה,םמ,םסע,הסע,וה,סנ,הע
+כס a ��������� מה,םמ,םסע,הסע,וה,סנ,גם
+כע a ��������� מה,םמ,םסע,הסע,וה,סנ,עג
+כף a ��������� מה,םמ,םסע,הסע,וה,סנ,ןנ
+כץ a ��������� מה,םמ,םסע,הסע,לם,טל
+כצ a ��������� מה,םמ,םסע,הסע,לם,נה
+כק a ��������� מה,םמ,םסע,הסע,לם,הע
+כר a ��������� מה,םסע,הסע,לם,גם
+�י a ��������� םמ,םסע,הסע,לם,גם
+כש a ��������� מה,םמ,םסע,הסע,לם,עג
+כ� a ��������� מה,םמ,םסע,הסע,לם,ןנ
+
+//  הויסעגטעוכ�םמו ןנטקאסעטו ןנמרוהרודמ גנולוםט
+לא a ��������� מה,םמ,ןנר,הסע,וה,לנ,טל
+לב a ��������� מה,םמ,ןנר,הסע,וה,לנ,נה
+לג a ��������� מה,םמ,ןנר,הסע,וה,לנ,הע
+לד a ��������� מה,ןנר,הסע,וה,לנ,גם
+�ב a ��������� םמ,ןנר,הסע,וה,לנ,גם
+לה a ��������� מה,םמ,ןנר,הסע,וה,לנ,עג
+לו a ��������� מה,םמ,ןנר,הסע,וה,לנ,ןנ
+לח a ��������� מה,םמ,ןנר,הסע,וה,זנ,טל
+לט a ��������� מה,םמ,ןנר,הסע,וה,זנ,נה
+לי a ��������� מה,םמ,ןנר,הסע,וה,זנ,הע
+לך a ��������� מה,םמ,ןנר,הסע,וה,זנ,גם
+לכ a ��������� מה,םמ,ןנר,הסע,וה,זנ,עג
+לל a ��������� מה,םמ,ןנר,הסע,וה,זנ,ןנ
+למ a ��������� מה,םמ,ןנר,הסע,וה,סנ,טל
+לן a ��������� מה,םמ,ןנר,הסע,וה,סנ,נה
+לנ a ��������� מה,םמ,ןנר,הסע,וה,סנ,הע
+לס a ��������� מה,םמ,ןנר,הסע,וה,סנ,גם
+לע a ��������� מה,םמ,ןנר,הסע,וה,סנ,עג
+לף a ��������� מה,םמ,ןנר,הסע,וה,סנ,ןנ
+לץ a ��������� מה,םמ,ןנר,הסע,לם,טל
+לצ a ��������� מה,םמ,ןנר,הסע,לם,נה
+לק a ��������� מה,םמ,ןנר,הסע,לם,הע
+לר a ��������� מה,ןנר,הסע,לם,גם
+�ך a ��������� םמ,ןנר,הסע,לם,גם
+לש a ��������� מה,םמ,ןנר,הסע,לם,עג
+ל� a ��������� מה,םמ,ןנר,הסע,לם,ןנ
+
+//  סענאהאעוכ�םמו ןנטקאסעטו םאסעמ�שודמ גנולוםט
+ןא b ��������� מה,םמ,םסע,סענ,וה,לנ,טל
+ןב b ��������� מה,םמ,םסע,סענ,וה,לנ,נה
+ןג b ��������� מה,םמ,םסע,סענ,וה,לנ,הע
+ןד b ��������� מה,םסע,סענ,וה,לנ,גם
+�ד b ��������� םמ,םסע,סענ,וה,לנ,גם
+ןה b ��������� מה,םמ,םסע,סענ,וה,לנ,עג
+ןו b ��������� מה,םמ,םסע,סענ,וה,לנ,ןנ
+ןז b ��_��������� מה,םמ,םסע,סענ,וה,לנ
+ןח b ��������� מה,םמ,םסע,סענ,וה,זנ,טל
+ןט b ��������� מה,םמ,םסע,סענ,וה,זנ,נה
+ןי b ��������� מה,םמ,םסע,סענ,וה,זנ,הע
+ןך b ��������� מה,םמ,םסע,סענ,וה,זנ,גם
+ןכ b ��������� מה,םמ,םסע,סענ,וה,זנ,עג
+ןל b ��������� מה,םמ,םסע,סענ,וה,זנ,ןנ
+ןם b ��_��������� מה,םמ,םסע,סענ,וה,זנ
+ןמ b ��������� מה,םמ,םסע,סענ,וה,סנ,טל
+ןן b ��������� מה,םמ,םסע,סענ,וה,סנ,נה
+ןנ b ��������� מה,םמ,םסע,סענ,וה,סנ,הע
+ןס b ��������� מה,םמ,םסע,סענ,וה,סנ,גם
+ןע b ��������� מה,םמ,םסע,סענ,וה,סנ,עג
+ןף b ��������� מה,םמ,םסע,סענ,וה,סנ,ןנ
+ןפ b ��_��������� מה,םמ,םסע,סענ,וה,סנ
+ןץ b ��������� מה,םמ,םסע,סענ,לם,טל
+ןצ b ��������� מה,םמ,םסע,סענ,לם,נה
+ןק b ��������� מה,םמ,םסע,סענ,לם,הע
+ןר b ��������� מה,םסע,סענ,לם,גם
+�ל b ��������� םמ,םסע,סענ,לם,גם
+ןש b ��������� מה,םמ,םסע,סענ,לם,עג
+ן� b ��������� מה,םמ,םסע,סענ,לם,ןנ
+ן‎ b ��_��������� מה,םמ,םסע,סענ,לם
+
+//  סענאהאעוכ�םמו ןנטקאסעטו ןנמרוהרודמ גנולוםט
+סא b ��������� מה,םמ,ןנר,סענ,וה,לנ,טל
+סב b ��������� מה,םמ,ןנר,סענ,וה,לנ,נה
+סג b ��������� מה,םמ,ןנר,סענ,וה,לנ,הע
+סד b ��������� מה,ןנר,סענ,וה,לנ,גם
+�ו b ��������� םמ,ןנר,סענ,וה,לנ,גם
+סה b ��������� מה,םמ,ןנר,סענ,וה,לנ,עג
+סו b ��������� מה,םמ,ןנר,סענ,וה,לנ,ןנ
+סז b ��_��������� מה,םמ,ןנר,סענ,וה,לנ
+סח b ��������� מה,םמ,ןנר,סענ,וה,זנ,טל
+סט b ��������� מה,םמ,ןנר,סענ,וה,זנ,נה
+סי b ��������� מה,םמ,ןנר,סענ,וה,זנ,הע
+סך b ��������� מה,םמ,ןנר,סענ,וה,זנ,גם
+סכ b ��������� מה,םמ,ןנר,סענ,וה,זנ,עג
+סל b ��������� מה,םמ,ןנר,סענ,וה,זנ,ןנ
+סם b ��_��������� מה,םמ,ןנר,סענ,וה,זנ
+סמ b ��������� מה,םמ,ןנר,סענ,וה,סנ,טל
+סן b ��������� מה,םמ,ןנר,סענ,וה,סנ,נה
+סנ b ��������� מה,םמ,ןנר,סענ,וה,סנ,הע
+סס b ��������� מה,םמ,ןנר,סענ,וה,סנ,גם
+סע b ��������� מה,םמ,ןנר,סענ,וה,סנ,עג
+סף b ��������� מה,םמ,ןנר,סענ,וה,סנ,ןנ
+ספ b ��_��������� מה,םמ,ןנר,סענ,וה,סנ
+סץ b ��������� מה,םמ,ןנר,סענ,לם,טל
+סצ b ��������� מה,םמ,ןנר,סענ,לם,נה
+סק b ��������� מה,םמ,ןנר,סענ,לם,הע
+סר b ��������� מה,ןנר,סענ,לם,גם
+�מ b ��������� םמ,ןנר,סענ,לם,גם
+סש b ��������� מה,םמ,ןנר,סענ,לם,עג
+ס� b ��������� מה,םמ,ןנר,סענ,לם,ןנ
+ס‎ b ��_��������� מה,םמ,ןנר,סענ,לם
+
+
+קא e �� 1כ,וה,טל
+קב e �� 1כ,וה,נה
+קג e �� 1כ,וה,הע
+קד e �� 1כ,וה,גם
+קה e �� 1כ,וה,עג
+קו e �� 1כ,וה,ןנ
+קז e �� 1כ,לם,טל
+קח e �� 1כ,לם,נה
+קט e �� 1כ,לם,הע
+קי e �� 1כ,לם,גם
+קך e �� 1כ,לם,עג
+קכ e �� 1כ,לם,ןנ
+קל e �� 2כ,וה,טל
+קם e �� 2כ,וה,נה
+קמ e �� 2כ,וה,הע
+קן e �� 2כ,וה,גם
+קנ e �� 2כ,וה,עג
+קס e �� 2כ,וה,ןנ
+קע e �� 2כ,לם,טל
+קף e �� 2כ,לם,נה
+קפ e �� 2כ,לם,הע
+קץ e �� 2כ,לם,גם
+קצ e �� 2כ,לם,עג
+קק e �� 2כ,לם,ןנ
+רא e �� 3כ,לנ,וה,טל
+רב e �� 3כ,לנ,וה,נה
+רג e �� 3כ,לנ,וה,הע
+רד e �� 3כ,לנ,וה,גם
+רה e �� 3כ,לנ,וה,עג
+רו e �� 3כ,לנ,וה,ןנ
+רז e �� 3כ,זנ,וה,טל
+רח e �� 3כ,זנ,וה,נה
+רט e �� 3כ,זנ,וה,הע
+רי e �� 3כ,זנ,וה,גם
+רך e �� 3כ,זנ,וה,עג
+רכ e �� 3כ,זנ,וה,ןנ
+רל e �� 3כ,סנ,וה,טל
+רם e �� 3כ,סנ,וה,נה
+רמ e �� 3כ,סנ,וה,הע
+רן e �� 3כ,סנ,וה,גם
+רנ e �� 3כ,סנ,וה,עג
+רס e �� 3כ,סנ,וה,ןנ
+רע e �� 3כ,לם,טל
+רף e �� 3כ,לם,נה
+רפ e �� 3כ,לם,הע
+רץ e �� 3כ,לם,גם
+רצ e �� 3כ,לם,עג
+רק e �� 3כ,לם,ןנ
+שא e �� לנ,וה,טל
+שב e �� לנ,וה,נה
+שג e �� לנ,וה,הע
+שד e �� לנ,וה,גם
+שה e �� לנ,וה,עג
+שו e �� לנ,וה,ןנ
+שז e �� זנ,וה,טל
+שח e �� זנ,וה,נה
+שט e �� זנ,וה,הע
+שי e �� זנ,וה,גם
+שך e �� זנ,וה,עג
+שכ e �� זנ,וה,ןנ
+של e �� סנ,וה,טל
+שם e �� סנ,וה,נה
+שמ e �� סנ,וה,הע
+שן e �� סנ,וה,גם
+שנ e �� סנ,וה,עג
+שס e �� סנ,וה,ןנ
+שע e �� לם,טל
+שף e �� לם,נה
+שפ e �� לם,הע
+שץ e �� לם,גם
+שצ e �� לם,עג
+שק e �� לם,ןנ
+שש e �� נה
+ש� e �� הע
+ש‎ e �� גם
+ש‏ e �� עג
+ש� e �� ןנ
+�א f ��-� לנ,וה,טל,מה,םמ
+�ב f ��-� לנ,וה,נה,מה,םמ
+�ג f ��-� לנ,וה,הע,מה,םמ
+�ד f ��-� לנ,וה,גם,םמ
+�פ f ��-� לנ,וה,גם,מה
+�ה f ��-� לנ,וה,עג,מה,םמ
+�ו f ��-� לנ,וה,ןנ,מה,םמ
+�ז f ��-� זנ,וה,טל,מה,םמ
+�ח f ��-� זנ,וה,נה,מה,םמ
+�ט f ��-� זנ,וה,הע,מה,םמ
+�י f ��-� זנ,וה,גם,מה,םמ
+�ך f ��-� זנ,וה,עג,מה,םמ
+�כ f ��-� זנ,וה,ןנ,מה,םמ
+�ל f ��-� סנ,וה,טל,מה,םמ
+�ם f ��-� סנ,וה,נה,מה,םמ
+�מ f ��-� סנ,וה,הע,מה,םמ
+�ן f ��-� סנ,וה,גם,מה,םמ
+�נ f ��-� סנ,וה,עג,מה,םמ
+�ס f ��-� סנ,וה,ןנ,מה,םמ
+�ע f ��-� לם,טל,מה,םמ
+�ף f ��-� לם,נה,מה,םמ
+�פ f ��-� לם,הע,מה,םמ
+�ץ f ��-� לם,גם,םמ
+�ץ f ��-� לם,גם,מה
+�צ f ��-� לם,עג,מה,םמ
+�ק f ��-� לם,ןנ,מה,םמ
+�ר f ��-� 0,מה,םמ
+�ש g ��-����� וה,נה
+�� g ��-����� וה,הע
+�‎ g ��-����� וה,גם
+�‏ g ��-����� וה,עג
+
+// ‎עמ אםאכ. פמנלא "םו מ  ךמל"
+�� g ��-����� וה,ןנ
+
+�� g ��-�����
+‎א h ���� טל
+‎ב h ���� נה
+‎ג h ���� הע
+‎ד h ���� גם
+‎ה h ���� עג
+‎ו h ���� ןנ
+
+�א h ���� טל,אנץ
+�ב h ���� נה,אנץ
+�ג h ���� הע,אנץ
+�ד h ���� גם,אנץ
+�ה h ���� עג,אנץ
+�ו h ���� ןנ,אנץ
+
+
+‎ז h ���� לנ,טל
+‎ח h ���� לנ,נה
+‎ט h ���� לנ,הע
+‎י h ���� לנ,גם
+‎ך h ���� לנ,עג
+‎כ h ���� לנ,ןנ
+‎ל h ���� זנ,טל
+‎ם h ���� זנ,נה
+‎מ h ���� זנ,הע
+‎ן h ���� זנ,גם
+‎נ h ���� זנ,עג
+‎ס h ���� זנ,ןנ
+‎ע h ���� סנ,טל
+‎ף h ���� סנ,נה
+‎פ h ���� סנ,הע
+‎ץ h ���� סנ,גם
+‎צ h ���� סנ,עג
+‎ק h ���� סנ,ןנ
+‎ר h ���� סנאגם
+‏א i ����-� לנ,וה,טל,מה,םמ
+‏ב i ����-� לנ,וה,נה,מה,םמ
+‏ג i ����-� לנ,וה,הע,מה,םמ
+‏ד i ����-� לנ,וה,גם,םמ
+�ע i ����-� לנ,וה,גם,מה
+‏ה i ����-� לנ,וה,עג,מה,םמ
+‏ו i ����-� לנ,וה,ןנ,מה,םמ
+‏ז i ����-� זנ,וה,טל,מה,םמ
+‏ח i ����-� זנ,וה,נה,מה,םמ
+‏ט i ����-� זנ,וה,הע,מה,םמ
+‏י i ����-� זנ,וה,גם,מה,םמ
+‏ך i ����-� זנ,וה,עג,מה,םמ
+‏כ i ����-� זנ,וה,ןנ,מה,םמ
+‏ל i ����-� סנ,וה,טל,מה,םמ
+‏ם i ����-� סנ,וה,נה,מה,םמ
+‏מ i ����-� סנ,וה,הע,מה,םמ
+‏ן i ����-� סנ,וה,גם,מה,םמ
+‏נ i ����-� סנ,וה,עג,מה,םמ
+‏ס i ����-� סנ,וה,ןנ,מה,םמ
+‏ע i ����-� לם,טל,מה,םמ
+‏ף i ����-� לם,נה,מה,םמ
+‏פ i ����-� לם,הע,מה,םמ
+‏ץ i ����-� לם,גם,םמ
+�ף i ����-� לם,גם,מה
+‏צ i ����-� לם,עג,מה,םמ
+‏ק i ����-� לם,ןנ,מה,םמ
+‏ש i ����-� נה,מה,םמ
+
+//  םאנוקט�
+�א j �
+�ם j � גמןנ
+�מ j � ףךאחאע
+�ן j � נאחד
+
+// "לםו טםעונוסםמ","לםו ב�כמ סענארםמ"
+�ב k ����� םסע
+�ך k ����� ןנר
+�כ k ����� 
+// לםו כףקרו
+�נ k ����� סנאגם,םסע
+
+//  "גמע", "ץנףסע�" (םוע אםאכ. פמנל)
+�ל k ����� 0
+
+�ג l �����
+�ד m ����
+�ה n ����
+�ו o ����
+�¸ o ���� נאחד
+�ז p ����
+�ח q �����
+�י s ����
+�ן b � סענ,בףה,1כ,וה
+�נ b � סענ,בףה,1כ,לם
+�ס b � סענ,בףה,2כ,וה
+�ע b � סענ,בףה,2כ,לם
+�ף b � סענ,בףה,3כ,וה
+�פ b � סענ,בףה,3כ,לם
+
+
+
+
+// מבשטו דנאללול� (סכמגממבנאחמגאעוכ�ם�ו)
+�א a * כמך
+// �ב a * 
+// �ג a * 
+// �ד a * 
+// �ה a * 
+�ו a * ךאק
+�ז a * הפסע
+�ח a * הפסע,מנד
+�ט a * הפסע,כמך
+//�ך a * 
+�כ a * סג,ןו
+�ל a * סג,םן
+�ם a * םס,ןו
+�מ a * םס,םן
+�ן a * סג,םס,ןו
+�נ a * סג,םס,םן
+
+
+// הכ� בוחכטקם�ץ דכאדמכמג
+�ס a * םס
+�ע a * סג
+
+�פ a * זאנד
+�ץ a * מןק
+�ק a * זאנד,מןק
+�צ a * מנד,זאנד
+�ר a * כמך,זאנד
+
+�ש a * םמ,כמך
+�� a * םמ,מנד
+�� a * מה,פאל
+�ת a * םמ,הפסע,כמך
+�‎ a * םמ,הפסע,מנד
+�‏ a * םמ,זאנד
+�� a * םמ,מןק,
+�א a * םמ,
+�ב a * מה,
+�ג a * מנד,זאנד,םמ
+�ד a * הפסע,םמ
+�ה a * הפסע,מה
+�ז a * מה,זאנד
+�ח a * טל�,ןנטע�ז
+�ט a * ןנטע�ז
+�ך a * סג,ןו,נאחד
+�כ a * סג,םן,נאחד
+�ם a * םס,ןו,נאחד
+�מ a * םס,םן,נאחד
+�ן a * םמ,נאחד
+�נ a * מה,נאחד
+�ס a * סג,ןו,זאנד
+�ע a * סג,םן,זאנד
+�ף a * םס,ןו,זאנד
+�פ a * םס,םן,זאנד
+�ץ a * נאחד
+�צ a * אנץ
+�ק a * סג,ןו,אנץ
+�ר a * סג,םן,אנץ
+�ש a * םס,ןו,אנץ
+�� a * םס,םן,אנץ
+�� a * םמ,אנץ
+�ת a * מה,אנץ
+�‎ a * םס,אנץ
+�‏ a * סג,אנץ
+�� a * ךאק,אנץ
+�¸ a * םמ,מה
+�א a * מה,מןק,
+�ב a * כמך,מןק,
+
+
+
+�‏ F � לנ,זנ,סנ,וה,טל,נה,הע,גם,עג,ןנ
+�� F � לנ,זנ,סנ,,וה,לם,טל,נה,הע,גם,עג,ןנ

+ 3 - 0
dictonary/Dicts/SrcMorph/Eng.mwz

@@ -0,0 +1,3 @@
+MRD_FILE 	EngSrc/morphs.mrd
+LANG	        ENGLISH
+USERS           gri,alex,boris,masha,af,oleg,nim

File diff suppressed because it is too large
+ 105124 - 0
dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd


+ 3 - 0
dictonary/Dicts/SrcMorph/Rus.mwz

@@ -0,0 +1,3 @@
+MRD_FILE 	RusSrc/morphs.mrd
+LANG	        RUSSIAN
+USERS           alex,vse-imena,accentor,user2008

File diff suppressed because it is too large
+ 179826 - 0
dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd


+ 258 - 0
dictonary/Docs/Morph_UNIX.txt

@@ -0,0 +1,258 @@
+This is a program for morphological analysis (Russian, German, and English languages).
+
+This program is distributed under the Library GNU Public Licence, which is in the file
+COPYING.  
+
+This program was  written by Andrey Putrin, Alexey Sokirko.  
+The project started in Moscow in Dialing 
+Company (Russian and English language). The German part was created  
+at Berlin-Brandenburg Academy of Sciences and Humanities in  Berlin (the project DWDS). 
+
+The Russian  lexicon is based upon Zaliznyak's Dictionary .
+The German lexicon is based upon Morphy system (http://www-psycho.uni-paderborn.de/lezius/).
+The English  lexicon is based upon Wordnet.
+
+The project uses a regular expression library "PCRE" (Perl Compatible Regular Expressions).
+We test compilation only with version 6.4. Other versions were not tested. 
+One should download this version  from the official site and install it 
+to the default place. If you do not want to install it or you do not have enough
+rights to do it, then you should  create two environment variables: 
+	1.  RML_PCRE_LIB, that  points to PCRE library directory, where
+libpcre.a and libpcrecpp.a should be located, for example:
+	export RML_PCRE_LIB=~/RML/contrib/pcre-6.4/.libs
+    2  RML_PCRE_INCLUDE, that points to PCRE include catalog, 
+where "pcrecpp.h" is located, for example
+    export RML_PCRE_INCLUDE=~/RML/contrib/pcre-6.4
+
+
+The system has been developed under Windows 2000 (MS VS 6.0), but
+has also been compiled and run under Linux(GCC).  It should work with
+minor changes on other systems.
+
+Website of DDC: www.aot.ru, https://sf.net/projects/morph-lexicon/
+
+I compiled all sources with gcc 3.2. Lower versions are not supported.
+
+
+Contents of this source archive
+
+1.	The main morphological  library (Source/LemmatizerLib).
+2.	Library for grammatical codes (Source/AgramtabLib).
+3.	Test morphological program  (Source/TestLem).
+4.	Library for working with text version of the dictionaries (Source/MorphWizardLib).
+5.	Generator of morphological prediction base  (Source/GenPredIdx).
+6.	Generator of binary  format of the dictionaries (Source/MorphGen).
+
+
+=================================================
+====== 					 Installation       =====
+=================================================
+
+
+Unpacking
+
+* Create  a catalog and  register a system variable RML, which  points 
+to this catalog:
+	mkdir /home/sokirko/RML
+	export  RML=/home/sokirko/RML
+
+* Put "lemmatizer.tar.gz", "???-src-morph.tar.gz"
+to this catalog, "???" can be "rus", "ger" or "eng"
+according to what you have downloaded. Unpack it 
+ 	tar xfz lemmatizer.tar.gz
+	tar xfz ???-src-morph.tar.gz
+
+
+
+Compiling morphology
+
+  0. Do not forget to set RML_PCRE_LIB and RML_PCRE_INCLUDE (see above) if PCRE is not installed in the default place
+
+
+  1.  cd $RML
+	
+
+  2.   ./compile_morph.sh  
+      This step should create all libraries and a test program $RML/Bin/TestLem.
+
+
+Building Morphological Dictionary
+
+  1.  cd $RML
+
+  2.   ./generate_morph_bin.sh <lang>
+     where <lang> can be Russian or German, according to the dictionary
+    you have downloaded.
+
+  The script should terminate with message "Everything is OK". 
+  You can test the morphology 
+	$RML/Bin/TestLem <lang>
+
+
+
+
+
+If something goes wrong, write to me at sokirko@yandex.ru.
+
+
+
+
+======================================================
+==========      MRD-file                  ============
+======================================================
+
+	This section describes the format of a mrd-file. Mrd-file is a text 
+file which contains one morphological dictionary for one natural language. 
+MRD is an abbreviation of "morphological dictionary".
+	The usual place for this file is 
+
+	$RML/Dicts/SrcMorph/xxxSrc/morphs.mrd,
+
+where  xxx can be "Eng", "Rus" or  "Ger" depending on the language. 
+    The encoding of the file depends also upon the language:
+	* Russian - Windows 1251
+	* German  - Windows 1252
+	* English - ASCII
+
+
+   Gramtab-files
+
+
+	A mrd-file refers to a gramtab-file, which is 
+language-dependent and which contains all possible full morphological 
+patterns for the words. One line in a gramtab-file looks as follows:
+	<ancode> <unused_number> <part_of_speech> <grammems>
+	An ancode is an ID, which consists of two letters and which uniquely  
+identifies a morphological pattern. A morphological pattern consists of 
+<part_of_speech> and <grammems>. For example, here is a line from the English
+gramtab:
+
+	te 1 VBE prsa,pl      
+
+	Here "te" is an ancode,  "VBE" is a part of speech, "prsa,pl" are grammems,
+"1" is the obsolete  unused number.
+    In mrd-files we use ancodes to refer to a  morphological pattern.
+
+	Here is the list of all gramtab-files:
+	* Russian - $Rml/Dicts/Morph/rgramtab.tab
+	* German  - $Rml/Dicts/Morph/ggramtab.tab
+	* English - $Rml/Dicts/Morph/egramtab.tab
+
+
+
+   Common information 
+
+
+	All words in a mrd-file are written in uppercase.
+	One mrd-file consists of the following sections:
+		1. Section of flexion and prefix models;
+		2. Section of accentual models; 
+		3. Section of user sessions;
+	    4. Section of prefix sets;
+		5. Section of lemmas.
+	Each section is a set of records, one per line. The number of all records 
+of the section  is written in the very beginning of the section at 
+a separate line. For example, here is a possible variant 
+of the section of user sessions:
+
+1
+alex;17:10, 13 October 2003;17:12, 13 October 2003
+
+"1" means that this section contains only one record, which is written  
+on the next line, thus this section contains only two lines.
+
+
+
+	Section of possible flexion and prefix models
+
+
+	Each record of this section is a list of items. Each item 
+describes how one word form in a paradigm should be built. The whole list
+describes the whole paradigm (a set of word forms with morphological patterns). 
+The format  of one item is the following:
+		%<flexion>*<ancode>
+	or  %<flexion>*<ancode>*<prefix>
+		where  
+			<flexion> is a  flexion (a string, which should be added to right of the word base)
+			<prefix> is a  prefix (a string, which should be added to left of the word base)
+			<ancode> is an ancode.
+	Let us consider an example of an English flexion and prefix model:
+		%F*na%VES*nb
+	Here we have two items:
+		1. <flexion> = F;   <ancode> = na
+		2. <flexion> = VES;   <ancode> = nb
+		In order to decipher ancodes we should go to the English gramtab-file. 
+There we can find the following lines:
+			na NOUN narr,sg        
+			nb NOUN narr,pl
+		If base "lea" would be ascribed to this model,  then its paradigm 
+would be the following:
+		leaf 	NOUN narr,sg
+		leaves	NOUN narr,pl
+	It is important, that each word of a morphological dictionary 
+should contain a reference  to a line in this section.
+
+
+	Section of possible accentual models
+
+
+	Each record of this section is a comma-delimited list of numbers, where 
+each number is an index of a stressed  vowel of a word form(counting 
+from the end). The whole list contains a position for each word 
+form in the paradigm.	
+	If an item of an accentual model of word is equal to 255, then it 
+is undefined, and it means that this word  form is unstressed.  
+	Each word in the dictionary should have a reference  to 
+an accentual model, even though this model can consist only of empty items.
+	For one word, the number and the order of items in the  accentual model 
+should be equal to the number and the order of items  in the flexion and 
+prefix model. For example we can ascribe to word "leaf" with the paradigm  
+		leaf 	NOUN narr,sg
+		leaves	NOUN narr,pl
+the following accentual model:
+
+	2,3
+
+	It produces the following accented paradigm: 
+		le'af 	NOUN narr,sg
+		le'aves	NOUN narr,pl
+
+		
+
+	Section of user sessions
+
+	This is a system section, which contains information about user edit 
+sessions.
+
+
+	Section of prefix sets
+
+	Each record of this section is a comma-delimited list of strings, where 
+each string is a prefix, which can be prefixed to the whole word. If a prefix 
+set is ascribed to a word, it means, that the words with these prefixes
+can also exist  in the language. For example, if "leaf" has 
+the prefix  set "anti,contra", it follows the existence of  words "antileaf",
+"contraleaf".
+	A flexion and prefix model can contain
+also a reference to a prefix, but this prefix is for 
+one separate word form, while a prefix set  is ascribed to the whole word 
+paradigm.
+
+	
+	Section of lemmas
+
+	A record of this section is a space-separated tuple of the following format:
+
+	<base> <flex_model_no> <accent_model_no> <session_no> <type_ancode> <prefix_set_no>
+
+	where 
+
+	<base> is a base (a constant part of a word in its paradigm)
+	<flex_model_no> is an index  of a flexion and prefix model
+	<accent_model_no> is an index of an accentual model
+	<session_no> is an index of the session,  by which the last user edited this word
+	<type_ancode> is ancode, which is ascribed to the whole word 
+						(intended: the common part of grammems in the paradigm)
+					   "-" if it is undefined 
+	<prefix_set_no> is an index of a prefix set, or "-" if it is undefined
+

File diff suppressed because it is too large
+ 144 - 0
dictonary/copying


+ 23 - 0
english/pom.xml

@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+    <parent>
+        <artifactId>morphology</artifactId>
+        <groupId>org.apache.lucene.morphology</groupId>
+        <version>1.5</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>org.apache.lucene.morphology</groupId>
+    <artifactId>english</artifactId>
+    <name>english</name>
+    <version>1.5</version>
+    <url>http://maven.apache.org</url>
+    <dependencies>
+
+        <dependency>
+            <groupId>org.apache.lucene.morphology</groupId>
+            <artifactId>morph</artifactId>
+            <version>1.5</version>
+        </dependency>
+
+    </dependencies>
+</project>

+ 29 - 0
english/src/main/java/org/apache/lucene/morphology/english/EnglishAnalyzer.java

@@ -0,0 +1,29 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.english;
+
+import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer;
+
+import java.io.IOException;
+
+
+/**
+ * {@link MorphologyAnalyzer} that is wired to an {@link EnglishLuceneMorphology}.
+ */
+public class EnglishAnalyzer extends MorphologyAnalyzer {
+
+    /**
+     * Builds the analyzer on top of a newly constructed {@link EnglishLuceneMorphology}.
+     *
+     * @throws IOException propagated from the {@link EnglishLuceneMorphology} constructor
+     */
+    public EnglishAnalyzer() throws IOException {
+        super(new EnglishLuceneMorphology());
+    }
+
+}

+ 115 - 0
english/src/main/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoder.java

@@ -0,0 +1,115 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.english;
+
+import org.apache.lucene.morphology.LetterDecoderEncoder;
+import org.apache.lucene.morphology.SuffixToLongException;
+import org.apache.lucene.morphology.WrongCharaterException;
+
+import java.util.ArrayList;
+
+
+/**
+ * Packs strings made of lowercase English letters and dashes into base-28 integers and back.
+ *
+ * <p>Digit values: {@code 0} is padding, {@code 1..26} stand for {@code 'a'..'z'} and
+ * {@code 27} ({@link #DASH_CODE}) stands for {@code '-'}. One integer holds up to six
+ * characters; shorter strings are padded on the right with zero digits, which
+ * {@link #decode(Integer)} drops again.
+ */
+public class EnglishLetterDecoderEncoder implements LetterDecoderEncoder {
+    public static final int ENGLISH_SMALL_LETTER_OFFSET = 96;
+    static public int SUFFIX_LENGTH = 6;
+    public static final int DASH_CHAR = 45;
+    public static final int DASH_CODE = 27;
+
+    /** Number of characters packed into one integer. */
+    private static final int CHUNK_LENGTH = 6;
+    /** Radix of the packing: padding + 26 letters + dash. */
+    private static final int RADIX = 28;
+
+    /**
+     * Encodes a string of at most six characters into one integer.
+     *
+     * @param string text to encode, at most six characters long
+     * @return the base-28 value of the text, right-padded with zero digits to six positions
+     * @throws SuffixToLongException  if the text is longer than six characters
+     * @throws WrongCharaterException if a character maps outside the digit range 0..27
+     */
+    public Integer encode(String string) {
+        if (string.length() > CHUNK_LENGTH) {
+            throw new SuffixToLongException("Suffix length should not be greater than " + CHUNK_LENGTH);
+        }
+        int result = 0;
+        for (int i = 0; i < string.length(); i++) {
+            char ch = string.charAt(i);
+            int digit = ch == DASH_CHAR ? DASH_CODE : ch - ENGLISH_SMALL_LETTER_OFFSET;
+            // NOTE(review): '`' (digit 0) and '{' (digit 27) pass this range check although
+            // checkCharacter rejects them; kept as is to preserve existing encoded values.
+            if (digit < 0 || digit > DASH_CODE) {
+                throw new WrongCharaterException("Symbol " + ch + " is not a small English letter or dash");
+            }
+            result = result * RADIX + digit;
+        }
+        // Right-pad with zero digits so every value occupies all six positions.
+        for (int i = string.length(); i < CHUNK_LENGTH; i++) {
+            result *= RADIX;
+        }
+        return result;
+    }
+
+    /**
+     * Encodes a string of any length by splitting it into six-character chunks.
+     *
+     * @param s text to encode
+     * @return one encoded integer per chunk; an empty string yields a single {@code 0}
+     */
+    public int[] encodeToArray(String s) {
+        int chunkCount = Math.max(1, (s.length() + CHUNK_LENGTH - 1) / CHUNK_LENGTH);
+        int[] ints = new int[chunkCount];
+        for (int i = 0; i < chunkCount; i++) {
+            int from = i * CHUNK_LENGTH;
+            int to = Math.min(s.length(), from + CHUNK_LENGTH);
+            ints[i] = encode(s.substring(from, to));
+        }
+        return ints;
+    }
+
+    /**
+     * Decodes every element with {@link #decode(Integer)} and concatenates the results.
+     *
+     * @param array encoded chunks
+     * @return the concatenated decoded text
+     */
+    public String decodeArray(int[] array) {
+        StringBuilder result = new StringBuilder();
+        for (int i : array) {
+            result.append(decode(i));
+        }
+        return result.toString();
+    }
+
+
+    /**
+     * Decodes one integer produced by {@link #encode(String)}.
+     *
+     * @param suffixN encoded value
+     * @return the decoded text; zero digits below the most significant one are skipped
+     */
+    public String decode(Integer suffixN) {
+        int rest = suffixN;
+        // Digits come out least significant first, so collect them and reverse at the end.
+        StringBuilder reversed = new StringBuilder();
+        while (rest > DASH_CODE) {
+            int digit = rest % RADIX;
+            rest /= RADIX;
+            if (digit != 0) {
+                reversed.append(toCharacter(digit));
+            }
+        }
+        // The most significant digit is always emitted, even when it is 0.
+        reversed.append(toCharacter(rest));
+        return reversed.reverse().toString();
+    }
+
+    /** Maps a digit to its character: {@link #DASH_CODE} to the dash, anything else by offset. */
+    private static char toCharacter(int digit) {
+        return digit == DASH_CODE ? (char) DASH_CHAR : (char) (digit + ENGLISH_SMALL_LETTER_OFFSET);
+    }
+
+    /**
+     * Tells whether the character is a dash or one of {@code 'a'..'z'}.
+     *
+     * @param c character to test
+     * @return {@code true} for a dash or a lowercase English letter
+     */
+    public boolean checkCharacter(char c) {
+        return c == DASH_CHAR || (c >= 'a' && c <= 'z');
+    }
+
+
+    /**
+     * Tells whether every character of the word passes {@link #checkCharacter(char)}.
+     *
+     * @param word text to test
+     * @return {@code true} if all characters are accepted (also for an empty string)
+     */
+    public boolean checkString(String word) {
+        for (int i = 0; i < word.length(); i++) {
+            if (!checkCharacter(word.charAt(i))) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Returns the argument unchanged.
+     *
+     * @param s text to clean
+     * @return {@code s} itself
+     */
+    public String cleanString(String s) {
+        return s;
+    }
+
+}

+ 28 - 0
english/src/main/java/org/apache/lucene/morphology/english/EnglishLuceneMorphology.java

@@ -0,0 +1,28 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.english;
+
+import org.apache.lucene.morphology.LuceneMorphology;
+
+import java.io.IOException;
+
+
+/**
+ * {@link LuceneMorphology} configured with the English {@code morph.info} resource and
+ * {@link EnglishLetterDecoderEncoder}.
+ */
+public class EnglishLuceneMorphology extends LuceneMorphology {
+
+    /**
+     * Passes the classpath resource {@code /org/apache/lucene/morphology/english/morph.info}
+     * and a new {@link EnglishLetterDecoderEncoder} to the superclass constructor.
+     *
+     * @throws IOException propagated from the superclass constructor
+     */
+    public EnglishLuceneMorphology() throws IOException {
+        super(EnglishLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/english/morph.info"), new EnglishLetterDecoderEncoder());
+    }
+}

+ 28 - 0
english/src/main/java/org/apache/lucene/morphology/english/EnglishMorphology.java

@@ -0,0 +1,28 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.english;
+
+import org.apache.lucene.morphology.MorphologyImpl;
+
+import java.io.IOException;
+
+
+/**
+ * {@link MorphologyImpl} configured with the English {@code morph.info} resource and
+ * {@link EnglishLetterDecoderEncoder}.
+ */
+public class EnglishMorphology extends MorphologyImpl {
+
+    /**
+     * Passes the classpath resource {@code /org/apache/lucene/morphology/english/morph.info}
+     * and a new {@link EnglishLetterDecoderEncoder} to the superclass constructor.
+     *
+     * @throws IOException propagated from the superclass constructor
+     */
+    public EnglishMorphology() throws IOException {
+        // The resource is looked up through EnglishLuceneMorphology.class; the path is absolute.
+        super(EnglishLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/english/morph.info"), new EnglishLetterDecoderEncoder());
+    }
+}

+ 45 - 0
english/src/main/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmer.java

@@ -0,0 +1,45 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.english.stemmer;
+
+
+import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
+
+import java.util.List;
+
+/**
+ * Maps an English word to a single normal form when the morphology gives an
+ * unambiguous answer, and leaves the word untouched otherwise.
+ */
+public class EnglishStemmer {
+    private EnglishLuceneMorphology morphology;
+
+    /**
+     * @param englishLuceneMorphology morphology used to validate words and look up normal forms
+     */
+    public EnglishStemmer(EnglishLuceneMorphology englishLuceneMorphology) {
+        this.morphology = englishLuceneMorphology;
+    }
+
+    /**
+     * Returns the stem of {@code word}.
+     *
+     * <p>The word is returned unchanged when the morphology rejects it via
+     * {@code checkString}, or when more than one candidate normal form remains
+     * after the word itself has been discarded from the candidates.
+     *
+     * @param word the word to stem
+     * @return the single normal form, or {@code word} itself when no unique form exists
+     */
+    public String getStemmedWord(String word) {
+        if (!morphology.checkString(word)) {
+            return word;
+        }
+        List<String> candidates = morphology.getNormalForms(word);
+        if (candidates.size() != 1) {
+            // Ambiguous (or empty): drop one occurrence of the word itself and look again.
+            candidates.remove(word);
+        }
+        return candidates.size() == 1 ? candidates.get(0) : word;
+    }
+
+}

+ 48 - 0
english/src/main/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerFilter.java

@@ -0,0 +1,48 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.english.stemmer;
+
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+import java.io.IOException;
+
+/**
+ * Token filter that replaces the text of every token with the result of
+ * {@link EnglishStemmer#getStemmedWord(String)}.
+ */
+public class EnglishStemmerFilter extends TokenFilter {
+    private EnglishStemmer englishStemmer;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+    /**
+     * @param input          upstream token stream
+     * @param englishStemmer stemmer applied to each token's term text
+     */
+    public EnglishStemmerFilter(TokenStream input, EnglishStemmer englishStemmer) {
+        super(input);
+        this.englishStemmer = englishStemmer;
+    }
+
+
+    /**
+     * Advances the wrapped stream and overwrites the term attribute with the stemmed text.
+     *
+     * @return {@code false} once the wrapped stream is exhausted, {@code true} otherwise
+     * @throws IOException if the wrapped stream fails
+     */
+    @Override
+    public final boolean incrementToken() throws IOException {
+        if (!input.incrementToken()) {
+            return false;
+        }
+        String token = new String(termAtt.buffer(), 0, termAtt.length());
+        String stem = englishStemmer.getStemmedWord(token);
+        termAtt.setEmpty();
+        termAtt.append(stem);
+        return true;
+    }
+
+}

+ 0 - 0
english/src/main/resources/org/apache/lucene/morphology/english/exceptions.txt


File diff suppressed because it is too large
+ 208467 - 0
english/src/main/resources/org/apache/lucene/morphology/english/morph.info


+ 40 - 0
english/src/test/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoderTest.java

@@ -0,0 +1,40 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.english;
+
+import static org.hamcrest.core.IsEqual.equalTo;
+import static org.junit.Assert.assertThat;
+import org.junit.Before;
+
+
+/** Round-trip checks for {@link EnglishLetterDecoderEncoder}. */
+public class EnglishLetterDecoderEncoderTest {
+    /** Words that must survive encodeToArray followed by decodeArray unchanged. */
+    private static final String[] ROUND_TRIP_WORDS = {
+            "abcdefghijklmnopqrstuvwxyz",
+            "xyz",
+            "ytrrty",
+            "ytrrtyz",
+            "ytrrtyzqwqwe",
+    };
+
+    private EnglishLetterDecoderEncoder decoderEncoder;
+
+    @Before
+    public void setUp() {
+        decoderEncoder = new EnglishLetterDecoderEncoder();
+    }
+
+    @org.junit.Test
+    public void testDecodeEncodeToArray() {
+        for (String word : ROUND_TRIP_WORDS) {
+            assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray(word)), equalTo(word));
+        }
+    }
+}

+ 49 - 0
english/src/test/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerTest.java

@@ -0,0 +1,49 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.english.stemmer;
+
+import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
+import org.junit.Test;
+import static org.hamcrest.core.IsEqual.equalTo;
+import static org.junit.Assert.assertThat;
+
+
+public class EnglishStemmerTest {
+    @Test
+    public void testGetStemmedWord() throws Exception {
+        EnglishLuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();
+        EnglishStemmer englishStemmer = new EnglishStemmer(englishLuceneMorphology);
+        assertThat(englishStemmer.getStemmedWord("running"),equalTo("run"));
+        assertThat(englishStemmer.getStemmedWord("run"),equalTo("run"));
+        assertThat(englishStemmer.getStemmedWord("killed"),equalTo("kill"));
+        assertThat(englishStemmer.getStemmedWord("kill"),equalTo("kill"));
+        assertThat(englishStemmer.getStemmedWord("networking"),equalTo("network"));
+        assertThat(englishStemmer.getStemmedWord("network"),equalTo("network"));
+        assertThat(englishStemmer.getStemmedWord("statistics"),equalTo("statistic"));
+        assertThat(englishStemmer.getStemmedWord("statistic"),equalTo("statistic"));
+        assertThat(englishStemmer.getStemmedWord("stats"),equalTo("stat"));
+        assertThat(englishStemmer.getStemmedWord("stat"),equalTo("stat"));
+        assertThat(englishStemmer.getStemmedWord("countries"),equalTo("country"));
+        assertThat(englishStemmer.getStemmedWord("country"),equalTo("country"));
+        assertThat(englishStemmer.getStemmedWord("delete"),equalTo("delete"));
+        assertThat(englishStemmer.getStemmedWord("ended"),equalTo("end"));
+        assertThat(englishStemmer.getStemmedWord("end"),equalTo("end"));
+        assertThat(englishStemmer.getStemmedWord("ends"),equalTo("end"));
+        assertThat(englishStemmer.getStemmedWord("given"),equalTo("give"));
+        assertThat(englishStemmer.getStemmedWord("give"),equalTo("give"));
+        assertThat(englishStemmer.getStemmedWord("log4j"),equalTo("log4j"));
+    }
+}

+ 13 - 0
etc/header.txt

@@ -0,0 +1,13 @@
+Copyright 2009 Alexander Kuznetsov 
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.

+ 15 - 0
morph/pom.xml

@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+    <parent>
+        <artifactId>morphology</artifactId>
+        <groupId>org.apache.lucene.morphology</groupId>
+        <version>1.5</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>org.apache.lucene.morphology</groupId>
+    <artifactId>morph</artifactId>
+    <name>morph</name>
+    <version>1.5</version>
+    <url>http://maven.apache.org</url>
+
+</project>

+ 55 - 0
morph/src/main/java/org/apache/lucene/morphology/BaseLetterDecoderEncoder.java

@@ -0,0 +1,55 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology;
+
+import java.util.ArrayList;
+
+
+/**
+ * Skeleton {@link LetterDecoderEncoder} that implements the array and
+ * whole-string operations on top of the single-chunk primitives
+ * ({@code encode}, {@code decode}, {@code checkCharacter}) supplied by subclasses.
+ */
+public abstract class BaseLetterDecoderEncoder implements LetterDecoderEncoder {
+    /** Maximum number of characters handed to a single {@code encode} call. */
+    private static final int CHUNK_LENGTH = 6;
+
+    /**
+     * Splits {@code s} into consecutive chunks of at most six characters and
+     * encodes each chunk with {@code encode}.
+     *
+     * <p>An empty string still yields one element (the encoding of {@code ""}).
+     *
+     * @param s the string to encode
+     * @return one encoded value per chunk, in string order
+     */
+    public int[] encodeToArray(String s) {
+        int length = s.length();
+        // ceil(length / CHUNK_LENGTH), but never fewer than one chunk.
+        int chunkCount = Math.max(1, (length + CHUNK_LENGTH - 1) / CHUNK_LENGTH);
+        int[] ints = new int[chunkCount];
+        for (int chunk = 0; chunk < chunkCount; chunk++) {
+            int from = chunk * CHUNK_LENGTH;
+            int to = Math.min(length, from + CHUNK_LENGTH);
+            ints[chunk] = encode(s.substring(from, to));
+        }
+        return ints;
+    }
+
+    /**
+     * Decodes every element with {@code decode} and concatenates the results.
+     *
+     * @param array encoded chunks
+     * @return the concatenated decoded chunks; empty for an empty array
+     */
+    public String decodeArray(int[] array) {
+        // StringBuilder avoids re-copying the accumulated text on every chunk.
+        StringBuilder result = new StringBuilder();
+        for (int i : array) {
+            result.append(decode(i));
+        }
+        return result.toString();
+    }
+
+    /**
+     * Tells whether every character of {@code word} passes {@code checkCharacter}.
+     *
+     * @param word the word to validate
+     * @return {@code true} if all characters are accepted (vacuously true for an empty word)
+     */
+    public boolean checkString(String word) {
+        for (int i = 0; i < word.length(); i++) {
+            if (!checkCharacter(word.charAt(i))) {
+                return false;
+            }
+        }
+        return true;
+    }
+}

+ 92 - 0
morph/src/main/java/org/apache/lucene/morphology/Heuristic.java

@@ -0,0 +1,92 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology;
+
+import java.io.Serializable;
+
+
+public class Heuristic implements Serializable {
+    byte actualSuffixLength;
+    String actualNormalSuffix;
+    short formMorphInfo;
+    short normalFormMorphInfo;
+
+    public Heuristic(String s) {
+        String[] strings = s.split("\\|");
+        actualSuffixLength = Byte.valueOf(strings[0]);
+        actualNormalSuffix = strings[1];
+        formMorphInfo = Short.valueOf(strings[2]);
+        normalFormMorphInfo = Short.valueOf(strings[3]);
+    }
+
+    public Heuristic(byte actualSuffixLength, String actualNormalSuffix, short formMorphInfo, short normalFormMorphInfo) {
+        this.actualSuffixLength = actualSuffixLength;
+        this.actualNormalSuffix = actualNormalSuffix;
+        this.formMorphInfo = formMorphInfo;
+        this.normalFormMorphInfo = normalFormMorphInfo;
+    }
+
+    public StringBuilder transformWord(String w) {
+        if (w.length() - actualSuffixLength < 0) return new StringBuilder(w);
+        return new StringBuilder(w.substring(0, w.length() - actualSuffixLength)).append(actualNormalSuffix);
+    }
+
+    public byte getActualSuffixLength() {
+        return actualSuffixLength;
+    }
+
+    public String getActualNormalSuffix() {
+        return actualNormalSuffix;
+    }
+
+    public short getFormMorphInfo() {
+        return formMorphInfo;
+    }
+
+    public short getNormalFormMorphInfo() {
+        return normalFormMorphInfo;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        Heuristic heuristic = (Heuristic) o;
+
+        if (actualSuffixLength != heuristic.actualSuffixLength) return false;
+        if (formMorphInfo != heuristic.formMorphInfo) return false;
+        if (normalFormMorphInfo != heuristic.normalFormMorphInfo) return false;
+        if (actualNormalSuffix != null ? !actualNormalSuffix.equals(heuristic.actualNormalSuffix) : heuristic.actualNormalSuffix != null)
+            return false;
+
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        int result = (int) actualSuffixLength;
+        result = 31 * result + (actualNormalSuffix != null ? actualNormalSuffix.hashCode() : 0);
+        result = 31 * result + (int) formMorphInfo;
+        result = 31 * result + (int) normalFormMorphInfo;
+        return result;
+    }
+
+    @Override
+    public String toString() {
+        return "" + actualSuffixLength + "|" + actualNormalSuffix + "|" + formMorphInfo + "|" + normalFormMorphInfo;
+    }
+}

+ 33 - 0
morph/src/main/java/org/apache/lucene/morphology/LetterDecoderEncoder.java

@@ -0,0 +1,33 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology;
+
+
+/**
+ * Converts between strings and their integer encoding, and validates the
+ * characters an implementation is able to encode.
+ */
+public interface LetterDecoderEncoder {
+    /** Encodes a single string into one integer. */
+    Integer encode(String string);
+
+    /** Encodes a string into an array of integers. */
+    int[] encodeToArray(String s);
+
+    /** Decodes an array of encoded integers back into a string. */
+    String decodeArray(int[] array);
+
+    /** Decodes one encoded integer back into a string. */
+    String decode(Integer suffixN);
+
+    /** Tells whether the given character is accepted by this implementation. */
+    boolean checkCharacter(char c);
+
+    /** Tells whether the given word is accepted by this implementation. */
+    boolean checkString(String word);
+
+    /** Returns a cleaned form of {@code s}; the exact rules are implementation-defined. */
+    String cleanString(String s);
+}

+ 70 - 0
morph/src/main/java/org/apache/lucene/morphology/LuceneMorphology.java

@@ -0,0 +1,70 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology;
+
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+
+
+/**
+ * {@link MorphologyImpl} variant that, while loading, keeps only one heuristic
+ * per distinct (normal suffix, suffix length) pair within each rule, and that
+ * exposes the encoder's word validation.
+ */
+public class LuceneMorphology extends MorphologyImpl {
+
+    public LuceneMorphology(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {
+        super(fileName, decoderEncoder);
+    }
+
+    public LuceneMorphology(InputStream inputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
+        super(inputStream, decoderEncoder);
+    }
+
+    /**
+     * Reads the rules section: a rule count, then for every rule a heuristic
+     * count followed by one heuristic per line. Each rule is de-duplicated
+     * before it is stored in {@code rules}.
+     *
+     * @param bufferedReader reader positioned at the start of the rules section
+     * @throws IOException if reading fails
+     */
+    @Override
+    protected void readRules(BufferedReader bufferedReader) throws IOException {
+        int ruleCount = Integer.parseInt(bufferedReader.readLine());
+        rules = new Heuristic[ruleCount][];
+        for (int ruleIndex = 0; ruleIndex < ruleCount; ruleIndex++) {
+            int heuristicCount = Integer.parseInt(bufferedReader.readLine());
+            Heuristic[] loaded = new Heuristic[heuristicCount];
+            for (int k = 0; k < heuristicCount; k++) {
+                loaded[k] = new Heuristic(bufferedReader.readLine());
+            }
+            rules[ruleIndex] = dropDuplicateTransformations(loaded);
+        }
+    }
+
+
+    /**
+     * Keeps the first heuristic of every (normal suffix, suffix length) pair,
+     * preserving the original order; later heuristics that would rewrite a
+     * word the same way are discarded.
+     */
+    private Heuristic[] dropDuplicateTransformations(Heuristic[] heuristics) {
+        ArrayList<Heuristic> kept = new ArrayList<Heuristic>();
+        for (Heuristic candidate : heuristics) {
+            boolean duplicate = false;
+            for (Heuristic existing : kept) {
+                if (existing.getActualNormalSuffix().equals(candidate.getActualNormalSuffix())
+                        && existing.getActualSuffixLength() == candidate.getActualSuffixLength()) {
+                    duplicate = true;
+                    break;
+                }
+            }
+            if (!duplicate) {
+                kept.add(candidate);
+            }
+        }
+        return kept.toArray(new Heuristic[kept.size()]);
+    }
+
+    /**
+     * Delegates to the encoder's {@code checkString}.
+     *
+     * @param s the word to validate
+     * @return whatever the configured {@link LetterDecoderEncoder} reports for {@code s}
+     */
+    public boolean checkString(String s) {
+        return decoderEncoder.checkString(s);
+    }
+}

+ 27 - 0
morph/src/main/java/org/apache/lucene/morphology/Morphology.java

@@ -0,0 +1,27 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology;
+
+import java.util.List;
+
+
+/** Lookup of normal forms and morphological information for a word. */
+public interface Morphology {
+
+    /**
+     * Returns the candidate normal forms of the given word.
+     *
+     * @param s the word to look up
+     * @return the normal forms found for {@code s}
+     */
+    List<String> getNormalForms(String s);
+
+    /**
+     * Returns morphological information about the given word.
+     *
+     * @param s the word to look up
+     * @return one descriptive string per analysis of {@code s}
+     */
+    List<String> getMorphInfo(String s);
+
+}

+ 208 - 0
morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java

@@ -0,0 +1,208 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology;
+
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Dictionary-backed {@link Morphology}.
+ *
+ * <p>A word is reversed, encoded to an {@code int[]} key and located among the
+ * {@code separators} by binary search; the matching entry of {@code rulesId}
+ * selects a row of {@code rules}, whose heuristics are then applied to the word.
+ *
+ * <p>The dictionary text format (UTF-8, one value per line) is, in order:
+ * the separator count; for each separator its length followed by its values;
+ * one rule id per separator; the rule count; for each rule its heuristic count
+ * followed by the heuristics; the grammar-info count followed by the entries.
+ */
+public class MorphologyImpl implements Morphology {
+    protected int[][] separators;
+    protected short[] rulesId;
+    protected Heuristic[][] rules;
+    protected String[] grammarInfo;
+    protected LetterDecoderEncoder decoderEncoder;
+
+
+    /**
+     * Loads the dictionary from a file.
+     *
+     * @param fileName       path of the dictionary file
+     * @param decoderEncoder encoder used to turn words into lookup keys
+     * @throws IOException if the file cannot be read
+     */
+    public MorphologyImpl(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {
+        readFromFile(fileName);
+        this.decoderEncoder = decoderEncoder;
+    }
+
+    /**
+     * Loads the dictionary from a stream; the stream is closed afterwards.
+     *
+     * @param inputStream    dictionary data
+     * @param decoderEncoder encoder used to turn words into lookup keys
+     * @throws IOException if the stream cannot be read
+     */
+    public MorphologyImpl(InputStream inputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
+        readFromInputStream(inputStream);
+        this.decoderEncoder = decoderEncoder;
+    }
+
+    /**
+     * Builds the morphology from already prepared tables.
+     * NOTE(review): this constructor leaves {@code decoderEncoder} unset, so the
+     * lookup methods cannot be used on such an instance unless a subclass assigns it.
+     */
+    public MorphologyImpl(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammarInfo) {
+        this.separators = separators;
+        this.rulesId = rulesId;
+        this.rules = rules;
+        this.grammarInfo = grammarInfo;
+    }
+
+    /**
+     * Applies every heuristic of the rule matching {@code s} and collects the results.
+     * A heuristic that produces an empty string contributes the original word instead,
+     * and only the first such heuristic does so.
+     *
+     * @param s the word to normalise
+     * @return the candidate normal forms, in heuristic order
+     */
+    public List<String> getNormalForms(String s) {
+        ArrayList<String> result = new ArrayList<String>();
+        int[] ints = decoderEncoder.encodeToArray(revertWord(s));
+        int ruleId = findRuleId(ints);
+        boolean notSeenEmptyString = true;
+        for (Heuristic h : rules[rulesId[ruleId]]) {
+            String e = h.transformWord(s).toString();
+            if (e.length() > 0) {
+                result.add(e);
+            } else if (notSeenEmptyString) {
+                result.add(s);
+                notSeenEmptyString = false;
+            }
+        }
+        return result;
+    }
+
+    /**
+     * For every heuristic of the rule matching {@code s}, returns the transformed
+     * word followed by {@code |} and the grammar info of the word form.
+     *
+     * @param s the word to analyse
+     * @return one {@code transformed|grammarInfo} string per heuristic
+     */
+    public List<String> getMorphInfo(String s) {
+        ArrayList<String> result = new ArrayList<String>();
+        int[] ints = decoderEncoder.encodeToArray(revertWord(s));
+        int ruleId = findRuleId(ints);
+        for (Heuristic h : rules[rulesId[ruleId]]) {
+            result.add(h.transformWord(s).append("|").append(grammarInfo[h.getFormMorphInfo()]).toString());
+        }
+        return result;
+    }
+
+    /**
+     * Binary-searches {@code separators} for the given key.
+     *
+     * @param ints encoded lookup key
+     * @return the index of an equal separator, otherwise the index just below the
+     *         position where the search stopped; this is {@code -1} when the key
+     *         compares lower than every separator
+     */
+    protected int findRuleId(int[] ints) {
+        int low = 0;
+        int high = separators.length - 1;
+        int mid = 0;
+        while (low <= high) {
+            mid = (low + high) >>> 1;
+            int[] midVal = separators[mid];
+
+            int comResult = compareToInts(ints, midVal);
+            if (comResult > 0)
+                low = mid + 1;
+            else if (comResult < 0)
+                high = mid - 1;
+            else
+                break;
+        }
+        // The last probed separator is either the answer or its right neighbour.
+        if (compareToInts(ints, separators[mid]) >= 0) {
+            return mid;
+        } else {
+            return mid - 1;
+        }
+
+    }
+
+    /** Lexicographic comparison of two int arrays; a proper prefix sorts first. */
+    private int compareToInts(int[] i1, int[] i2) {
+        int minLength = Math.min(i1.length, i2.length);
+        for (int i = 0; i < minLength; i++) {
+            int i3 = Integer.compare(i1[i], i2[i]);
+            if (i3 != 0) return i3;
+        }
+        return i1.length - i2.length;
+    }
+
+    /**
+     * Writes the dictionary in the text format described on the class.
+     *
+     * @param fileName path of the file to create or overwrite
+     * @throws IOException if writing fails; the file handle is released either way
+     */
+    public void writeToFile(String fileName) throws IOException {
+        // try-with-resources: the writer is closed even when a write fails.
+        try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8")) {
+            writer.write(separators.length + "\n");
+            for (int[] i : separators) {
+                writer.write(i.length + "\n");
+                for (int j : i) {
+                    writer.write(j + "\n");
+                }
+            }
+            for (short i : rulesId) {
+                writer.write(i + "\n");
+            }
+            writer.write(rules.length + "\n");
+            for (Heuristic[] heuristics : rules) {
+                writer.write(heuristics.length + "\n");
+                for (Heuristic heuristic : heuristics) {
+                    writer.write(heuristic.toString() + "\n");
+                }
+            }
+            writer.write(grammarInfo.length + "\n");
+            for (String s : grammarInfo) {
+                writer.write(s + "\n");
+            }
+        }
+    }
+
+    /**
+     * Replaces the tables of this instance with the content of the given file.
+     *
+     * @param fileName path of the dictionary file
+     * @throws IOException if the file cannot be opened or read
+     */
+    public void readFromFile(String fileName) throws IOException {
+        // Closing here as well guarantees the handle is released if wrapping it fails.
+        try (FileInputStream inputStream = new FileInputStream(fileName)) {
+            readFromInputStream(inputStream);
+        }
+    }
+
+    /** Parses the whole dictionary from the stream and closes it, also on failure. */
+    private void readFromInputStream(InputStream inputStream) throws IOException {
+        try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))) {
+            int amount = Integer.parseInt(bufferedReader.readLine());
+
+            readSeparators(bufferedReader, amount);
+
+            readRulesId(bufferedReader, amount);
+
+            readRules(bufferedReader);
+            readGrammaInfo(bufferedReader);
+        }
+    }
+
+    /** Reads the grammar-info section: a count followed by that many lines. */
+    private void readGrammaInfo(BufferedReader bufferedReader) throws IOException {
+        int amount = Integer.parseInt(bufferedReader.readLine());
+        grammarInfo = new String[amount];
+        for (int i = 0; i < amount; i++) {
+            grammarInfo[i] = bufferedReader.readLine();
+        }
+    }
+
+    /**
+     * Reads the rules section: a rule count, then for every rule a heuristic
+     * count followed by one heuristic per line. Subclasses may override this to
+     * post-process the rules; note that it runs while the constructor is executing.
+     *
+     * @param bufferedReader reader positioned at the start of the rules section
+     * @throws IOException if reading fails
+     */
+    protected void readRules(BufferedReader bufferedReader) throws IOException {
+        int amount = Integer.parseInt(bufferedReader.readLine());
+        rules = new Heuristic[amount][];
+        for (int i = 0; i < amount; i++) {
+            int ruleLength = Integer.parseInt(bufferedReader.readLine());
+            rules[i] = new Heuristic[ruleLength];
+            for (int j = 0; j < ruleLength; j++) {
+                rules[i][j] = new Heuristic(bufferedReader.readLine());
+            }
+        }
+    }
+
+    /** Reads {@code amount} rule ids, one per line. */
+    private void readRulesId(BufferedReader bufferedReader, int amount) throws IOException {
+        rulesId = new short[amount];
+        for (int i = 0; i < amount; i++) {
+            rulesId[i] = Short.parseShort(bufferedReader.readLine());
+        }
+    }
+
+    /** Reads {@code amount} separators, each stored as a length followed by its values. */
+    private void readSeparators(BufferedReader bufferedReader, int amount) throws IOException {
+        separators = new int[amount][];
+        for (int i = 0; i < amount; i++) {
+            int wordLength = Integer.parseInt(bufferedReader.readLine());
+            separators[i] = new int[wordLength];
+            for (int j = 0; j < wordLength; j++) {
+                separators[i][j] = Integer.parseInt(bufferedReader.readLine());
+            }
+        }
+    }
+
+    /**
+     * Returns {@code s} with its {@code char}s in reverse order.
+     * The reversal is strictly per {@code char}; it is deliberately not replaced by
+     * {@code StringBuilder.reverse()}, which keeps surrogate pairs together and
+     * would therefore produce different keys for such input.
+     */
+    protected String revertWord(String s) {
+        StringBuilder result = new StringBuilder();
+        for (int i = 1; i <= s.length(); i++) {
+            result.append(s.charAt(s.length() - i));
+        }
+        return result.toString();
+    }
+}

+ 28 - 0
morph/src/main/java/org/apache/lucene/morphology/SuffixToLongException.java

@@ -0,0 +1,28 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology;
+
+
+/**
+ * Unchecked exception; going by its name it presumably reports a suffix that is
+ * too long to be handled - confirm against the code that throws it.
+ */
+public class SuffixToLongException extends RuntimeException {
+
+    /** Creates the exception without a detail message. */
+    public SuffixToLongException() {
+        super();
+    }
+
+    /**
+     * @param message detail message returned by {@link #getMessage()}
+     */
+    public SuffixToLongException(String message) {
+        super(message);
+    }
+}

+ 27 - 0
morph/src/main/java/org/apache/lucene/morphology/WrongCharaterException.java

@@ -0,0 +1,27 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology;
+
+
+/**
+ * Unchecked exception; going by its name it presumably reports a character that
+ * cannot be processed - confirm against the code that throws it.
+ */
+public class WrongCharaterException extends RuntimeException {
+
+    /** Creates the exception without a detail message. */
+    public WrongCharaterException() {
+        super();
+    }
+
+    /**
+     * @param message detail message returned by {@link #getMessage()}
+     */
+    public WrongCharaterException(String message) {
+        super(message);
+    }
+}

+ 78 - 0
morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java

@@ -0,0 +1,78 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology.analyzer;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.payloads.PayloadEncoder;
+import org.apache.lucene.analysis.payloads.PayloadHelper;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.morphology.LetterDecoderEncoder;
+import org.apache.lucene.morphology.LuceneMorphology;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+
+/**
+ * Analyzer whose token chain is {@link StandardTokenizer}, then
+ * {@link LowerCaseFilter}, then {@link MorphologyFilter} driven by the
+ * configured {@link LuceneMorphology}.
+ */
+public class MorphologyAnalyzer extends Analyzer {
+    private final LuceneMorphology luceneMorph;
+
+    /**
+     * @param luceneMorph morphology handed to the {@link MorphologyFilter}
+     */
+    public MorphologyAnalyzer(LuceneMorphology luceneMorph) {
+        this.luceneMorph = luceneMorph;
+    }
+
+    /**
+     * Loads the morphology from a dictionary file.
+     *
+     * @param pathToMorph          path of the dictionary file
+     * @param letterDecoderEncoder encoder used by the morphology
+     * @throws IOException if the dictionary cannot be read
+     */
+    public MorphologyAnalyzer(String pathToMorph, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
+        luceneMorph = new LuceneMorphology(pathToMorph, letterDecoderEncoder);
+    }
+
+    /**
+     * Loads the morphology from a dictionary stream.
+     *
+     * @param inputStream          dictionary data
+     * @param letterDecoderEncoder encoder used by the morphology
+     * @throws IOException if the dictionary cannot be read
+     */
+    public MorphologyAnalyzer(InputStream inputStream, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
+        luceneMorph = new LuceneMorphology(inputStream, letterDecoderEncoder);
+    }
+
+
+    /**
+     * Builds the tokenizer and filter chain; the field name is not used.
+     *
+     * @param s name of the field being analysed (ignored)
+     * @return components reading through a {@link StandardTokenizer} and ending in a
+     *         {@link MorphologyFilter}
+     */
+    @Override
+    protected TokenStreamComponents createComponents(String s) {
+        StandardTokenizer src = new StandardTokenizer();
+
+        TokenFilter filter = new LowerCaseFilter(src);
+        filter = new MorphologyFilter(filter, luceneMorph);
+
+        return new TokenStreamComponents(r -> src.setReader(r), filter);
+    }
+}

+ 87 - 0
morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyFilter.java

@@ -0,0 +1,87 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology.analyzer;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.morphology.LuceneMorphology;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+
+/**
+ * Token filter that replaces each token by its normal forms as reported by a
+ * {@link LuceneMorphology}.
+ * <ul>
+ * <li>Keyword tokens, empty tokens and tokens rejected by
+ * {@link LuceneMorphology#checkString(String)} pass through unchanged.</li>
+ * <li>A token with exactly one normal form is replaced by that form.</li>
+ * <li>A token with several normal forms is emitted once per form; the second and later
+ * forms get a position increment of 0 so that they are stacked on the first one.</li>
+ * <li>A token with no normal forms is dropped; its position increment is added to the next
+ * emitted token (or to the end-of-stream state) so that positions of later tokens stay
+ * correct.</li>
+ * </ul>
+ */
+public class MorphologyFilter extends TokenFilter {
+    private final LuceneMorphology luceneMorph;
+    /** Remaining normal forms of the current multi-form token, or {@code null} when there are none. */
+    private Iterator<String> iterator;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
+    private final PositionIncrementAttribute position = addAttribute(PositionIncrementAttribute.class);
+    /** Attribute snapshot of the current multi-form token, restored before each additional form. */
+    private State state = null;
+    /** Position increments of dropped tokens that have not yet been handed to an emitted token. */
+    private int skippedPositions;
+
+    /**
+     * @param tokenStream upstream token source
+     * @param luceneMorph morphology used to look up normal forms
+     */
+    public MorphologyFilter(TokenStream tokenStream, LuceneMorphology luceneMorph) {
+        super(tokenStream);
+        this.luceneMorph = luceneMorph;
+    }
+
+
+    /**
+     * Emits the next pending normal form, or pulls upstream tokens until one can be emitted.
+     *
+     * @return {@code false} once the upstream is exhausted, {@code true} otherwise
+     * @throws IOException if the upstream stream fails
+     */
+    @Override
+    public final boolean incrementToken() throws IOException {
+        if (iterator != null) {
+            if (iterator.hasNext()) {
+                // Same source token as before: restore its attributes and stack the form on it.
+                restoreState(state);
+                position.setPositionIncrement(0);
+                termAtt.setEmpty().append(iterator.next());
+                return true;
+            }
+            state = null;
+            iterator = null;
+        }
+        skippedPositions = 0;
+        while (input.incrementToken()) {
+            if (!keywordAttr.isKeyword() && termAtt.length() > 0) {
+                String s = termAtt.toString();
+                if (luceneMorph.checkString(s)) {
+                    List<String> forms = luceneMorph.getNormalForms(s);
+                    if (forms.isEmpty()) {
+                        // Dropped token: remember its increment instead of losing it.
+                        skippedPositions += position.getPositionIncrement();
+                        continue;
+                    }
+                    if (forms.size() == 1) {
+                        termAtt.setEmpty().append(forms.get(0));
+                    } else {
+                        state = captureState();
+                        iterator = forms.iterator();
+                        termAtt.setEmpty().append(iterator.next());
+                    }
+                }
+            }
+            if (skippedPositions != 0) {
+                position.setPositionIncrement(position.getPositionIncrement() + skippedPositions);
+                skippedPositions = 0;
+            }
+            return true;
+        }
+        // Upstream exhausted; any trailing skipped positions are reported by end().
+        return false;
+    }
+
+    /**
+     * Finishes the stream and adds the increments of tokens dropped after the last emitted
+     * token to the final position increment.
+     *
+     * @throws IOException if the upstream stream fails
+     */
+    @Override
+    public void end() throws IOException {
+        super.end();
+        position.setPositionIncrement(position.getPositionIncrement() + skippedPositions);
+    }
+
+    /**
+     * Resets the upstream stream and discards any pending forms and skipped positions.
+     *
+     * @throws IOException if the upstream stream fails
+     */
+    @Override
+    public void reset() throws IOException {
+        super.reset();
+        state = null;
+        iterator = null;
+        skippedPositions = 0;
+    }
+}

+ 183 - 0
pom.xml

@@ -0,0 +1,183 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>org.apache.lucene.morphology</groupId>
+    <artifactId>morphology</artifactId>
+    <packaging>pom</packaging>
+    <version>1.5</version>
+    <name>morphology</name>
+    <url>http://maven.apache.org</url>
+
+    <scm>
+        <connection>scm:git:https://github.com/AKuznetsov/russianmorphology.git</connection>
+        <developerConnection>scm:git:git@github.com:AKuznetsov/russianmorphology.git</developerConnection>
+        <url>https://github.com/AKuznetsov/russianmorphology</url>
+        <tag>HEAD</tag>
+    </scm>
+
+    <properties>
+        <lucene.version>8.0.0</lucene.version>
+        <morphology.version>1.5</morphology.version>
+        <junit.version>4.12</junit.version>
+    </properties>
+
+    <distributionManagement>
+        <repository>
+            <id>bintray</id>
+            <url>https://api.bintray.com/maven/akuznetsov/russianmorphology/morphology</url>
+        </repository>
+    </distributionManagement>
+
+    <licenses>
+        <license>
+            <name>Apache License, Version 2.0</name>
+            <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
+            <distribution>repo</distribution>
+        </license>
+    </licenses>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-test-framework</artifactId>
+            <version>${lucene.version}</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.hamcrest</groupId>
+            <artifactId>hamcrest-all</artifactId>
+            <version>1.1</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-core</artifactId>
+            <version>${lucene.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-analyzers-common</artifactId>
+            <version>${lucene.version}</version>
+        </dependency>
+    </dependencies>
+
+    <repositories>
+        <repository>
+            <id>maven2-repository.dev.java.net</id>
+            <name>Java.net Repository for Maven</name>
+            <url>http://download.java.net/maven/2/</url>
+        </repository>
+        <repository>
+            <id>bintray</id>
+            <url>http://dl.bintray.com/akuznetsov/russianmorphology</url>
+            <releases>
+                <enabled>true</enabled>
+            </releases>
+            <snapshots>
+                <enabled>false</enabled>
+            </snapshots>
+        </repository>
+    </repositories>
+    
+    <pluginRepositories>
+        <pluginRepository>
+            <id>mc-release</id>
+            <name>maven-license-plugin repository of releases</name>
+            <url>http://mc-repo.googlecode.com/svn/maven2/releases</url>
+            <snapshots>
+                <enabled>false</enabled>
+            </snapshots>
+            <releases>
+                <enabled>true</enabled>
+            </releases>
+        </pluginRepository>
+    </pluginRepositories>
+    <build>
+        <plugins>
+            <plugin>
+                <artifactId>maven-release-plugin</artifactId>
+                <version>2.5.3</version>
+                <configuration>
+                    <useReleaseProfile>false</useReleaseProfile>
+                    <releaseProfiles>release</releaseProfiles>
+                    <autoVersionSubmodules>true</autoVersionSubmodules>
+                </configuration>
+            </plugin>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.7.0</version>
+                <configuration>
+                    <source>1.8</source>
+                    <target>1.8</target>
+                </configuration>
+            </plugin>
+            <plugin>                <!--                 usage: http://code.google.com/p/maven-license-plugin/wiki/HowTo                -->
+                <artifactId>maven-license-plugin</artifactId>
+                <groupId>com.google.code.maven-license-plugin</groupId>
+                <version>1.4.0</version>
+                <configuration>
+                    <basedir>${project.parent.basedir}</basedir>
+                    <header>etc/header.txt</header>
+                    <excludes>
+                        <exclude>**/*.txt</exclude>
+                        <exclude>**/*.info</exclude>
+                        <exclude>**/pom.xml</exclude>
+                    </excludes>
+                    <includes>
+                        <include>**/src/**</include>
+                    </includes>
+                </configuration>
+                <executions>
+                    <execution>
+                        <phase>test</phase>
+                        <goals>
+                            <goal>check</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+    <profiles>
+        <profile>
+            <id>release</id>
+            <build>
+                <plugins>
+                    <plugin>
+                        <artifactId>maven-source-plugin</artifactId>
+                        <version>3.0.1</version>
+                        <executions>
+                            <execution>
+                                <id>attach-sources</id>
+                                <goals>
+                                    <goal>jar</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <artifactId>maven-javadoc-plugin</artifactId>
+                        <version>2.10.4</version>
+                        <executions>
+                            <execution>
+                                <id>attach-javadocs</id>
+                                <goals>
+                                    <goal>jar</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
+    <modules>
+        <module>morph</module>
+        <module>dictionary-reader</module>
+        <module>russian</module>
+        <module>english</module>
+        <module>solr-morphology-analysis</module>
+    </modules>
+</project>

+ 31 - 0
russian/pom.xml

@@ -0,0 +1,31 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+    <parent>
+        <artifactId>morphology</artifactId>
+        <groupId>org.apache.lucene.morphology</groupId>
+        <version>1.5</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>org.apache.lucene.morphology</groupId>
+    <artifactId>russian</artifactId>
+    <name>russian</name>
+    <version>1.5</version>
+    <url>http://maven.apache.org</url>
+    <dependencies>
+
+
+        <dependency>
+            <groupId>org.apache.lucene.morphology</groupId>
+            <artifactId>morph</artifactId>
+            <version>1.5</version>
+        </dependency>
+
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.8.2</version>
+            <scope>test</scope>
+        </dependency>
+
+    </dependencies>
+</project>

+ 27 - 0
russian/src/main/java/org/apache/lucene/morphology/russian/RussianAnalyzer.java

@@ -0,0 +1,27 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.russian;
+
+import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer;
+
+import java.io.IOException;
+
+
+/**
+ * {@link MorphologyAnalyzer} preconfigured with a {@link RussianLuceneMorphology}.
+ */
+public class RussianAnalyzer extends MorphologyAnalyzer {
+    /**
+     * Creates the analyzer with a freshly constructed {@link RussianLuceneMorphology}.
+     *
+     * @throws IOException if constructing the {@link RussianLuceneMorphology} fails
+     */
+    public RussianAnalyzer() throws IOException {
+        super(new RussianLuceneMorphology());
+    }
+}

+ 123 - 0
russian/src/main/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoder.java

@@ -0,0 +1,123 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.morphology.russian;
+
+import org.apache.lucene.morphology.LetterDecoderEncoder;
+import org.apache.lucene.morphology.SuffixToLongException;
+import org.apache.lucene.morphology.WrongCharaterException;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+
+/**
+ * Packs a short string of small Russian letters into a single base-34 integer and
+ * unpacks it again.
+ * <p>
+ * Every character becomes one digit: 0 is padding, 1..32 are the letters U+0430..U+044F,
+ * and {@link #DASH_CODE} is the dash. The letter yo (U+0451) is treated as the letter
+ * ye (U+0435), so the two coincide after encoding. One integer holds at most
+ * {@link #WORD_PART_LENGHT} characters; longer strings are split into several integers by
+ * {@link #encodeToArray(String)}.
+ */
+public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
+    public static final int RUSSIAN_SMALL_LETTER_OFFSET = 1071;
+    public static final int WORD_PART_LENGHT = 6;
+    public static final int EE_CHAR = 34;
+    public static final int E_CHAR = 6;
+    public static final int DASH_CHAR = 45;
+    public static final int DASH_CODE = 33;
+
+    /** Number of distinct digits: padding, 32 letters and the dash. */
+    private static final int RADIX = 34;
+    /** Code of the last letter (U+044F). */
+    private static final int LAST_LETTER_CODE = 32;
+
+    /**
+     * Encodes up to {@link #WORD_PART_LENGHT} characters into one integer. Shorter strings
+     * are right-padded with the zero digit, which keeps the numeric order of the results
+     * equal to the lexicographic order of the strings.
+     *
+     * @param string the text to encode
+     * @return the base-34 encoding of {@code string}
+     * @throws SuffixToLongException  if {@code string} is longer than {@link #WORD_PART_LENGHT}
+     * @throws WrongCharaterException if {@code string} contains anything other than a small
+     *                                Russian letter or a dash
+     */
+    public Integer encode(String string) {
+        if (string.length() > WORD_PART_LENGHT) {
+            throw new SuffixToLongException("Suffix length should not be greater then " + WORD_PART_LENGHT + " " + string);
+        }
+        int result = 0;
+        for (int i = 0; i < WORD_PART_LENGHT; i++) {
+            result *= RADIX;
+            if (i < string.length()) {
+                result += encodeCharacter(string.charAt(i));
+            }
+        }
+        return result;
+    }
+
+    /** Maps a single character to its digit, rejecting everything outside the alphabet. */
+    private static int encodeCharacter(char letter) {
+        if (letter == DASH_CHAR) {
+            return DASH_CODE;
+        }
+        int code = letter - RUSSIAN_SMALL_LETTER_OFFSET;
+        if (code == EE_CHAR) {
+            return E_CHAR;
+        }
+        // Code 0 is the padding digit and code 33 is reserved for the dash; the characters
+        // that would map onto them (U+042F and U+0450) are not small Russian letters.
+        if (code < 1 || code > LAST_LETTER_CODE) {
+            throw new WrongCharaterException("Symbol " + letter + " is not small cirillic letter");
+        }
+        return code;
+    }
+
+    /**
+     * Encodes a string of any length as consecutive chunks of {@link #WORD_PART_LENGHT}
+     * characters. An empty string yields a single zero element.
+     *
+     * @param s the text to encode
+     * @return one encoded integer per chunk, in order
+     * @throws WrongCharaterException if {@code s} contains an unsupported character
+     */
+    public int[] encodeToArray(String s) {
+        int length = s.length();
+        int parts = Math.max(1, (length + WORD_PART_LENGHT - 1) / WORD_PART_LENGHT);
+        int[] ints = new int[parts];
+        for (int part = 0; part < parts; part++) {
+            int from = part * WORD_PART_LENGHT;
+            int to = Math.min(length, from + WORD_PART_LENGHT);
+            ints[part] = encode(s.substring(from, to));
+        }
+        return ints;
+    }
+
+    /**
+     * Decodes every element with {@link #decode(Integer)} and concatenates the results.
+     *
+     * @param array encoded chunks, in order
+     * @return the decoded text
+     */
+    public String decodeArray(int[] array) {
+        StringBuilder result = new StringBuilder(array.length * WORD_PART_LENGHT);
+        for (int i : array) {
+            result.append(decode(i));
+        }
+        return result.toString();
+    }
+
+
+    /**
+     * Decodes one integer produced by {@link #encode(String)}. Padding digits are dropped,
+     * so the value 0 decodes to the empty string.
+     *
+     * @param suffixN the encoded value
+     * @return the decoded text
+     */
+    public String decode(Integer suffixN) {
+        StringBuilder reversed = new StringBuilder(WORD_PART_LENGHT);
+        int rest = suffixN;
+        // Digits come out least significant first, i.e. last character first.
+        while (rest > 0) {
+            int code = rest % RADIX;
+            rest /= RADIX;
+            if (code == 0) {
+                continue;
+            }
+            reversed.append(code == DASH_CODE ? (char) DASH_CHAR : (char) (code + RUSSIAN_SMALL_LETTER_OFFSET));
+        }
+        return reversed.reverse().toString();
+    }
+
+    /**
+     * Tells whether {@code c} is a dash or one of the letters U+0430..U+044F. The letter
+     * yo (U+0451) is not accepted here, although {@link #encode(String)} accepts it.
+     *
+     * @param c the character to test
+     * @return {@code true} if the character is accepted
+     */
+    public boolean checkCharacter(char c) {
+        if (c == DASH_CHAR) {
+            return true;
+        }
+        int code = c - RUSSIAN_SMALL_LETTER_OFFSET;
+        return code >= 1 && code <= LAST_LETTER_CODE;
+    }
+
+    /**
+     * Tells whether every character of {@code word} passes {@link #checkCharacter(char)}.
+     * An empty string is accepted.
+     *
+     * @param word the text to test
+     * @return {@code true} if all characters are accepted
+     */
+    public boolean checkString(String word) {
+        for (int i = 0; i < word.length(); i++) {
+            if (!checkCharacter(word.charAt(i))) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Replaces every yo (U+0451) by ye (U+0435).
+     *
+     * @param s the text to normalize
+     * @return the normalized text
+     */
+    public String cleanString(String s) {
+        return s.replace((char) (EE_CHAR + RUSSIAN_SMALL_LETTER_OFFSET), (char) (E_CHAR + RUSSIAN_SMALL_LETTER_OFFSET));
+    }
+}

+ 27 - 0
russian/src/main/java/org/apache/lucene/morphology/russian/RussianLuceneMorphology.java

@@ -0,0 +1,27 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.russian;
+
+import org.apache.lucene.morphology.LuceneMorphology;
+
+import java.io.IOException;
+
+/**
+ * {@link LuceneMorphology} that reads its data from the classpath resource
+ * {@code /org/apache/lucene/morphology/russian/morph.info} and uses a
+ * {@link RussianLetterDecoderEncoder}.
+ */
+public class RussianLuceneMorphology extends LuceneMorphology {
+
+    /**
+     * @throws IOException if the {@link LuceneMorphology} constructor fails
+     */
+    public RussianLuceneMorphology() throws IOException {
+        // NOTE(review): the stream is handed to the superclass and not closed here — confirm the superclass closes it.
+        super(RussianLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
+    }
+}

+ 27 - 0
russian/src/main/java/org/apache/lucene/morphology/russian/RussianMorphology.java

@@ -0,0 +1,27 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.russian;
+
+import org.apache.lucene.morphology.MorphologyImpl;
+
+import java.io.IOException;
+
+/**
+ * {@link MorphologyImpl} that reads its data from the classpath resource
+ * {@code /org/apache/lucene/morphology/russian/morph.info} and uses a
+ * {@link RussianLetterDecoderEncoder}.
+ */
+public class RussianMorphology extends MorphologyImpl {
+
+    /**
+     * @throws IOException if the {@link MorphologyImpl} constructor fails
+     */
+    public RussianMorphology() throws IOException {
+        // NOTE(review): the stream is handed to the superclass and not closed here — confirm the superclass closes it.
+        super(RussianMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
+    }
+}

File diff suppressed because it is too large
+ 2031238 - 0
russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info


+ 92 - 0
russian/src/test/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoderTest.java

@@ -0,0 +1,92 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.russian;
+
+import org.apache.lucene.morphology.SuffixToLongException;
+import org.apache.lucene.morphology.WrongCharaterException;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+
+import static org.hamcrest.core.IsEqual.equalTo;
+import static org.junit.Assert.assertThat;
+
+/**
+ * Tests for {@link RussianLetterDecoderEncoder}. The data-driven tests read fixture files
+ * in which every line holds two space-separated words.
+ */
+public class RussianLetterDecoderEncoderTest {
+    private RussianLetterDecoderEncoder decoderEncoder;
+
+    @Before
+    public void setUp() {
+        decoderEncoder = new RussianLetterDecoderEncoder();
+    }
+
+    /**
+     * Opens a UTF-8 test fixture from the classpath. The caller closes the returned reader.
+     *
+     * @throws IOException if the resource does not exist
+     */
+    private BufferedReader openResource(String path) throws IOException {
+        InputStream stream = this.getClass().getResourceAsStream(path);
+        if (stream == null) {
+            throw new IOException("Test resource not found: " + path);
+        }
+        return new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+    }
+
+
+    /** Each line is "smaller larger": the second word must encode to the greater number. */
+    @Test
+    public void testShouldPreserverStringComporision() throws IOException {
+        try (BufferedReader bufferedReader = openResource("/org/apache/lucene/morphology/russian/decoder-test-monotonic.txt")) {
+            for (String s = bufferedReader.readLine(); s != null; s = bufferedReader.readLine()) {
+                String[] qa = s.trim().split(" ");
+                if (qa[0].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT && qa[1].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT) {
+                    assertThat(decoderEncoder.encode(qa[1]) > decoderEncoder.encode(qa[0]), equalTo(true));
+                }
+            }
+        }
+    }
+
+
+    /** Each line is "input expected": decoding the encoded input must give the expected word. */
+    @Test
+    public void testShouldCorrectDecodeEncode() throws IOException {
+        try (BufferedReader bufferedReader = openResource("/org/apache/lucene/morphology/russian/decoder-test-data.txt")) {
+            for (String s = bufferedReader.readLine(); s != null; s = bufferedReader.readLine()) {
+                String[] qa = s.trim().split(" ");
+                if (qa[0].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT) {
+                    Integer encodedSuffix = decoderEncoder.encode(qa[0]);
+                    assertThat(decoderEncoder.decode(encodedSuffix), equalTo(qa[1]));
+                }
+            }
+        }
+    }
+
+    /** Same as above, but through the array API, so inputs of any length are covered. */
+    @Test
+    public void testShouldCorrectDecodeEncodeStringToArray() throws IOException {
+        try (BufferedReader bufferedReader = openResource("/org/apache/lucene/morphology/russian/decoder-test-data-for-array.txt")) {
+            for (String s = bufferedReader.readLine(); s != null; s = bufferedReader.readLine()) {
+                String[] qa = s.trim().split(" ");
+                int[] ecodedSuffix = decoderEncoder.encodeToArray(qa[0]);
+                assertThat(decoderEncoder.decodeArray(ecodedSuffix), equalTo(qa[1]));
+            }
+        }
+    }
+
+    @Test(expected = SuffixToLongException.class)
+    public void shouldThrownExeptionIfSuffixToLong() {
+        decoderEncoder.encode("1234567890123");
+    }
+
+    @Test(expected = WrongCharaterException.class)
+    public void shouldThrownExceptionIfSuffixContainWrongCharater() {
+        decoderEncoder.encode("1");
+    }
+}

+ 15 - 0
russian/src/test/resources/org/apache/lucene/morphology/russian/decoder-test-data-for-array.txt

@@ -0,0 +1,15 @@
+тест тест
+ёж еж
+естера естера
+что-то что-то
+а а
+яяяяяя яяяяяя
+яяяя яяяя
+аа аа
+аааааа аааааа
+аааааааааааа аааааааааааа
+аааааааааааааааааа аааааааааааааааааа
+ааааааааааааааааа ааааааааааааааааа
+йфячыцувс йфячыцувс
+ёёё еее
+ёёёе ееее

+ 10 - 0
russian/src/test/resources/org/apache/lucene/morphology/russian/decoder-test-data.txt

@@ -0,0 +1,10 @@
+яяя яяя
+юяю юяю
+тест тест
+ёж еж
+естера естера
+что-то что-то
+а а
+яяяяяя яяяяяя
+яяяя яяяя
+аа аа

+ 7 - 0
russian/src/test/resources/org/apache/lucene/morphology/russian/decoder-test-monotonic.txt

@@ -0,0 +1,7 @@
+а аа
+ааа ббб
+ммм нннн
+ммм ммн
+аа ба
+ииа к
+удд уде

+ 40 - 0
solr-morphology-analysis/pom.xml

@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>morphology</artifactId>
+        <groupId>org.apache.lucene.morphology</groupId>
+        <version>1.5</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>org.apache.lucene.analysis</groupId>
+    <artifactId>morphology</artifactId>
+    <name>solr-morphology-analysis</name>
+    <version>${morphology.version}</version>
+    <url>http://maven.apache.org</url>
+
+    <dependencies>
+
+        <dependency>
+            <groupId>org.apache.lucene.morphology</groupId>
+            <artifactId>russian</artifactId>
+            <version>${morphology.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene.morphology</groupId>
+            <artifactId>english</artifactId>
+            <version>${morphology.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>${junit.version}</version>
+            <scope>test</scope>
+        </dependency>
+
+    </dependencies>
+
+</project>

+ 69 - 0
solr-morphology-analysis/src/main/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactory.java

@@ -0,0 +1,69 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.morphology;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.morphology.LuceneMorphology;
+import org.apache.lucene.morphology.analyzer.MorphologyFilter;
+
+import java.util.Map;
+
+/**
+ * Factory for {@link MorphologyFilter}, with configurable language
+ * <p>
+ * <b>Note:</b> Two languages are available now: English (default value) and Russian.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="content" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.MorphologyFilterFactory" language="English"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * <p>
+ * The morphology class is resolved by name as
+ * {@code org.apache.lucene.morphology.<language in lower case>.<language>LuceneMorphology},
+ * so the {@code language} value must be spelled exactly like the class-name prefix.
+ */
+public class MorphologyFilterFactory extends TokenFilterFactory implements ResourceLoaderAware{
+
+    private static final String LANGUAGE_KEY = "language";
+
+    private final String language;
+    /** Set by {@link #inform(ResourceLoader)}; {@code null} until then. */
+    private LuceneMorphology luceneMorphology;
+
+    /**
+     * @param args factory arguments; only {@code language} is recognized
+     * @throws IllegalArgumentException if any argument is left over after {@code language} is read
+     */
+    public MorphologyFilterFactory(Map<String, String> args) {
+        super(args);
+
+        language = get(args, LANGUAGE_KEY, "English");
+        if (!args.isEmpty()) {
+            throw new IllegalArgumentException("Unknown parameters: " + args);
+        }
+    }
+
+    /**
+     * Wraps {@code input} in a {@link MorphologyFilter} that uses the morphology loaded by
+     * {@link #inform(ResourceLoader)}.
+     */
+    @Override
+    public TokenStream create(TokenStream input) {
+        return new MorphologyFilter(input, luceneMorphology);
+    }
+
+    /**
+     * Instantiates the morphology class for the configured language through {@code loader}.
+     */
+    @Override
+    public void inform(ResourceLoader loader) {
+
+        // Locale.ROOT: the package name must not depend on the JVM default locale
+        // (e.g. Turkish lower-casing of 'I').
+        String className = "org.apache.lucene.morphology." + language.toLowerCase(java.util.Locale.ROOT) + "." + language + "LuceneMorphology";
+        luceneMorphology = loader.newInstance(className, LuceneMorphology.class);
+    }
+
+    /**
+     * @return the morphology loaded by {@link #inform(ResourceLoader)}, or {@code null} if
+     *         {@code inform} has not been called yet
+     */
+    public LuceneMorphology getLuceneMorphology() {
+        return luceneMorphology;
+    }
+}

+ 75 - 0
solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java

@@ -0,0 +1,75 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.morphology;
+
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.morphology.LuceneMorphology;
+import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
+import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Checks that {@link MorphologyFilterFactory} loads the morphology matching its
+ * {@code language} argument and falls back to English when the argument is absent.
+ */
+public class MorphologyFilterFactoryTest {
+
+    private static final String LANGUAGE_KEY = "language";
+    private ResourceLoader loader = new ClasspathResourceLoader();
+    private Map<String, String> args;
+
+    @Before
+    public void setUp(){
+        args = new HashMap<>();
+    }
+
+    /** Builds a factory from the current {@code args}, informs it and returns what it loaded. */
+    private LuceneMorphology loadMorphology() {
+        MorphologyFilterFactory factory = new MorphologyFilterFactory(args);
+        factory.inform(loader);
+        return factory.getLuceneMorphology();
+    }
+
+    @Test
+    public void if_RussianLanguageKey_then_CreateRussianMorphologyFilter(){
+        args.put(LANGUAGE_KEY, "Russian");
+
+        LuceneMorphology loaded = loadMorphology();
+
+        Assert.assertTrue("Creation the MorphologyFilterFactory with a Russian language key", loaded instanceof RussianLuceneMorphology);
+    }
+
+    @Test
+    public void if_EnglishLanguageKey_then_CreateEnglishMorphologyFilter(){
+        args.put(LANGUAGE_KEY, "English");
+
+        LuceneMorphology loaded = loadMorphology();
+
+        Assert.assertTrue("Creation the MorphologyFilterFactory with a English language key", loaded instanceof EnglishLuceneMorphology);
+    }
+
+    @Test
+    public void if_NoLanguageKey_then_CreateEnglishMorphologyFilter(){
+        // No language key is put into args, so the factory default applies.
+        LuceneMorphology loaded = loadMorphology();
+
+        Assert.assertTrue("Creation the MorphologyFilterFactory without any language keys", loaded instanceof EnglishLuceneMorphology);
+    }
+}