LUCENE-10312: Revert changes in PersianAnalyzer from 9x (apache#904)

mocobeta · mocobeta · commit ba8c3a806ada · 2022-05-19T21:44:58.000+09:00
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
@@ -29,7 +29,6 @@
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.IOUtils;
 
@@ -87,8 +86,6 @@ private static class DefaultSetHolder {
     }
   }
 
-  private final CharArraySet stemExclusionSet;
-
   /** Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */
   public PersianAnalyzer() {
     this(DefaultSetHolder.DEFAULT_STOP_SET);
@@ -100,19 +97,7 @@ public PersianAnalyzer() {
    * @param stopwords a stopword set
    */
   public PersianAnalyzer(CharArraySet stopwords) {
-    this(stopwords, CharArraySet.EMPTY_SET);
-  }
-
-  /**
-   * Builds an analyzer with the given stop word. If a none-empty stem exclusion set is provided
-   * this analyzer will add a {@link SetKeywordMarkerFilter} before {@link PersianStemFilter}.
-   *
-   * @param stopwords a stopword set
-   * @param stemExclusionSet a set of terms not to be stemmed
-   */
-  public PersianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
     super(stopwords);
-    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
   }
 
   /**
@@ -136,11 +121,7 @@ protected TokenStreamComponents createComponents(String fieldName) {
      * the order here is important: the stopword list is normalized with the
      * above!
      */
-    result = new StopFilter(result, stopwords);
-    if (!stemExclusionSet.isEmpty()) {
-      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
-    }
-    return new TokenStreamComponents(source, new PersianStemFilter(result));
+    return new TokenStreamComponents(source, new StopFilter(result, stopwords));
   }
 
   @Override
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java
@@ -32,7 +32,14 @@ public class TestPersianStemFilter extends BaseTokenStreamTestCase {
   @Override
   public void setUp() throws Exception {
     super.setUp();
-    a = new PersianAnalyzer();
+    a =
+        new Analyzer() {
+          @Override
+          protected TokenStreamComponents createComponents(String fieldName) {
+            final Tokenizer source = new MockTokenizer();
+            return new TokenStreamComponents(source, new PersianStemFilter(source));
+          }
+        };
   }
 
   @Override