Skip to content

Commit ba8c3a8

Browse files
committed
LUCENE-10312: Revert changes in PersianAnalyzer from 9x (apache#904)
1 parent 978eef5 commit ba8c3a8

File tree

2 files changed

+9
-21
lines changed

2 files changed

+9
-21
lines changed

lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java

Lines changed: 1 addition & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,6 @@
2929
import org.apache.lucene.analysis.WordlistLoader;
3030
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
3131
import org.apache.lucene.analysis.core.DecimalDigitFilter;
32-
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
3332
import org.apache.lucene.analysis.standard.StandardTokenizer;
3433
import org.apache.lucene.util.IOUtils;
3534

@@ -87,8 +86,6 @@ private static class DefaultSetHolder {
8786
}
8887
}
8988

90-
private final CharArraySet stemExclusionSet;
91-
9289
/** Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}. */
9390
public PersianAnalyzer() {
9491
this(DefaultSetHolder.DEFAULT_STOP_SET);
@@ -100,19 +97,7 @@ public PersianAnalyzer() {
10097
* @param stopwords a stopword set
10198
*/
10299
public PersianAnalyzer(CharArraySet stopwords) {
103-
this(stopwords, CharArraySet.EMPTY_SET);
104-
}
105-
106-
/**
107-
* Builds an analyzer with the given stop words. If a non-empty stem exclusion set is provided
108-
* this analyzer will add a {@link SetKeywordMarkerFilter} before {@link PersianStemFilter}.
109-
*
110-
* @param stopwords a stopword set
111-
* @param stemExclusionSet a set of terms not to be stemmed
112-
*/
113-
public PersianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
114100
super(stopwords);
115-
this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
116101
}
117102

118103
/**
@@ -136,11 +121,7 @@ protected TokenStreamComponents createComponents(String fieldName) {
136121
* the order here is important: the stopword list is normalized with the
137122
* above!
138123
*/
139-
result = new StopFilter(result, stopwords);
140-
if (!stemExclusionSet.isEmpty()) {
141-
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
142-
}
143-
return new TokenStreamComponents(source, new PersianStemFilter(result));
124+
return new TokenStreamComponents(source, new StopFilter(result, stopwords));
144125
}
145126

146127
@Override

lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,14 @@ public class TestPersianStemFilter extends BaseTokenStreamTestCase {
3232
@Override
3333
public void setUp() throws Exception {
3434
super.setUp();
35-
a = new PersianAnalyzer();
35+
a =
36+
new Analyzer() {
37+
@Override
38+
protected TokenStreamComponents createComponents(String fieldName) {
39+
final Tokenizer source = new MockTokenizer();
40+
return new TokenStreamComponents(source, new PersianStemFilter(source));
41+
}
42+
};
3643
}
3744

3845
@Override

0 commit comments

Comments
 (0)