Skip to content

Commit 9caeb93

Browse files
authored
Fix normalization in TeluguAnalyzer (apache#13059)
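DecimalDigitFilter and IndicNormalizationFilter were mistakenly left out of the normalization chain: each subsequent filter wrapped the original input stream instead of the previous filter's result, so only TeluguNormalizationFilter took effect.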
1 parent d4c0eaf commit 9caeb93

File tree

3 files changed

+16
-2
lines changed

3 files changed

+16
-2
lines changed

lucene/CHANGES.txt

+2
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,8 @@ Bug Fixes
250250

251251
* GITHUB#13031: ScorerSupplier created by QueryProfilerWeight will propagate topLevelScoringClause to the sub ScorerSupplier. (Shintaro Murakami)
252252

253+
* GITHUB#13059: Fixed missing IndicNormalization and DecimalDigit filters in TeluguAnalyzer normalization (Dmitry Cherniachenko)
254+
253255
Build
254256
---------------------
255257

lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguAnalyzer.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ protected TokenStreamComponents createComponents(String fieldName) {
127127
@Override
128128
protected TokenStream normalize(String fieldName, TokenStream in) {
129129
TokenStream result = new DecimalDigitFilter(in);
130-
result = new IndicNormalizationFilter(in);
131-
result = new TeluguNormalizationFilter(in);
130+
result = new IndicNormalizationFilter(result);
131+
result = new TeluguNormalizationFilter(result);
132132
return result;
133133
}
134134
}

lucene/analysis/common/src/test/org/apache/lucene/analysis/te/TestTeluguAnalyzer.java

+12
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.apache.lucene.analysis.Analyzer;
2020
import org.apache.lucene.analysis.CharArraySet;
2121
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
22+
import org.apache.lucene.util.BytesRef;
2223

2324
public class TestTeluguAnalyzer extends BaseTokenStreamTestCase {
2425

@@ -48,6 +49,17 @@ public void testDigits() throws Exception {
4849
a.close();
4950
}
5051

52+
public void testNormalization() {
53+
TeluguAnalyzer a = new TeluguAnalyzer();
54+
// DecimalDigitFilter
55+
assertEquals(new BytesRef("1234"), a.normalize("dummy", "౧౨౩౪"));
56+
// IndicNormalizationFilter
57+
assertEquals(new BytesRef("ऑऑ"), a.normalize("dummy", "अाॅअाॅ"));
58+
// TeluguNormalizationFilter
59+
assertEquals(new BytesRef("ఓనమాల"), a.normalize("dummy", "ఒౕనమాల"));
60+
a.close();
61+
}
62+
5163
/** Send some random strings to the analyzer */
5264
public void testRandomStrings() throws Exception {
5365
TeluguAnalyzer analyzer = new TeluguAnalyzer();

0 commit comments

Comments
 (0)