Skip to content

Commit 0096dcd

Browse files
committed
OAK-11568 Elastic: improved compatibility for aggregation definitions
1 parent 18412a8 commit 0096dcd

File tree

2 files changed

+46
-1
lines changed

2 files changed

+46
-1
lines changed

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import co.elastic.clients.elasticsearch._types.analysis.Analyzer;
2020
import co.elastic.clients.elasticsearch._types.analysis.CharFilterDefinition;
2121
import co.elastic.clients.elasticsearch._types.analysis.CustomAnalyzer;
22+
import co.elastic.clients.elasticsearch._types.analysis.NGramTokenizer;
2223
import co.elastic.clients.elasticsearch._types.analysis.TokenFilterDefinition;
2324
import co.elastic.clients.elasticsearch._types.analysis.TokenizerDefinition;
2425
import co.elastic.clients.elasticsearch.indices.IndexSettingsAnalysis;
@@ -172,10 +173,28 @@ private static TokenizerDefinition loadTokenizer(NodeState state) {
172173
}
173174
}
174175
name = normalize(name);
176+
if ("n_gram".equals(name)) {
177+
// OAK-11568
178+
// https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-ngram-tokenizer.html
179+
Integer minGramSize = getIntegerSetting(args, "minGramSize", 2);
180+
Integer maxGramSize = getIntegerSetting(args, "maxGramSize", 3);
181+
TokenizerDefinition ngram = TokenizerDefinition.of(t -> t.ngram(
182+
NGramTokenizer.of(n -> n.minGram(minGramSize).maxGram(maxGramSize))));
183+
return ngram;
184+
}
175185
args.put(ANALYZER_TYPE, name);
176186
return new TokenizerDefinition(name, JsonData.of(args));
177187
}
178188

189+
private static Integer getIntegerSetting(Map<String, Object> args, String name, Integer defaultValue) {
190+
Object value = args.getOrDefault(name, defaultValue);
191+
if (!(value instanceof Integer)) {
192+
LOG.warn("Setting {} value {} is not an integer; using default: {}", name, value, defaultValue);
193+
return defaultValue;
194+
}
195+
return (Integer) value;
196+
}
197+
179198
private static <FD> LinkedHashMap<String, FD> loadFilters(NodeState state,
180199
Function<String, Class<? extends AbstractAnalysisFactory>> lookup,
181200
BiFunction<String, JsonData, FD> factory) {

oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1092,7 +1092,7 @@ public void analyzerWithEmptyCharFilterMapping() throws Exception {
10921092

10931093
defaultAnalyzers.addChild(FulltextIndexConstants.ANL_TOKENIZER)
10941094
.setProperty(FulltextIndexConstants.ANL_NAME, "Standard");
1095-
Tree filters = analyzers.addChild(FulltextIndexConstants.ANL_FILTERS);
1095+
Tree filters = defaultAnalyzers.addChild(FulltextIndexConstants.ANL_FILTERS);
10961096
filters.setOrderableChildren(true);
10971097
filters.addChild("LowerCase");
10981098
});
@@ -1106,6 +1106,32 @@ public void analyzerWithEmptyCharFilterMapping() throws Exception {
11061106
});
11071107
}
11081108

1109+
// OAK-11568
1110+
@Test
1111+
public void analyzerWithNGramTokenizer() throws Exception {
1112+
setup(List.of("foo"), idx -> {
1113+
Tree analyzers = idx.addChild(FulltextIndexConstants.ANALYZERS);
1114+
Tree defaultAnalyzers = analyzers.addChild(FulltextIndexConstants.ANL_DEFAULT);
1115+
Tree tokenizer = defaultAnalyzers.addChild(FulltextIndexConstants.ANL_TOKENIZER);
1116+
tokenizer.setProperty(FulltextIndexConstants.ANL_NAME, "NGram");
1117+
tokenizer.setProperty("maxGramSize", 2);
1118+
tokenizer.setProperty("minGramSize", 3);
1119+
});
1120+
1121+
Tree content = root.getTree("/").addChild("content");
1122+
content.addChild("bar").setProperty("foo", "foob bart");
1123+
root.commit();
1124+
1125+
assertEventually(() -> {
1126+
assertQuery("select * from [nt:base] where contains(*, 'fo')", List.of("/content/bar"));
1127+
assertQuery("select * from [nt:base] where contains(*, 'foo')", List.of("/content/bar"));
1128+
assertQuery("select * from [nt:base] where contains(*, 'oob')", List.of("/content/bar"));
1129+
assertQuery("select * from [nt:base] where contains(*, 'ba')", List.of("/content/bar"));
1130+
assertQuery("select * from [nt:base] where contains(*, 'bar')", List.of("/content/bar"));
1131+
assertQuery("select * from [nt:base] where contains(*, 'art')", List.of("/content/bar"));
1132+
});
1133+
}
1134+
11091135
protected Tree addFilter(Tree analyzer, String filterName) {
11101136
Tree filter = analyzer.addChild(filterName);
11111137
// mimics nodes api

0 commit comments

Comments
 (0)