Skip to content

Commit 1055323

Browse files
committed
OAK-11568 Elastic: improved compatibility for aggregation definitions
1 parent 0096dcd commit 1055323

File tree

2 files changed

+72
-3
lines changed

2 files changed

+72
-3
lines changed

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ private static <FD> LinkedHashMap<String, FD> loadFilters(NodeState state,
202202
int i = 0;
203203
//Need to read children in order
204204
Tree tree = TreeFactory.createReadOnlyTree(state);
205+
String wordDelimiterFilterKey = null;
205206
for (Tree t : tree.getChildren()) {
206207
NodeState child = state.getChildNode(t.getName());
207208

@@ -276,7 +277,20 @@ private static <FD> LinkedHashMap<String, FD> loadFilters(NodeState state,
276277
if (skipEntry) {
277278
continue;
278279
}
279-
filters.put(name + "_" + i, factory.apply(name, JsonData.of(args)));
280+
String key = name + "_" + i;
281+
filters.put(key, factory.apply(name, JsonData.of(args)));
282+
if (name.equals("word_delimiter")) {
283+
wordDelimiterFilterKey = key;
284+
} else if (name.equals("synonym")) {
285+
if (wordDelimiterFilterKey != null) {
286+
// re-order the synonyms filter _before_ the word delimiter, to avoid
287+
// "Token filter [word_delimiter_1] cannot be used to parse synonyms"
288+
i++;
289+
String newKey = "word_delimiter_" + i;
290+
filters.put(newKey, filters.remove(wordDelimiterFilterKey));
291+
wordDelimiterFilterKey = newKey;
292+
}
293+
}
280294
i++;
281295
}
282296
return filters;

oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,8 +1114,8 @@ public void analyzerWithNGramTokenizer() throws Exception {
11141114
Tree defaultAnalyzers = analyzers.addChild(FulltextIndexConstants.ANL_DEFAULT);
11151115
Tree tokenizer = defaultAnalyzers.addChild(FulltextIndexConstants.ANL_TOKENIZER);
11161116
tokenizer.setProperty(FulltextIndexConstants.ANL_NAME, "NGram");
1117-
tokenizer.setProperty("maxGramSize", 2);
1118-
tokenizer.setProperty("minGramSize", 3);
1117+
tokenizer.setProperty("minGramSize", 2);
1118+
tokenizer.setProperty("maxGramSize", 3);
11191119
});
11201120

11211121
Tree content = root.getTree("/").addChild("content");
@@ -1129,6 +1129,61 @@ public void analyzerWithNGramTokenizer() throws Exception {
11291129
assertQuery("select * from [nt:base] where contains(*, 'ba')", List.of("/content/bar"));
11301130
assertQuery("select * from [nt:base] where contains(*, 'bar')", List.of("/content/bar"));
11311131
assertQuery("select * from [nt:base] where contains(*, 'art')", List.of("/content/bar"));
1132+
// not found with Elasticsearch, but found with Lucene
1133+
// assertQuery("select * from [nt:base] where contains(*, 'foo bar')", List.of("/content/bar"));
1134+
});
1135+
}
1136+
1137+
// OAK-11568
1138+
@Test
1139+
public void analyzerWithPatternTokenizer() throws Exception {
1140+
setup(List.of("foo"), idx -> {
1141+
Tree analyzers = idx.addChild(FulltextIndexConstants.ANALYZERS);
1142+
Tree defaultAnalyzers = analyzers.addChild(FulltextIndexConstants.ANL_DEFAULT);
1143+
Tree tokenizer = defaultAnalyzers.addChild(FulltextIndexConstants.ANL_TOKENIZER);
1144+
tokenizer.setProperty(FulltextIndexConstants.ANL_NAME, "pattern");
1145+
tokenizer.setProperty("pattern", "[^\\p{L}\\d-_]");
1146+
});
1147+
1148+
Tree content = root.getTree("/").addChild("content");
1149+
content.addChild("bar").setProperty("foo", "foo bar");
1150+
root.commit();
1151+
1152+
assertEventually(() -> {
1153+
assertQuery("select * from [nt:base] where contains(*, 'foo')", List.of("/content/bar"));
1154+
});
1155+
}
1156+
1157+
// OAK-11568
1158+
@Test
1159+
public void analyzerWithWordDelimiterAndSynonyms() throws Exception {
1160+
setup(List.of("foo"), idx -> {
1161+
Tree analyzers = idx.addChild(FulltextIndexConstants.ANALYZERS);
1162+
Tree defaultAnalyzers = analyzers.addChild(FulltextIndexConstants.ANL_DEFAULT);
1163+
Tree tokenizer = defaultAnalyzers.addChild(FulltextIndexConstants.ANL_TOKENIZER);
1164+
tokenizer.setProperty(FulltextIndexConstants.ANL_NAME, "Standard");
1165+
Tree filters = defaultAnalyzers.addChild(FulltextIndexConstants.ANL_FILTERS);
1166+
filters.setOrderableChildren(true);
1167+
filters.addChild("LowerCase");
1168+
// internally, this is re-ordered _after_ the synonyms filter
1169+
filters.addChild("WordDelimiter");
1170+
Tree synonym = filters.addChild("Synonym");
1171+
synonym.setProperty("format", "solr");
1172+
synonym.setProperty("ignoreCase", true);
1173+
synonym.setProperty("synonyms", "synonyms.txt");
1174+
Tree synonymTxt = synonym.addChild("synonyms.txt");
1175+
Tree content = synonymTxt.addChild("jcr:content");
1176+
content.setProperty("jcr:data", "find => replace\n");
1177+
content.setProperty("jcr:mimeType", "text/plain");
1178+
filters.addChild("PorterStem");
1179+
});
1180+
1181+
Tree content = root.getTree("/").addChild("content");
1182+
content.addChild("bar").setProperty("foo", "replace");
1183+
root.commit();
1184+
1185+
assertEventually(() -> {
1186+
assertQuery("select * from [nt:base] where contains(*, 'find')", List.of("/content/bar"));
11321187
});
11331188
}
11341189

0 commit comments

Comments
 (0)