Skip to content

Commit 18412a8

Browse files
committed
OAK-11568 Elastic: improved compatibility for aggregation definitions
1 parent 9635838 commit 18412a8

File tree

3 files changed

+72
-1
lines changed

3 files changed

+72
-1
lines changed

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzer.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import org.apache.lucene.analysis.AbstractAnalysisFactory;
4141
import org.apache.lucene.analysis.CharFilterFactory;
4242
import org.apache.lucene.analysis.TokenFilterFactory;
43+
import org.apache.lucene.analysis.charfilter.MappingCharFilterFactory;
4344
import org.apache.lucene.analysis.en.AbstractWordsFileFilterFactory;
4445
import org.apache.lucene.util.ResourceLoader;
4546
import org.jetbrains.annotations.NotNull;
@@ -188,6 +189,7 @@ private static <FD> LinkedHashMap<String, FD> loadFilters(NodeState state,
188189
String name;
189190
List<String> content = null;
190191
List<ParameterTransformer> transformers;
192+
boolean skipEntry = false;
191193
try {
192194
Class<? extends AbstractAnalysisFactory> tff = lookup.apply(t.getName());
193195

@@ -208,6 +210,13 @@ private static <FD> LinkedHashMap<String, FD> loadFilters(NodeState state,
208210
wordsFF.inform(new NodeStateResourceLoader(child));
209211
content = wordsFF.getWords().stream().map(w -> new String(((char[]) w))).collect(Collectors.toList());
210212
}
213+
if (luceneFactory instanceof MappingCharFilterFactory) {
214+
MappingCharFilterFactory map = (MappingCharFilterFactory) luceneFactory;
215+
if (map.getOriginalArgs().isEmpty()) {
216+
skipEntry = true;
217+
LOG.warn("Empty CharFilter mapping: ignoring");
218+
}
219+
}
211220

212221
name = normalize((String) tff.getField("NAME").get(null));
213222
transformers = LUCENE_ELASTIC_TRANSFORMERS.entrySet().stream()
@@ -245,6 +254,9 @@ private static <FD> LinkedHashMap<String, FD> loadFilters(NodeState state,
245254
}
246255
args.put(ANALYZER_TYPE, name);
247256

257+
if (skipEntry) {
258+
continue;
259+
}
248260
filters.put(name + "_" + i, factory.apply(name, JsonData.of(args)));
249261
i++;
250262
}

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ public void analyzerWithEmptyTokenizer() {
105105
Tree defaultAnalyzer = analyzer.addChild("default");
106106
defaultAnalyzer.setProperty(FulltextIndexConstants.ANL_CLASS, "org.apache.lucene.analysis.en.EnglishAnalyzer");
107107
defaultAnalyzer.addChild("tokenizer");
108-
defaultAnalyzer.addChild("filter");
108+
defaultAnalyzer.addChild("filters");
109109

110110
NodeState nodeState = builder.build();
111111
ElasticIndexDefinition definition =
@@ -128,6 +128,34 @@ public void analyzerWithEmptyDefault() {
128128
ElasticIndexHelper.createIndexRequest("prefix.path", definition);
129129
}
130130

131+
/**
 * Builds an index definition whose default analyzer combines the Standard tokenizer with the
 * LowerCase, WordDelimiter, Synonym and PorterStem filters, and creates an index request from it.
 * There are no explicit assertions: the test passes when createIndexRequest completes
 * without throwing.
 */
@Test
132+
public void analyzerWithWordDelimiter() {
133+
IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
134+
IndexDefinitionBuilder.IndexRule indexRule = builder.indexRule("idxRule");
135+
indexRule.property("foo").type("String").useInSimilarity();
136+
137+
// Default analyzer definition: analyzers/default/{tokenizer, filters}
Tree analyzer = builder.getBuilderTree().addChild("analyzers");
138+
Tree defaultAnalyzer = analyzer.addChild("default");
139+
Tree tokenizer = defaultAnalyzer.addChild("tokenizer");
140+
tokenizer.setProperty("name", "Standard");
141+
Tree filters = defaultAnalyzer.addChild("filters");
142+
filters.addChild("LowerCase");
143+
filters.addChild("WordDelimiter");
144+
Tree synonym = filters.addChild("Synonym");
145+
synonym.setProperty("format", "solr");
146+
synonym.setProperty("ignoreCase", true);
147+
synonym.setProperty("synonyms", "synonyms.txt");
148+
// Sibling node named like the "synonyms" property value above, carrying jcr:content/jcr:data.
// Presumably this mimics a file node holding the synonym list - TODO confirm.
Tree synonymsText = filters.addChild("synonyms.txt");
149+
Tree synonymsContent = synonymsText.addChild("jcr:content");
150+
synonymsContent.setProperty("jcr:data", "test");
151+
filters.addChild("PorterStem");
152+
153+
NodeState nodeState = builder.build();
154+
ElasticIndexDefinition definition =
155+
new ElasticIndexDefinition(nodeState, nodeState, "path", "prefix");
156+
// The return value is intentionally unused; only successful completion is checked.
ElasticIndexHelper.createIndexRequest("prefix.path", definition);
157+
}
158+
131159
@Test()
132160
public void indexSettingsAreCorrectlySet() {
133161
IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();

oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,6 +1075,37 @@ public void wildcardQueryToLookupUnanalyzedText() throws Exception {
10751075
});
10761076
}
10771077

1078+
// OAK-11568: a default analyzer that declares a "Mapping" char filter without any configuration
// must still yield an index that can be queried.
1079+
@Test
1080+
public void analyzerWithEmptyCharFilterMapping() throws Exception {
1081+
setup(List.of("foo"), idx -> {
1082+
Tree analyzers = idx.addChild(FulltextIndexConstants.ANALYZERS);
1083+
Tree defaultAnalyzers = analyzers.addChild(FulltextIndexConstants.ANL_DEFAULT);
1084+
Tree charFilters = defaultAnalyzers.addChild(FulltextIndexConstants.ANL_CHAR_FILTERS);
1085+
charFilters.addChild("HTMLStrip");
1086+
1087+
// Declaring the "Mapping" char filter without any content used to result in:
1088+
// co.elastic.clients.elasticsearch._types.ElasticsearchException:
1089+
// [es/indices.create] failed: [illegal_argument_exception]
1090+
// mapping requires either `mappings` or `mappings_path` to be configured
1091+
charFilters.addChild("Mapping");
1092+
1093+
defaultAnalyzers.addChild(FulltextIndexConstants.ANL_TOKENIZER)
1094+
.setProperty(FulltextIndexConstants.ANL_NAME, "Standard");
1095+
// NOTE(review): the filters node is added under `analyzers`, whereas the char filters and
// the tokenizer above are added under `defaultAnalyzers` - confirm this is intended.
Tree filters = analyzers.addChild(FulltextIndexConstants.ANL_FILTERS);
1096+
filters.setOrderableChildren(true);
1097+
filters.addChild("LowerCase");
1098+
});
1099+
1100+
// Create one node whose "foo" property contains the term searched for below.
Tree content = root.getTree("/").addChild("content");
1101+
content.addChild("bar").setProperty("foo", "foo bar");
1102+
root.commit();
1103+
1104+
// The query is retried via assertEventually until it returns the expected path.
assertEventually(() -> {
1105+
assertQuery("select * from [nt:base] where CONTAINS(*, 'foo')", List.of("/content/bar"));
1106+
});
1107+
}
1108+
10781109
protected Tree addFilter(Tree analyzer, String filterName) {
10791110
Tree filter = analyzer.addChild(filterName);
10801111
// mimics nodes api

0 commit comments

Comments
 (0)