Skip to content

Commit 6c2f146

Browse files
OAK-11536: elastic synonym filter has to be lenient to incorrect rules (#2125)
* OAK-11536: elastic synonym filter has to be lenient to incorrect rules
* OAK-11536: improve code reuse in ElasticIndexWriter
* OAK-11536: fix import error after merge
1 parent 48b26b5 commit 6c2f146

File tree

3 files changed

+16
-17
lines changed

3 files changed

+16
-17
lines changed

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticCustomAnalyzerMappings.java

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -187,9 +187,14 @@ protected interface ParameterTransformer {
187187
reKey.apply(luceneParams, Map.of("mapping", "mappings"))
188188
);
189189

190-
LUCENE_ELASTIC_TRANSFORMERS.put(SynonymFilterFactory.class, luceneParams ->
191-
reKey.apply(luceneParams, Map.of("tokenizerFactory", "tokenizer"))
192-
);
190+
LUCENE_ELASTIC_TRANSFORMERS.put(SynonymFilterFactory.class, luceneParams -> {
191+
// Lucene does not support the "lenient" option (see UNSUPPORTED_LUCENE_PARAMETERS) because Lucene is lenient by default
192+
// elastic is not lenient by default, so we need to set it to true in case it's not present
193+
if (!luceneParams.containsKey("lenient")) {
194+
luceneParams.put("lenient", "true");
195+
}
196+
return reKey.apply(luceneParams, Map.of("tokenizerFactory", "tokenizer"));
197+
});
193198

194199
LUCENE_ELASTIC_TRANSFORMERS.put(KeywordMarkerFilterFactory.class, luceneParams ->
195200
reKey.apply(luceneParams, Map.of("protected", "keywords"))

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexWriter.java

Lines changed: 3 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
*/
1717
package org.apache.jackrabbit.oak.plugins.index.elastic.index;
1818

19-
import co.elastic.clients.elasticsearch._types.AcknowledgedResponseBase;
19+
import co.elastic.clients.elasticsearch._types.AcknowledgedResponse;
2020
import co.elastic.clients.elasticsearch._types.ElasticsearchException;
2121
import co.elastic.clients.elasticsearch.indices.CreateIndexRequest;
2222
import co.elastic.clients.elasticsearch.indices.CreateIndexResponse;
@@ -186,11 +186,7 @@ private void provisionIndex() throws IOException {
186186
}
187187

188188
final CreateIndexRequest request = ElasticIndexHelper.createIndexRequest(indexName, indexDefinition);
189-
if (LOG.isDebugEnabled()) {
190-
StringBuilder sb = new StringBuilder();
191-
JsonpUtils.toString(request, sb);
192-
LOG.debug("Creating Index with request {}", sb);
193-
}
189+
LOG.debug("Creating Index with request {}", request);
194190
// create the new index
195191
try {
196192
final CreateIndexResponse response = esClient.create(request);
@@ -268,13 +264,7 @@ private void enableIndex() throws IOException {
268264
deleteOldIndices(client, aliasResponse.result().keySet());
269265
}
270266

271-
private void checkResponseAcknowledgement(AcknowledgedResponseBase response, String exceptionMessage) {
272-
if (!response.acknowledged()) {
273-
throw new IllegalStateException(exceptionMessage);
274-
}
275-
}
276-
277-
private void checkResponseAcknowledgement(CreateIndexResponse response, String exceptionMessage) {
267+
private void checkResponseAcknowledgement(AcknowledgedResponse response, String exceptionMessage) {
278268
if (!response.acknowledged()) {
279269
throw new IllegalStateException(exceptionMessage);
280270
}

oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextAnalyzerCommonTest.java

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1005,7 +1005,11 @@ public void synonyms() throws Exception {
10051005
Tree synFilter = addFilter(filters, "Synonym");
10061006
synFilter.setProperty("synonyms", "syn.txt");
10071007
synFilter.addChild("syn.txt").addChild(JCR_CONTENT)
1008-
.setProperty(JCR_DATA, "plane, airplane, aircraft\nflies=>scars");
1008+
.setProperty(JCR_DATA, "plane, airplane, aircraft\n" +
1009+
"flies=>scars\n" +
1010+
// this rule is incorrect: "term: + was completely eliminated by analyzer"
1011+
// by default, the configuration has to be lenient and not fail on such cases
1012+
"plus,+,addition");
10091013
});
10101014

10111015
Tree content = root.getTree("/").addChild("content");

0 commit comments

Comments
 (0)