Skip to content

Commit a33c43d

Browse files
OAK-11775: improve is null queries for elasticsearch indexes (#2346)
* OAK-11775 index nullProperties * OAK-11775 fix query to use :nullCheck field, handle backward compatibility * OAK-11775 improve log * OAK-11775 improve log * OAK-11775 improve backward compatibility logic * OAK-11775 log at error level if backward compatibility is not supported anymore * Update oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java fix typo Co-authored-by: Thomas Mueller <thomas.tom.mueller@gmail.com> * OAK-11775 ElasticRequestHandler improvements --------- Co-authored-by: Thomas Mueller <thomas.tom.mueller@gmail.com>
1 parent 6d8d3bb commit a33c43d

File tree

8 files changed

+53
-18
lines changed

8 files changed

+53
-18
lines changed

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -182,15 +182,17 @@ public class ElasticIndexDefinition extends IndexDefinition {
182182
private final List<PropertyDefinition> similarityProperties;
183183
private final List<PropertyDefinition> similarityTagsProperties;
184184
private final String[] similarityTagsFields;
185+
private final ElasticSemVer mappingVersion;
185186

186187
public ElasticIndexDefinition(NodeState root, NodeState defn, String indexPath, String indexPrefix) {
187188
super(root, defn, determineIndexFormatVersion(defn), determineUniqueId(defn), indexPath);
188189
this.indexPrefix = indexPrefix;
190+
this.mappingVersion = ElasticSemVer.fromString(getOptionalValue(definition, PROP_INDEX_MAPPING_VERSION, "1.0.0"));
189191
// the alias contains the internal mapping major version. In case of a breaking change, an index with the new version can
190192
// be created without affecting the existing queries of an instance running with the old version.
191193
// This strategy has been introduced from version 1. For compatibility reasons, the alias is not changed for the first version.
192-
if (MAPPING_VERSION.getMajor() > 1) {
193-
this.indexAlias = ElasticIndexNameHelper.getElasticSafeIndexName(indexPrefix, getIndexPath() + "_v" + MAPPING_VERSION.getMajor());
194+
if (this.mappingVersion.getMajor() > 1) {
195+
this.indexAlias = ElasticIndexNameHelper.getElasticSafeIndexName(indexPrefix, getIndexPath() + "_v" + mappingVersion.getMajor());
194196
} else {
195197
this.indexAlias = ElasticIndexNameHelper.getElasticSafeIndexName(indexPrefix, getIndexPath());
196198
}
@@ -367,8 +369,8 @@ public boolean analyzerConfigSplitOnNumerics() {
367369
* Returns the mapping version for this index definition.
368370
* If the version is not specified, the default value is {@code 1.0.0}.
369371
*/
370-
public String getMappingVersion() {
371-
return getOptionalValue(definition, PROP_INDEX_MAPPING_VERSION, "1.0.0");
372+
public ElasticSemVer getMappingVersion() {
373+
return mappingVersion;
372374
}
373375

374376
@Override

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSemVer.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import java.util.Objects;
2020

21-
public class ElasticSemVer {
21+
public class ElasticSemVer implements Comparable<ElasticSemVer> {
2222
private final int major;
2323
private final int minor;
2424
private final int patch;
@@ -62,6 +62,17 @@ public int getPatch() {
6262
return patch;
6363
}
6464

65+
@Override
66+
public int compareTo(ElasticSemVer other) {
67+
if (this.major != other.major) {
68+
return Integer.compare(this.major, other.major);
69+
}
70+
if (this.minor != other.minor) {
71+
return Integer.compare(this.minor, other.minor);
72+
}
73+
return Integer.compare(this.patch, other.patch);
74+
}
75+
6576
@Override
6677
public String toString() {
6778
return major + "." + minor + "." + patch;

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ public class ElasticDocument {
5252
public final Set<Map<String, String>> suggest;
5353
@JsonProperty(FieldNames.SPELLCHECK)
5454
public final Set<String> spellcheck;
55+
@JsonProperty(FieldNames.NULL_PROPS)
56+
public final Set<String> nullProperties;
5557
@JsonProperty(ElasticIndexDefinition.DYNAMIC_BOOST_FULLTEXT)
5658
public final Set<String> dbFullText;
5759
@JsonProperty(ElasticIndexDefinition.SIMILARITY_TAGS)
@@ -79,6 +81,7 @@ public class ElasticDocument {
7981
this.fulltext = new LinkedHashSet<>();
8082
this.suggest = new LinkedHashSet<>();
8183
this.spellcheck = new LinkedHashSet<>();
84+
this.nullProperties = new LinkedHashSet<>();
8285
this.properties = new HashMap<>();
8386
this.dynamicProperties = new ArrayList<>();
8487
this.dbFullText = new LinkedHashSet<>();
@@ -124,6 +127,10 @@ void addSpellcheck(String value) {
124127
spellcheck.add(value);
125128
}
126129

130+
void addNullProperty(String fieldName) {
131+
nullProperties.add(fieldName);
132+
}
133+
127134
// ES for String values (that are not interpreted as date or numbers etc.) would analyze in the same
128135
// field and would index a sub-field "keyword" for non-analyzed value.
129136
// ref: https://www.elastic.co/blog/strings-are-dead-long-live-strings

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,12 +229,15 @@ protected void indexAncestors(ElasticDocument doc, String path) { /* empty */ }
229229

230230
@Override
231231
protected void indexNotNullProperty(ElasticDocument doc, PropertyDefinition pd) {
232-
// Elastic support exist queries for specific fields
232+
// Elastic efficiently supports exist queries thanks to the special _field_names metadata field
233+
// to determine which fields are present in each document
233234
}
234235

235236
@Override
236237
protected void indexNullProperty(ElasticDocument doc, PropertyDefinition pd) {
237-
// Elastic support not exist queries for specific fields
238+
// Elasticsearch performs poorly with must_not + exists because it needs to identify documents that do not
239+
// have a field — and it has no inverted index for "non-existence"
240+
doc.addNullProperty(pd.name);
238241
}
239242

240243
@Override

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -199,14 +199,8 @@ private static void mapInternalProperties(@NotNull TypeMapping.Builder builder)
199199
)
200200
)
201201
)
202-
.properties(ElasticIndexDefinition.LAST_UPDATED, b -> b.date(d -> d));
203-
// TODO: the mapping below is for features currently not supported. These need to be reviewed
204-
// mappingBuilder.startObject(FieldNames.NOT_NULL_PROPS)
205-
// .field("type", "keyword")
206-
// .endObject();
207-
// mappingBuilder.startObject(FieldNames.NULL_PROPS)
208-
// .field("type", "keyword")
209-
// .endObject();
202+
.properties(ElasticIndexDefinition.LAST_UPDATED, b -> b.date(d -> d))
203+
.properties(FieldNames.NULL_PROPS, p -> p.keyword(k -> k.docValues(false)));
210204
}
211205

212206
private static void mapInferenceDefinition(@NotNull TypeMapping.Builder builder, @NotNull ElasticIndexDefinition.InferenceDefinition inferenceDefinition) {

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticConnection;
4646
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
4747
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticPropertyDefinition;
48+
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticSemVer;
4849
import org.apache.jackrabbit.oak.plugins.index.elastic.query.async.facets.ElasticFacetProvider;
4950
import org.apache.jackrabbit.oak.plugins.index.elastic.query.inference.InferenceConfig;
5051
import org.apache.jackrabbit.oak.plugins.index.elastic.query.inference.InferenceConstants;
@@ -143,6 +144,10 @@ public class ElasticRequestHandler {
143144
private final String propertyRestrictionQuery;
144145
private final NodeState rootState;
145146

147+
// Min/max version of ElasticSearch that supports null checks
148+
private static final ElasticSemVer MINIMUM_NULL_CHECK_VERSION = new ElasticSemVer(1, 4, 0);
149+
private static final ElasticSemVer MAXIMUM_NULL_CHECK_VERSION = new ElasticSemVer(1, 5, 0);
150+
146151
ElasticRequestHandler(@NotNull IndexPlan indexPlan, @NotNull FulltextIndexPlanner.PlanResult planResult,
147152
NodeState rootState) {
148153
this.indexPlan = indexPlan;
@@ -1123,7 +1128,20 @@ private Query createQuery(String propertyName, Filter.PropertyRestriction pr, Pr
11231128
final String field = elasticIndexDefinition.getElasticKeyword(propertyName);
11241129

11251130
if (pr.isNullRestriction()) {
1126-
return Query.of(q -> q.bool(b -> b.mustNot(m -> m.exists(e -> e.field(field)))));
1131+
// nullProps check has been added in 1.4.0. Use the old strategy when version is lower
1132+
if (elasticIndexDefinition.getMappingVersion().compareTo(MINIMUM_NULL_CHECK_VERSION) < 0) {
1133+
// check if the default mapping is >= 1.5.0
1134+
if (ElasticIndexDefinition.MAPPING_VERSION != null &&
1135+
ElasticIndexDefinition.MAPPING_VERSION.compareTo(MAXIMUM_NULL_CHECK_VERSION) >= 0) {
1136+
LOG.error("Backward compatibility for null check is not supported anymore. Query results may be incorrect. " +
1137+
"Please reindex to update the internal mapping version.");
1138+
} else {
1139+
LOG.warn("Using deprecated null check strategy for field: {}. Please reindex to update the internal mapping version. " +
1140+
"It will be removed with default index mapping version 1.5.0.", field);
1141+
return Query.of(q -> q.bool(b -> b.mustNot(mn -> mn.exists(e -> e.field(field)))));
1142+
}
1143+
}
1144+
return Query.of(q -> q.term(t -> t.field(FieldNames.NULL_PROPS).value(field)));
11271145
}
11281146
if (pr.isNotNullRestriction()) {
11291147
return Query.of(q -> q.exists(e -> e.field(field)));

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public void indexStoresMappingVersion() throws Exception {
3939
Tree index = setIndex(UUID.randomUUID().toString(), builder);
4040
root.commit();
4141

42-
assertEventually(() -> assertEquals(ElasticIndexDefinition.MAPPING_VERSION.toString(),
42+
assertEventually(() -> assertEquals(ElasticIndexDefinition.MAPPING_VERSION,
4343
getElasticIndexDefinition(index).getMappingVersion()));
4444
}
4545

oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,6 @@ public static IndexingMode from(String indexingMode) {
443443
* needed from an outside process that does not have visibility to the specific index module.
444444
*/
445445
Map<String, String> INDEX_VERSION_BY_TYPE = Map.of(
446-
"elasticsearch", "1.3.1"
446+
"elasticsearch", "1.4.0"
447447
);
448448
}

0 commit comments

Comments
 (0)