Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -182,15 +182,17 @@ public class ElasticIndexDefinition extends IndexDefinition {
private final List<PropertyDefinition> similarityProperties;
private final List<PropertyDefinition> similarityTagsProperties;
private final String[] similarityTagsFields;
private final ElasticSemVer mappingVersion;

public ElasticIndexDefinition(NodeState root, NodeState defn, String indexPath, String indexPrefix) {
super(root, defn, determineIndexFormatVersion(defn), determineUniqueId(defn), indexPath);
this.indexPrefix = indexPrefix;
this.mappingVersion = ElasticSemVer.fromString(getOptionalValue(definition, PROP_INDEX_MAPPING_VERSION, "1.0.0"));
// the alias contains the internal mapping major version. In case of a breaking change, an index with the new version can
// be created without affecting the existing queries of an instance running with the old version.
// This strategy has been introduced from version 1. For compatibility reasons, the alias is not changed for the first version.
if (MAPPING_VERSION.getMajor() > 1) {
this.indexAlias = ElasticIndexNameHelper.getElasticSafeIndexName(indexPrefix, getIndexPath() + "_v" + MAPPING_VERSION.getMajor());
if (this.mappingVersion.getMajor() > 1) {
this.indexAlias = ElasticIndexNameHelper.getElasticSafeIndexName(indexPrefix, getIndexPath() + "_v" + mappingVersion.getMajor());
} else {
this.indexAlias = ElasticIndexNameHelper.getElasticSafeIndexName(indexPrefix, getIndexPath());
}
Expand Down Expand Up @@ -367,8 +369,8 @@ public boolean analyzerConfigSplitOnNumerics() {
* Returns the mapping version for this index definition.
* If the version is not specified, the default value is {@code 1.0.0}.
*/
public String getMappingVersion() {
return getOptionalValue(definition, PROP_INDEX_MAPPING_VERSION, "1.0.0");
public ElasticSemVer getMappingVersion() {
return mappingVersion;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import java.util.Objects;

public class ElasticSemVer {
public class ElasticSemVer implements Comparable<ElasticSemVer> {
private final int major;
private final int minor;
private final int patch;
Expand Down Expand Up @@ -62,6 +62,17 @@ public int getPatch() {
return patch;
}

@Override
public int compareTo(ElasticSemVer other) {
if (this.major != other.major) {
return Integer.compare(this.major, other.major);
}
if (this.minor != other.minor) {
return Integer.compare(this.minor, other.minor);
}
return Integer.compare(this.patch, other.patch);
}

@Override
public String toString() {
return major + "." + minor + "." + patch;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ public class ElasticDocument {
public final Set<Map<String, String>> suggest;
@JsonProperty(FieldNames.SPELLCHECK)
public final Set<String> spellcheck;
@JsonProperty(FieldNames.NULL_PROPS)
public final Set<String> nullProperties;
@JsonProperty(ElasticIndexDefinition.DYNAMIC_BOOST_FULLTEXT)
public final Set<String> dbFullText;
@JsonProperty(ElasticIndexDefinition.SIMILARITY_TAGS)
Expand Down Expand Up @@ -79,6 +81,7 @@ public class ElasticDocument {
this.fulltext = new LinkedHashSet<>();
this.suggest = new LinkedHashSet<>();
this.spellcheck = new LinkedHashSet<>();
this.nullProperties = new LinkedHashSet<>();
this.properties = new HashMap<>();
this.dynamicProperties = new ArrayList<>();
this.dbFullText = new LinkedHashSet<>();
Expand Down Expand Up @@ -124,6 +127,10 @@ void addSpellcheck(String value) {
spellcheck.add(value);
}

void addNullProperty(String fieldName) {
nullProperties.add(fieldName);
}

// ES for String values (that are not interpreted as date or numbers etc.) would analyze in the same
// field and would index a sub-field "keyword" for non-analyzed value.
// ref: https://www.elastic.co/blog/strings-are-dead-long-live-strings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,12 +229,15 @@ protected void indexAncestors(ElasticDocument doc, String path) { /* empty */ }

@Override
protected void indexNotNullProperty(ElasticDocument doc, PropertyDefinition pd) {
// Elastic support exist queries for specific fields
// Elastic efficiently supports exist queries thanks to the special _field_names metadata field
// to determine which fields are present in each document
}

@Override
protected void indexNullProperty(ElasticDocument doc, PropertyDefinition pd) {
// Elastic support not exist queries for specific fields
// Elasticsearch performs poorly with must_not + exists because it needs to identify documents that do not
// have a field — and it has no inverted index for "non-existence"
doc.addNullProperty(pd.name);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,8 @@ private static void mapInternalProperties(@NotNull TypeMapping.Builder builder)
)
)
)
.properties(ElasticIndexDefinition.LAST_UPDATED, b -> b.date(d -> d));
// TODO: the mapping below is for features currently not supported. These need to be reviewed
// mappingBuilder.startObject(FieldNames.NOT_NULL_PROPS)
// .field("type", "keyword")
// .endObject();
// mappingBuilder.startObject(FieldNames.NULL_PROPS)
// .field("type", "keyword")
// .endObject();
.properties(ElasticIndexDefinition.LAST_UPDATED, b -> b.date(d -> d))
.properties(FieldNames.NULL_PROPS, p -> p.keyword(k -> k.docValues(false)));
}

private static void mapInferenceDefinition(@NotNull TypeMapping.Builder builder, @NotNull ElasticIndexDefinition.InferenceDefinition inferenceDefinition) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticConnection;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticPropertyDefinition;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticSemVer;
import org.apache.jackrabbit.oak.plugins.index.elastic.query.async.facets.ElasticFacetProvider;
import org.apache.jackrabbit.oak.plugins.index.elastic.query.inference.InferenceConfig;
import org.apache.jackrabbit.oak.plugins.index.elastic.query.inference.InferenceConstants;
Expand Down Expand Up @@ -143,6 +144,10 @@ public class ElasticRequestHandler {
private final String propertyRestrictionQuery;
private final NodeState rootState;

// Min/max version of ElasticSearch that supports null checks
private static final ElasticSemVer MINIMUM_NULL_CHECK_VERSION = new ElasticSemVer(1, 4, 0);
private static final ElasticSemVer MAXIMUM_NULL_CHECK_VERSION = new ElasticSemVer(1, 5, 0);

ElasticRequestHandler(@NotNull IndexPlan indexPlan, @NotNull FulltextIndexPlanner.PlanResult planResult,
NodeState rootState) {
this.indexPlan = indexPlan;
Expand Down Expand Up @@ -1123,7 +1128,20 @@ private Query createQuery(String propertyName, Filter.PropertyRestriction pr, Pr
final String field = elasticIndexDefinition.getElasticKeyword(propertyName);

if (pr.isNullRestriction()) {
return Query.of(q -> q.bool(b -> b.mustNot(m -> m.exists(e -> e.field(field)))));
// nullProps check has been added in 1.4.0. Use the old strategy when version is lower
if (elasticIndexDefinition.getMappingVersion().compareTo(MINIMUM_NULL_CHECK_VERSION) < 0) {
// check if the default mapping is >= 1.5.0
if (ElasticIndexDefinition.MAPPING_VERSION != null &&
ElasticIndexDefinition.MAPPING_VERSION.compareTo(MAXIMUM_NULL_CHECK_VERSION) >= 0) {
LOG.error("Backward compatibility for null check is not supported anymore. Query results may be incorrect. " +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should decrease logging these on each query execution., may be once every 100/1000 queries.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about that but I decided to not add this complexity for the following reasons:

  • this message is not printed for every query, only the once using is null (which shouldn't be that common)
  • we have a way to get rid of these logs: reindexing is required and it will improve both response time and resource usage

"Please reindex to update the internal mapping version.");
} else {
LOG.warn("Using deprecated null check strategy for field: {}. Please reindex to update the internal mapping version. " +
"It will be removed with default index mapping version 1.5.0.", field);
return Query.of(q -> q.bool(b -> b.mustNot(mn -> mn.exists(e -> e.field(field)))));
}
}
return Query.of(q -> q.term(t -> t.field(FieldNames.NULL_PROPS).value(field)));
}
if (pr.isNotNullRestriction()) {
return Query.of(q -> q.exists(e -> e.field(field)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public void indexStoresMappingVersion() throws Exception {
Tree index = setIndex(UUID.randomUUID().toString(), builder);
root.commit();

assertEventually(() -> assertEquals(ElasticIndexDefinition.MAPPING_VERSION.toString(),
assertEventually(() -> assertEquals(ElasticIndexDefinition.MAPPING_VERSION,
getElasticIndexDefinition(index).getMappingVersion()));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,6 @@ public static IndexingMode from(String indexingMode) {
* needed from an outside process that does not have visibility to the specific index module.
*/
Map<String, String> INDEX_VERSION_BY_TYPE = Map.of(
"elasticsearch", "1.3.1"
"elasticsearch", "1.4.0"
);
}
Loading