Skip to content

Commit 2a862b0

Browse files
thomasmueller authored and nfsantos committed
OAK-11555 Elastic: support dot in property and function names (apache#2145)
* OAK-11555 Elastic: support dot in property and function names
* OAK-11555 Elastic: support dot in property and function names
* OAK-11555 Elastic: support dot in property and function names
* Update oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtils.java
Co-authored-by: Nuno Santos <nsantos@adobe.com>
* OAK-11555 Elastic: support dot in property and function names
* OAK-11555 Elastic: support dot in property and function names
* OAK-11555 Elastic: support dot in property and function names
* OAK-11555 Elastic: support dot in property and function names
---------
Co-authored-by: Nuno Santos <nsantos@adobe.com>
1 parent b5aa2ac commit 2a862b0

20 files changed

+450
-63
lines changed

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
import org.apache.jackrabbit.oak.api.Type;
3333
import org.apache.jackrabbit.oak.commons.collections.StreamUtils;
34+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
3435
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
3536
import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
3637
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
@@ -318,19 +319,18 @@ public float getSimilarityTagsBoost() {
318319
*/
319320
public String getElasticKeyword(String propertyName) {
320321
List<PropertyDefinition> propertyDefinitions = propertiesByName.get(propertyName);
322+
String field = ElasticIndexUtils.fieldName(propertyName);
321323
if (propertyDefinitions == null) {
322324
// if there are no property definitions we return the default keyword name
323325
// this can happen for properties that were not explicitly defined (eg: created with a regex)
324326
ElasticPropertyDefinition pd = getMatchingRegexPropertyDefinition(propertyName);
325-
if (pd != null) {
326-
if (pd.isFlattened()) {
327-
return FieldNames.FLATTENED_FIELD_PREFIX + pd.nodeName + "." + propertyName;
328-
}
327+
if (pd != null && pd.isFlattened()) {
328+
return FieldNames.FLATTENED_FIELD_PREFIX +
329+
ElasticIndexUtils.fieldName(pd.nodeName) + "." + field;
330+
} else {
331+
return field + ".keyword";
329332
}
330-
return propertyName + ".keyword";
331333
}
332-
333-
String field = propertyName;
334334
// it's ok to look at the first property since we are sure they all have the same type
335335
int type = propertyDefinitions.get(0).getType();
336336
if (isAnalyzable.apply(type) && isAnalyzed(propertyDefinitions)) {

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexStatistics.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import java.util.concurrent.TimeUnit;
2626

2727
import co.elastic.clients.elasticsearch._types.query_dsl.Query;
28+
29+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
2830
import org.apache.jackrabbit.oak.plugins.index.search.IndexStatistics;
2931
import org.jetbrains.annotations.NotNull;
3032
import org.jetbrains.annotations.Nullable;
@@ -111,8 +113,9 @@ public int numDocs() {
111113
*/
112114
@Override
113115
public int getDocCountFor(String field) {
116+
String elasticField = ElasticIndexUtils.fieldName(field);
114117
return countCache.getUnchecked(
115-
new StatsRequestDescriptor(elasticConnection, indexDefinition.getIndexAlias(), field, null)
118+
new StatsRequestDescriptor(elasticConnection, indexDefinition.getIndexAlias(), elasticField, null)
116119
);
117120
}
118121

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,9 @@ void addProperty(String fieldName, Object value) {
147147
properties.put(fieldName, finalValue);
148148
}
149149

150-
void addSimilarityField(String name, Blob value) throws IOException {
150+
void addSimilarityField(String fieldName, Blob value) throws IOException {
151151
byte[] bytes = value.getNewStream().readAllBytes();
152-
addProperty(FieldNames.createSimilarityFieldName(name), toFloats(bytes));
152+
addProperty(FieldNames.createSimilarityFieldName(fieldName), toFloats(bytes));
153153
}
154154

155155
void indexAncestors(String path) {
@@ -160,8 +160,8 @@ void indexAncestors(String path) {
160160
addProperty(FieldNames.PATH_DEPTH, depth);
161161
}
162162

163-
void addDynamicBoostField(String propName, String value, double boost) {
164-
addProperty(propName,
163+
void addDynamicBoostField(String fieldName, String value, double boost) {
164+
addProperty(fieldName,
165165
Map.of(
166166
ElasticIndexHelper.DYNAMIC_BOOST_NESTED_VALUE, value,
167167
ElasticIndexHelper.DYNAMIC_BOOST_NESTED_BOOST, boost

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.jackrabbit.oak.commons.log.LogSilencer;
2525
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
2626
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticPropertyDefinition;
27+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
2728
import org.apache.jackrabbit.oak.plugins.index.search.Aggregate;
2829
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
2930
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
@@ -165,16 +166,17 @@ protected boolean addTypedFields(ElasticDocument doc, PropertyState property, St
165166
}
166167

167168
@Override
168-
protected void indexTypedProperty(ElasticDocument doc, PropertyState property, String pname, PropertyDefinition pd, int i) {
169+
protected void indexTypedProperty(ElasticDocument doc, PropertyState property, String propertyName, PropertyDefinition pd, int i) {
169170
// Get the Type tag from the defined index definition here - and not from the actual persisted property state - this way in case
170171
// If the actual property value is different from the property type defined in the index definition/mapping - this will try to convert the property if possible,
171172
// otherwise will log a warning and not try and add the property to index. If we try and index incompatible data types (like String to Date),
172173
// we would get an exception while indexing the node on elastic search and other properties for the node will also don't get indexed. (See OAK-9665).
173-
String fieldName = pname;
174+
String fieldName = ElasticIndexUtils.fieldName(propertyName);
174175
if (pd.isRegexp) {
175176
ElasticPropertyDefinition epd = (ElasticPropertyDefinition) pd;
176177
if (epd.isFlattened()) {
177-
fieldName = FieldNames.FLATTENED_FIELD_PREFIX + epd.nodeName + "." + pname;
178+
fieldName = FieldNames.FLATTENED_FIELD_PREFIX +
179+
ElasticIndexUtils.fieldName(epd.nodeName) + "." + fieldName;
178180
}
179181
}
180182
int tag = pd.getType();
@@ -197,7 +199,7 @@ protected void indexTypedProperty(ElasticDocument doc, PropertyState property, S
197199
if (!LOG_SILENCER.silence(LOG_KEY_COULD_NOT_CONVERT_PROPERTY)) {
198200
LOG.warn(
199201
"[{}] Ignoring property. Could not convert property {} (field {}) of type {} to type {} for path {}. Error: {}",
200-
getIndexName(), pname, fieldName,
202+
getIndexName(), propertyName, fieldName,
201203
Type.fromTag(property.getType().tag(), false),
202204
Type.fromTag(tag, false), path, e.toString());
203205
}
@@ -252,7 +254,7 @@ protected void indexSimilarityBinaries(ElasticDocument doc, PropertyDefinition p
252254
if (pd.getSimilaritySearchDenseVectorSize() == blob.length() / BLOB_LENGTH_DIVISOR) {
253255
// see https://www.elastic.co/blog/text-similarity-search-with-vectors-in-elasticsearch
254256
// see https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html
255-
doc.addSimilarityField(pd.name, blob);
257+
doc.addSimilarityField(ElasticIndexUtils.fieldName(pd.name), blob);
256258
} else {
257259
if (!LOG_SILENCER.silence(LOG_KEY_SIMILARITY_BINARIES_WRONG_DIMENSION)) {
258260
LOG.warn("[{}] Ignoring binary property {} for path {}. Expected dimension is {} but got {}",
@@ -275,7 +277,7 @@ protected boolean augmentCustomFields(String path, ElasticDocument doc, NodeStat
275277
@Override
276278
protected boolean indexDynamicBoost(ElasticDocument doc, String parent, String nodeName, String token, double boost) {
277279
if (!token.isEmpty()) {
278-
doc.addDynamicBoostField(nodeName, token, boost);
280+
doc.addDynamicBoostField(ElasticIndexUtils.fieldName(nodeName), token, boost);
279281
return true;
280282
}
281283
return false;

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.jackrabbit.oak.api.Type;
2323
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
2424
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticPropertyDefinition;
25+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
2526
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
2627
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.IndexingRule;
2728
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
@@ -149,7 +150,8 @@ private static void mapInferenceDefinition(@NotNull TypeMapping.Builder builder,
149150
builder.meta("inference", JsonData.of(inferenceDefinition));
150151

151152
if (inferenceDefinition.properties != null) {
152-
inferenceDefinition.properties.forEach(p -> builder.properties(p.name,
153+
inferenceDefinition.properties.forEach(p -> builder.properties(
154+
ElasticIndexUtils.fieldName(p.name),
153155
b -> b.object(bo -> bo
154156
.properties("value", pb -> pb.denseVector(dv ->
155157
dv.index(true)
@@ -243,13 +245,15 @@ private static void mapIndexRules(@NotNull TypeMapping.Builder builder,
243245
if (epd.isFlattened()) {
244246
Property.Builder pBuilder = new Property.Builder();
245247
pBuilder.flattened(b2 -> b2.index(true));
246-
builder.properties(FieldNames.FLATTENED_FIELD_PREFIX + pd.nodeName, pBuilder.build());
248+
builder.properties(FieldNames.FLATTENED_FIELD_PREFIX +
249+
ElasticIndexUtils.fieldName(pd.nodeName), pBuilder.build());
247250
}
248251
}
249252
}
250253
for (Map.Entry<String, List<PropertyDefinition>> entry : indexDefinition.getPropertiesByName().entrySet()) {
251-
final String name = entry.getKey();
252-
final List<PropertyDefinition> propertyDefinitions = entry.getValue();
254+
String propertyName = entry.getKey();
255+
String fieldName = ElasticIndexUtils.fieldName(propertyName);
256+
List<PropertyDefinition> propertyDefinitions = entry.getValue();
253257
Type<?> type = null;
254258
for (PropertyDefinition pd : propertyDefinitions) {
255259
type = Type.fromTag(pd.getType(), false);
@@ -280,10 +284,10 @@ private static void mapIndexRules(@NotNull TypeMapping.Builder builder,
280284
pBuilder.keyword(b1 -> b1.ignoreAbove(256));
281285
}
282286
}
283-
builder.properties(name, pBuilder.build());
287+
builder.properties(fieldName, pBuilder.build());
284288

285289
for (PropertyDefinition pd : indexDefinition.getDynamicBoostProperties()) {
286-
builder.properties(pd.nodeName,
290+
builder.properties(ElasticIndexUtils.fieldName(pd.nodeName),
287291
b1 -> b1.nested(
288292
b2 -> b2.properties(DYNAMIC_BOOST_NESTED_VALUE,
289293
b3 -> b3.text(
@@ -305,7 +309,9 @@ private static void mapIndexRules(@NotNull TypeMapping.Builder builder,
305309
.similarity(DEFAULT_SIMILARITY_METRIC)
306310
.build();
307311

308-
builder.properties(FieldNames.createSimilarityFieldName(pd.name), b1 -> b1.denseVector(denseVectorProperty));
312+
builder.properties(FieldNames.createSimilarityFieldName(
313+
ElasticIndexUtils.fieldName(pd.name)),
314+
b1 -> b1.denseVector(denseVectorProperty));
309315
}
310316

311317
builder.properties(ElasticIndexDefinition.SIMILARITY_TAGS,

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,8 @@ public Optional<KnnQuery> similarityQuery(@NotNull String text, List<PropertyDef
263263
continue;
264264
}
265265

266-
String similarityPropFieldName = FieldNames.createSimilarityFieldName(pd.name);
266+
String similarityPropFieldName = FieldNames.createSimilarityFieldName(
267+
ElasticIndexUtils.fieldName(pd.name));
267268
KnnQuery knnQuery = baseKnnQueryBuilder(similarityPropFieldName, bytes, pd).build();
268269
return Optional.of(knnQuery);
269270
}
@@ -648,13 +649,20 @@ private ObjectBuilder<BoolQuery> inference(BoolQuery.Builder b, String propertyN
648649
}
649650

650651
private Stream<NestedQuery> dynamicScoreQueries(String text) {
651-
return elasticIndexDefinition.getDynamicBoostProperties().stream().map(pd -> NestedQuery.of(n -> n
652-
.path(pd.nodeName)
653-
.query(q -> q.functionScore(s -> s
652+
return elasticIndexDefinition.getDynamicBoostProperties().stream()
653+
.map(pd -> {
654+
String field = ElasticIndexUtils.fieldName(pd.nodeName);
655+
return NestedQuery.of(n -> n
656+
.path(field)
657+
.query(q -> q.functionScore(s -> s
654658
.boost(DYNAMIC_BOOST_WEIGHT)
655-
.query(fq -> fq.match(m -> m.field(pd.nodeName + ".value").query(FieldValue.of(text))))
656-
.functions(f -> f.fieldValueFactor(fv -> fv.field(pd.nodeName + ".boost")))))
657-
.scoreMode(ChildScoreMode.Avg))
659+
.query(fq -> fq.match(m -> m.field(
660+
field + ".value").
661+
query(FieldValue.of(text))))
662+
.functions(f -> f.fieldValueFactor(fv -> fv.field(
663+
field + ".boost")))))
664+
.scoreMode(ChildScoreMode.Avg));
665+
}
658666
);
659667
}
660668

@@ -889,8 +897,8 @@ private static QueryStringQuery.Builder fullTextQuery(String text, String fieldN
889897
.type(TextQueryType.CrossFields)
890898
.tieBreaker(0.5d);
891899
if (FieldNames.FULLTEXT.equals(fieldName)) {
892-
for(PropertyDefinition pd: pr.indexingRule.getNodeScopeAnalyzedProps()) {
893-
qsqBuilder.fields(pd.name + "^" + pd.boost);
900+
for (PropertyDefinition pd : pr.indexingRule.getNodeScopeAnalyzedProps()) {
901+
qsqBuilder.fields(ElasticIndexUtils.fieldName(pd.name) + "^" + pd.boost);
894902
}
895903
// dynamic boost is included only for :fulltext field
896904
if (includeDynamicBoostedValues) {
@@ -951,6 +959,11 @@ private String getElasticFulltextFieldName(@Nullable String propertyName) {
951959
if (planResult.isPathTransformed()) {
952960
propertyName = PathUtils.getName(propertyName);
953961
}
954-
return propertyName;
962+
if ("*".equals(propertyName)) {
963+
// elasticsearch does support the pseudo-field "*" meaning all fields,
964+
// but (arguably) what we really want is the field ":fulltext".
965+
return FieldNames.FULLTEXT;
966+
}
967+
return ElasticIndexUtils.fieldName(propertyName);
955968
}
956969
}

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticSecureFacetAsyncProvider.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticRequestHandler;
2323
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticResponseHandler;
2424
import org.apache.jackrabbit.oak.plugins.index.elastic.query.async.ElasticResponseListener;
25+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
2526
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
2627
import org.slf4j.Logger;
2728
import org.slf4j.LoggerFactory;
@@ -57,7 +58,9 @@ class ElasticSecureFacetAsyncProvider implements ElasticFacetProvider, ElasticRe
5758
) {
5859
this.elasticResponseHandler = elasticResponseHandler;
5960
this.isAccessible = isAccessible;
60-
this.facetFields = elasticRequestHandler.facetFields().collect(Collectors.toSet());
61+
this.facetFields = elasticRequestHandler.facetFields().
62+
map(ElasticIndexUtils::fieldName).
63+
collect(Collectors.toSet());
6164
}
6265

6366
@Override
@@ -129,6 +132,7 @@ public List<FulltextIndex.Facet> getFacets(int numberOfFacets, String columnName
129132
throw new IllegalStateException("Error while waiting for facets", e);
130133
}
131134
LOG.trace("Reading facets for {} from {}", columnName, facets);
132-
return facets != null ? facets.get(FulltextIndex.parseFacetField(columnName)) : null;
135+
String field = ElasticIndexUtils.fieldName(FulltextIndex.parseFacetField(columnName));
136+
return facets != null ? facets.get(field) : null;
133137
}
134138
}

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticStatisticalFacetAsyncProvider.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
2424
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticRequestHandler;
2525
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticResponseHandler;
26+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
2627
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
2728
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
2829
import org.slf4j.Logger;
@@ -74,7 +75,9 @@ public class ElasticStatisticalFacetAsyncProvider implements ElasticFacetProvide
7475

7576
this.elasticResponseHandler = elasticResponseHandler;
7677
this.isAccessible = isAccessible;
77-
this.facetFields = elasticRequestHandler.facetFields().collect(Collectors.toSet());
78+
this.facetFields = elasticRequestHandler.facetFields().
79+
map(ElasticIndexUtils::fieldName).
80+
collect(Collectors.toSet());
7881

7982
SearchRequest searchRequest = SearchRequest.of(srb -> srb.index(indexDefinition.getIndexAlias())
8083
.trackTotalHits(thb -> thb.enabled(true))
@@ -128,7 +131,8 @@ public List<FulltextIndex.Facet> getFacets(int numberOfFacets, String columnName
128131
throw new IllegalStateException("Error while waiting for facets", e);
129132
}
130133
LOG.trace("Reading facets for {} from {}", columnName, facets);
131-
return facets != null ? facets.get(FulltextIndex.parseFacetField(columnName)) : null;
134+
String field = ElasticIndexUtils.fieldName(FulltextIndex.parseFacetField(columnName));
135+
return facets != null ? facets.get(field) : null;
132136
}
133137

134138
private void processHit(Hit<ObjectNode> searchHit) {

0 commit comments

Comments
 (0)