Skip to content

Commit b58c345

Browse files
committed
OAK-11555 Elastic: support dot in property and function names
1 parent 49913d2 commit b58c345

File tree

16 files changed

+309
-51
lines changed

16 files changed

+309
-51
lines changed

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
import org.apache.jackrabbit.oak.api.Type;
3333
import org.apache.jackrabbit.oak.commons.collections.StreamUtils;
34+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
3435
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
3536
import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
3637
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
@@ -324,13 +325,17 @@ public String getElasticKeyword(String propertyName) {
324325
ElasticPropertyDefinition pd = getMatchingRegexPropertyDefinition(propertyName);
325326
if (pd != null) {
326327
if (pd.isFlattened()) {
327-
return FieldNames.FLATTENED_FIELD_PREFIX + pd.nodeName + "." + propertyName;
328+
String fieldName = ElasticIndexUtils.fieldName(propertyName);
329+
String flattenedFieldName = FieldNames.FLATTENED_FIELD_PREFIX +
330+
ElasticIndexUtils.fieldName(pd.nodeName) + "." + fieldName;
331+
return flattenedFieldName;
328332
}
329333
}
330-
return propertyName + ".keyword";
334+
String fieldName = ElasticIndexUtils.fieldName(propertyName);
335+
return fieldName + ".keyword";
331336
}
332337

333-
String field = propertyName;
338+
String field = ElasticIndexUtils.fieldName(propertyName);
334339
// it's ok to look at the first property since we are sure they all have the same type
335340
int type = propertyDefinitions.get(0).getType();
336341
if (isAnalyzable.apply(type) && isAnalyzed(propertyDefinitions)) {

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexStatistics.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import java.util.concurrent.TimeUnit;
2626

2727
import co.elastic.clients.elasticsearch._types.query_dsl.Query;
28+
29+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
2830
import org.apache.jackrabbit.oak.plugins.index.search.IndexStatistics;
2931
import org.jetbrains.annotations.NotNull;
3032
import org.jetbrains.annotations.Nullable;
@@ -111,8 +113,9 @@ public int numDocs() {
111113
*/
112114
@Override
113115
public int getDocCountFor(String field) {
116+
String elasticField = ElasticIndexUtils.fieldName(field);
114117
return countCache.getUnchecked(
115-
new StatsRequestDescriptor(elasticConnection, indexDefinition.getIndexAlias(), field, null)
118+
new StatsRequestDescriptor(elasticConnection, indexDefinition.getIndexAlias(), elasticField, null)
116119
);
117120
}
118121

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,9 @@ void addProperty(String fieldName, Object value) {
147147
properties.put(fieldName, finalValue);
148148
}
149149

150-
void addSimilarityField(String name, Blob value) throws IOException {
150+
void addSimilarityField(String fieldName, Blob value) throws IOException {
151151
byte[] bytes = value.getNewStream().readAllBytes();
152-
addProperty(FieldNames.createSimilarityFieldName(name), toFloats(bytes));
152+
addProperty(FieldNames.createSimilarityFieldName(fieldName), toFloats(bytes));
153153
}
154154

155155
void indexAncestors(String path) {
@@ -160,8 +160,8 @@ void indexAncestors(String path) {
160160
addProperty(FieldNames.PATH_DEPTH, depth);
161161
}
162162

163-
void addDynamicBoostField(String propName, String value, double boost) {
164-
addProperty(propName,
163+
void addDynamicBoostField(String fieldName, String value, double boost) {
164+
addProperty(fieldName,
165165
Map.of(
166166
ElasticIndexHelper.DYNAMIC_BOOST_NESTED_VALUE, value,
167167
ElasticIndexHelper.DYNAMIC_BOOST_NESTED_BOOST, boost

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.jackrabbit.oak.commons.log.LogSilencer;
2525
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
2626
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticPropertyDefinition;
27+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
2728
import org.apache.jackrabbit.oak.plugins.index.search.Aggregate;
2829
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
2930
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
@@ -165,16 +166,17 @@ protected boolean addTypedFields(ElasticDocument doc, PropertyState property, St
165166
}
166167

167168
@Override
168-
protected void indexTypedProperty(ElasticDocument doc, PropertyState property, String pname, PropertyDefinition pd, int i) {
169+
protected void indexTypedProperty(ElasticDocument doc, PropertyState property, String propertyName, PropertyDefinition pd, int i) {
169170
// Get the Type tag from the index definition here, and not from the actual persisted property state. This way,
170171
// if the actual property value is different from the property type defined in the index definition/mapping, this will try to convert the property if possible,
171172
// otherwise it will log a warning and not try to add the property to the index. If we tried to index incompatible data types (like String to Date),
172173
// we would get an exception while indexing the node in Elasticsearch, and the other properties of the node would not get indexed either. (See OAK-9665).
173-
String fieldName = pname;
174+
String fieldName = ElasticIndexUtils.fieldName(propertyName);
174175
if (pd.isRegexp) {
175176
ElasticPropertyDefinition epd = (ElasticPropertyDefinition) pd;
176177
if (epd.isFlattened()) {
177-
fieldName = FieldNames.FLATTENED_FIELD_PREFIX + epd.nodeName + "." + pname;
178+
fieldName = FieldNames.FLATTENED_FIELD_PREFIX +
179+
ElasticIndexUtils.fieldName(epd.nodeName) + "." + fieldName;
178180
}
179181
}
180182
int tag = pd.getType();
@@ -197,7 +199,7 @@ protected void indexTypedProperty(ElasticDocument doc, PropertyState property, S
197199
if (!LOG_SILENCER.silence(LOG_KEY_COULD_NOT_CONVERT_PROPERTY)) {
198200
LOG.warn(
199201
"[{}] Ignoring property. Could not convert property {} (field {}) of type {} to type {} for path {}. Error: {}",
200-
getIndexName(), pname, fieldName,
202+
getIndexName(), propertyName, fieldName,
201203
Type.fromTag(property.getType().tag(), false),
202204
Type.fromTag(tag, false), path, e.toString());
203205
}
@@ -252,7 +254,7 @@ protected void indexSimilarityBinaries(ElasticDocument doc, PropertyDefinition p
252254
if (pd.getSimilaritySearchDenseVectorSize() == blob.length() / BLOB_LENGTH_DIVISOR) {
253255
// see https://www.elastic.co/blog/text-similarity-search-with-vectors-in-elasticsearch
254256
// see https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html
255-
doc.addSimilarityField(pd.name, blob);
257+
doc.addSimilarityField(ElasticIndexUtils.fieldName(pd.name), blob);
256258
} else {
257259
if (!LOG_SILENCER.silence(LOG_KEY_SIMILARITY_BINARIES_WRONG_DIMENSION)) {
258260
LOG.warn("[{}] Ignoring binary property {} for path {}. Expected dimension is {} but got {}",
@@ -275,7 +277,7 @@ protected boolean augmentCustomFields(String path, ElasticDocument doc, NodeStat
275277
@Override
276278
protected boolean indexDynamicBoost(ElasticDocument doc, String parent, String nodeName, String token, double boost) {
277279
if (!token.isEmpty()) {
278-
doc.addDynamicBoostField(nodeName, token, boost);
280+
doc.addDynamicBoostField(ElasticIndexUtils.fieldName(nodeName), token, boost);
279281
return true;
280282
}
281283
return false;

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.jackrabbit.oak.api.Type;
2323
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
2424
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticPropertyDefinition;
25+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
2526
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
2627
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.IndexingRule;
2728
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
@@ -149,7 +150,8 @@ private static void mapInferenceDefinition(@NotNull TypeMapping.Builder builder,
149150
builder.meta("inference", JsonData.of(inferenceDefinition));
150151

151152
if (inferenceDefinition.properties != null) {
152-
inferenceDefinition.properties.forEach(p -> builder.properties(p.name,
153+
inferenceDefinition.properties.forEach(p -> builder.properties(
154+
ElasticIndexUtils.fieldName(p.name),
153155
b -> b.object(bo -> bo
154156
.properties("value", pb -> pb.denseVector(dv ->
155157
dv.index(true)
@@ -243,13 +245,15 @@ private static void mapIndexRules(@NotNull TypeMapping.Builder builder,
243245
if (epd.isFlattened()) {
244246
Property.Builder pBuilder = new Property.Builder();
245247
pBuilder.flattened(b2 -> b2.index(true));
246-
builder.properties(FieldNames.FLATTENED_FIELD_PREFIX + pd.nodeName, pBuilder.build());
248+
builder.properties(FieldNames.FLATTENED_FIELD_PREFIX +
249+
ElasticIndexUtils.fieldName(pd.nodeName), pBuilder.build());
247250
}
248251
}
249252
}
250253
for (Map.Entry<String, List<PropertyDefinition>> entry : indexDefinition.getPropertiesByName().entrySet()) {
251-
final String name = entry.getKey();
252-
final List<PropertyDefinition> propertyDefinitions = entry.getValue();
254+
String propertyName = entry.getKey();
255+
String fieldName = ElasticIndexUtils.fieldName(propertyName);
256+
List<PropertyDefinition> propertyDefinitions = entry.getValue();
253257
Type<?> type = null;
254258
for (PropertyDefinition pd : propertyDefinitions) {
255259
type = Type.fromTag(pd.getType(), false);
@@ -280,10 +284,10 @@ private static void mapIndexRules(@NotNull TypeMapping.Builder builder,
280284
pBuilder.keyword(b1 -> b1.ignoreAbove(256));
281285
}
282286
}
283-
builder.properties(name, pBuilder.build());
287+
builder.properties(fieldName, pBuilder.build());
284288

285289
for (PropertyDefinition pd : indexDefinition.getDynamicBoostProperties()) {
286-
builder.properties(pd.nodeName,
290+
builder.properties(ElasticIndexUtils.fieldName(pd.nodeName),
287291
b1 -> b1.nested(
288292
b2 -> b2.properties(DYNAMIC_BOOST_NESTED_VALUE,
289293
b3 -> b3.text(
@@ -305,7 +309,9 @@ private static void mapIndexRules(@NotNull TypeMapping.Builder builder,
305309
.similarity(DEFAULT_SIMILARITY_METRIC)
306310
.build();
307311

308-
builder.properties(FieldNames.createSimilarityFieldName(pd.name), b1 -> b1.denseVector(denseVectorProperty));
312+
builder.properties(FieldNames.createSimilarityFieldName(
313+
ElasticIndexUtils.fieldName(pd.name)),
314+
b1 -> b1.denseVector(denseVectorProperty));
309315
}
310316

311317
builder.properties(ElasticIndexDefinition.SIMILARITY_TAGS,

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,8 @@ public Optional<KnnQuery> similarityQuery(@NotNull String text, List<PropertyDef
263263
continue;
264264
}
265265

266-
String similarityPropFieldName = FieldNames.createSimilarityFieldName(pd.name);
266+
String similarityPropFieldName = FieldNames.createSimilarityFieldName(
267+
ElasticIndexUtils.fieldName(pd.name));
267268
KnnQuery knnQuery = baseKnnQueryBuilder(similarityPropFieldName, bytes, pd).build();
268269
return Optional.of(knnQuery);
269270
}
@@ -649,11 +650,14 @@ private ObjectBuilder<BoolQuery> inference(BoolQuery.Builder b, String propertyN
649650

650651
private Stream<NestedQuery> dynamicScoreQueries(String text) {
651652
return elasticIndexDefinition.getDynamicBoostProperties().stream().map(pd -> NestedQuery.of(n -> n
652-
.path(pd.nodeName)
653+
.path(ElasticIndexUtils.fieldName(pd.nodeName))
653654
.query(q -> q.functionScore(s -> s
654655
.boost(DYNAMIC_BOOST_WEIGHT)
655-
.query(fq -> fq.match(m -> m.field(pd.nodeName + ".value").query(FieldValue.of(text))))
656-
.functions(f -> f.fieldValueFactor(fv -> fv.field(pd.nodeName + ".boost")))))
656+
.query(fq -> fq.match(m -> m.field(
657+
ElasticIndexUtils.fieldName(pd.nodeName) + ".value").
658+
query(FieldValue.of(text))))
659+
.functions(f -> f.fieldValueFactor(fv -> fv.field(
660+
ElasticIndexUtils.fieldName(pd.nodeName) + ".boost")))))
657661
.scoreMode(ChildScoreMode.Avg))
658662
);
659663
}
@@ -889,8 +893,8 @@ private static QueryStringQuery.Builder fullTextQuery(String text, String fieldN
889893
.type(TextQueryType.CrossFields)
890894
.tieBreaker(0.5d);
891895
if (FieldNames.FULLTEXT.equals(fieldName)) {
892-
for(PropertyDefinition pd: pr.indexingRule.getNodeScopeAnalyzedProps()) {
893-
qsqBuilder.fields(pd.name + "^" + pd.boost);
896+
for (PropertyDefinition pd : pr.indexingRule.getNodeScopeAnalyzedProps()) {
897+
qsqBuilder.fields(ElasticIndexUtils.fieldName(pd.name) + "^" + pd.boost);
894898
}
895899
// dynamic boost is included only for :fulltext field
896900
if (includeDynamicBoostedValues) {
@@ -951,6 +955,11 @@ private String getElasticFulltextFieldName(@Nullable String propertyName) {
951955
if (planResult.isPathTransformed()) {
952956
propertyName = PathUtils.getName(propertyName);
953957
}
954-
return propertyName;
958+
if ("*".equals(propertyName)) {
959+
// elasticsearch does support the pseudo-field "*" meaning all fields,
960+
// but (arguably) what we really want is the field ":fulltext".
961+
return FieldNames.FULLTEXT;
962+
}
963+
return ElasticIndexUtils.fieldName(propertyName);
955964
}
956965
}

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticSecureFacetAsyncProvider.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticRequestHandler;
2323
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticResponseHandler;
2424
import org.apache.jackrabbit.oak.plugins.index.elastic.query.async.ElasticResponseListener;
25+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
2526
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
2627
import org.slf4j.Logger;
2728
import org.slf4j.LoggerFactory;
@@ -44,6 +45,7 @@ class ElasticSecureFacetAsyncProvider implements ElasticFacetProvider, ElasticRe
4445
private static final Logger LOG = LoggerFactory.getLogger(ElasticSecureFacetAsyncProvider.class);
4546

4647
private final Set<String> facetFields;
48+
private final Set<String> elasticFieldNames;
4749
private final Map<String, Map<String, Integer>> accessibleFacetCounts = new ConcurrentHashMap<>();
4850
private final ElasticResponseHandler elasticResponseHandler;
4951
private final Predicate<String> isAccessible;
@@ -58,11 +60,14 @@ class ElasticSecureFacetAsyncProvider implements ElasticFacetProvider, ElasticRe
5860
this.elasticResponseHandler = elasticResponseHandler;
5961
this.isAccessible = isAccessible;
6062
this.facetFields = elasticRequestHandler.facetFields().collect(Collectors.toSet());
63+
this.elasticFieldNames = elasticRequestHandler.facetFields().
64+
map(p -> ElasticIndexUtils.fieldName(p)).
65+
collect(Collectors.toSet());
6166
}
6267

6368
@Override
6469
public Set<String> sourceFields() {
65-
return facetFields;
70+
return elasticFieldNames;
6671
}
6772

6873
@Override
@@ -75,7 +80,8 @@ public boolean on(Hit<ObjectNode> searchHit) {
7580
final String path = elasticResponseHandler.getPath(searchHit);
7681
if (path != null && isAccessible.test(path)) {
7782
for (String field: facetFields) {
78-
JsonNode value = searchHit.source().get(field);
83+
String elasticField = ElasticIndexUtils.fieldName(field);
84+
JsonNode value = searchHit.source().get(elasticField);
7985
if (value != null) {
8086
accessibleFacetCounts.compute(field, (column, facetValues) -> {
8187
if (facetValues == null) {

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticStatisticalFacetAsyncProvider.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
2424
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticRequestHandler;
2525
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticResponseHandler;
26+
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
2627
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
2728
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
2829
import org.slf4j.Logger;
@@ -74,11 +75,15 @@ public class ElasticStatisticalFacetAsyncProvider implements ElasticFacetProvide
7475

7576
this.elasticResponseHandler = elasticResponseHandler;
7677
this.isAccessible = isAccessible;
77-
this.facetFields = elasticRequestHandler.facetFields().collect(Collectors.toSet());
78+
Set<String> elasticFieldNames = elasticRequestHandler.facetFields().
79+
map(p -> ElasticIndexUtils.fieldName(p)).
80+
collect(Collectors.toSet());
81+
this.facetFields = elasticRequestHandler.facetFields().
82+
collect(Collectors.toSet());
7883

7984
SearchRequest searchRequest = SearchRequest.of(srb -> srb.index(indexDefinition.getIndexAlias())
8085
.trackTotalHits(thb -> thb.enabled(true))
81-
.source(SourceConfig.of(scf -> scf.filter(ff -> ff.includes(FieldNames.PATH).includes(new ArrayList<>(facetFields)))))
86+
.source(SourceConfig.of(scf -> scf.filter(ff -> ff.includes(FieldNames.PATH).includes(new ArrayList<>(elasticFieldNames)))))
8287
.query(Query.of(qb -> qb.bool(elasticRequestHandler.baseQueryBuilder().build())))
8388
.aggregations(elasticRequestHandler.aggregations())
8489
.size(sampleSize)
@@ -135,7 +140,7 @@ private void processHit(Hit<ObjectNode> searchHit) {
135140
final String path = elasticResponseHandler.getPath(searchHit);
136141
if (path != null && isAccessible.test(path)) {
137142
for (String field : facetFields) {
138-
JsonNode value = searchHit.source().get(field);
143+
JsonNode value = searchHit.source().get(ElasticIndexUtils.fieldName(field));
139144
if (value != null) {
140145
accessibleFacetCounts.compute(field, (column, facetValues) -> {
141146
if (facetValues == null) {

0 commit comments

Comments
 (0)