Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.collections.StreamUtils;
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
Expand Down Expand Up @@ -318,19 +319,18 @@ public float getSimilarityTagsBoost() {
*/
public String getElasticKeyword(String propertyName) {
List<PropertyDefinition> propertyDefinitions = propertiesByName.get(propertyName);
String field = ElasticIndexUtils.fieldName(propertyName);
if (propertyDefinitions == null) {
// If there are no property definitions, we return the default keyword name.
// This can happen for properties that were not explicitly defined (e.g. matched by a regex).
ElasticPropertyDefinition pd = getMatchingRegexPropertyDefinition(propertyName);
if (pd != null) {
if (pd.isFlattened()) {
return FieldNames.FLATTENED_FIELD_PREFIX + pd.nodeName + "." + propertyName;
}
if (pd != null && pd.isFlattened()) {
return FieldNames.FLATTENED_FIELD_PREFIX +
ElasticIndexUtils.fieldName(pd.nodeName) + "." + field;
} else {
return field + ".keyword";
}
return propertyName + ".keyword";
}

String field = propertyName;
// it's ok to look at the first property since we are sure they all have the same type
int type = propertyDefinitions.get(0).getType();
if (isAnalyzable.apply(type) && isAnalyzed(propertyDefinitions)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import java.util.concurrent.TimeUnit;

import co.elastic.clients.elasticsearch._types.query_dsl.Query;

import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
import org.apache.jackrabbit.oak.plugins.index.search.IndexStatistics;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
Expand Down Expand Up @@ -111,8 +113,9 @@ public int numDocs() {
*/
@Override
public int getDocCountFor(String field) {
String elasticField = ElasticIndexUtils.fieldName(field);
return countCache.getUnchecked(
new StatsRequestDescriptor(elasticConnection, indexDefinition.getIndexAlias(), field, null)
new StatsRequestDescriptor(elasticConnection, indexDefinition.getIndexAlias(), elasticField, null)
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,9 @@ void addProperty(String fieldName, Object value) {
properties.put(fieldName, finalValue);
}

void addSimilarityField(String name, Blob value) throws IOException {
void addSimilarityField(String fieldName, Blob value) throws IOException {
byte[] bytes = value.getNewStream().readAllBytes();
addProperty(FieldNames.createSimilarityFieldName(name), toFloats(bytes));
addProperty(FieldNames.createSimilarityFieldName(fieldName), toFloats(bytes));
}

void indexAncestors(String path) {
Expand All @@ -160,8 +160,8 @@ void indexAncestors(String path) {
addProperty(FieldNames.PATH_DEPTH, depth);
}

void addDynamicBoostField(String propName, String value, double boost) {
addProperty(propName,
void addDynamicBoostField(String fieldName, String value, double boost) {
addProperty(fieldName,
Map.of(
ElasticIndexHelper.DYNAMIC_BOOST_NESTED_VALUE, value,
ElasticIndexHelper.DYNAMIC_BOOST_NESTED_BOOST, boost
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.jackrabbit.oak.commons.log.LogSilencer;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticPropertyDefinition;
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
import org.apache.jackrabbit.oak.plugins.index.search.Aggregate;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
Expand Down Expand Up @@ -165,16 +166,17 @@ protected boolean addTypedFields(ElasticDocument doc, PropertyState property, St
}

@Override
protected void indexTypedProperty(ElasticDocument doc, PropertyState property, String pname, PropertyDefinition pd, int i) {
protected void indexTypedProperty(ElasticDocument doc, PropertyState property, String propertyName, PropertyDefinition pd, int i) {
// Get the Type tag from the index definition here - and not from the actual persisted property state.
// This way, if the actual property value differs from the property type defined in the index definition/mapping, we try to convert the property if possible;
// otherwise we log a warning and do not add the property to the index. If we tried to index incompatible data types (like String to Date),
// we would get an exception while indexing the node on Elasticsearch, and the other properties of the node would not get indexed either. (See OAK-9665).
String fieldName = pname;
String fieldName = ElasticIndexUtils.fieldName(propertyName);
if (pd.isRegexp) {
ElasticPropertyDefinition epd = (ElasticPropertyDefinition) pd;
if (epd.isFlattened()) {
fieldName = FieldNames.FLATTENED_FIELD_PREFIX + epd.nodeName + "." + pname;
fieldName = FieldNames.FLATTENED_FIELD_PREFIX +
ElasticIndexUtils.fieldName(epd.nodeName) + "." + fieldName;
}
}
int tag = pd.getType();
Expand All @@ -197,7 +199,7 @@ protected void indexTypedProperty(ElasticDocument doc, PropertyState property, S
if (!LOG_SILENCER.silence(LOG_KEY_COULD_NOT_CONVERT_PROPERTY)) {
LOG.warn(
"[{}] Ignoring property. Could not convert property {} (field {}) of type {} to type {} for path {}. Error: {}",
getIndexName(), pname, fieldName,
getIndexName(), propertyName, fieldName,
Type.fromTag(property.getType().tag(), false),
Type.fromTag(tag, false), path, e.toString());
}
Expand Down Expand Up @@ -252,7 +254,7 @@ protected void indexSimilarityBinaries(ElasticDocument doc, PropertyDefinition p
if (pd.getSimilaritySearchDenseVectorSize() == blob.length() / BLOB_LENGTH_DIVISOR) {
// see https://www.elastic.co/blog/text-similarity-search-with-vectors-in-elasticsearch
// see https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html
doc.addSimilarityField(pd.name, blob);
doc.addSimilarityField(ElasticIndexUtils.fieldName(pd.name), blob);
} else {
if (!LOG_SILENCER.silence(LOG_KEY_SIMILARITY_BINARIES_WRONG_DIMENSION)) {
LOG.warn("[{}] Ignoring binary property {} for path {}. Expected dimension is {} but got {}",
Expand All @@ -275,7 +277,7 @@ protected boolean augmentCustomFields(String path, ElasticDocument doc, NodeStat
@Override
protected boolean indexDynamicBoost(ElasticDocument doc, String parent, String nodeName, String token, double boost) {
if (!token.isEmpty()) {
doc.addDynamicBoostField(nodeName, token, boost);
doc.addDynamicBoostField(ElasticIndexUtils.fieldName(nodeName), token, boost);
return true;
}
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticPropertyDefinition;
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.IndexingRule;
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
Expand Down Expand Up @@ -149,7 +150,8 @@ private static void mapInferenceDefinition(@NotNull TypeMapping.Builder builder,
builder.meta("inference", JsonData.of(inferenceDefinition));

if (inferenceDefinition.properties != null) {
inferenceDefinition.properties.forEach(p -> builder.properties(p.name,
inferenceDefinition.properties.forEach(p -> builder.properties(
ElasticIndexUtils.fieldName(p.name),
b -> b.object(bo -> bo
.properties("value", pb -> pb.denseVector(dv ->
dv.index(true)
Expand Down Expand Up @@ -243,13 +245,15 @@ private static void mapIndexRules(@NotNull TypeMapping.Builder builder,
if (epd.isFlattened()) {
Property.Builder pBuilder = new Property.Builder();
pBuilder.flattened(b2 -> b2.index(true));
builder.properties(FieldNames.FLATTENED_FIELD_PREFIX + pd.nodeName, pBuilder.build());
builder.properties(FieldNames.FLATTENED_FIELD_PREFIX +
ElasticIndexUtils.fieldName(pd.nodeName), pBuilder.build());
}
}
}
for (Map.Entry<String, List<PropertyDefinition>> entry : indexDefinition.getPropertiesByName().entrySet()) {
final String name = entry.getKey();
final List<PropertyDefinition> propertyDefinitions = entry.getValue();
String propertyName = entry.getKey();
String fieldName = ElasticIndexUtils.fieldName(propertyName);
List<PropertyDefinition> propertyDefinitions = entry.getValue();
Type<?> type = null;
for (PropertyDefinition pd : propertyDefinitions) {
type = Type.fromTag(pd.getType(), false);
Expand Down Expand Up @@ -280,10 +284,10 @@ private static void mapIndexRules(@NotNull TypeMapping.Builder builder,
pBuilder.keyword(b1 -> b1.ignoreAbove(256));
}
}
builder.properties(name, pBuilder.build());
builder.properties(fieldName, pBuilder.build());

for (PropertyDefinition pd : indexDefinition.getDynamicBoostProperties()) {
builder.properties(pd.nodeName,
builder.properties(ElasticIndexUtils.fieldName(pd.nodeName),
b1 -> b1.nested(
b2 -> b2.properties(DYNAMIC_BOOST_NESTED_VALUE,
b3 -> b3.text(
Expand All @@ -305,7 +309,9 @@ private static void mapIndexRules(@NotNull TypeMapping.Builder builder,
.similarity(DEFAULT_SIMILARITY_METRIC)
.build();

builder.properties(FieldNames.createSimilarityFieldName(pd.name), b1 -> b1.denseVector(denseVectorProperty));
builder.properties(FieldNames.createSimilarityFieldName(
ElasticIndexUtils.fieldName(pd.name)),
b1 -> b1.denseVector(denseVectorProperty));
}

builder.properties(ElasticIndexDefinition.SIMILARITY_TAGS,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,8 @@ public Optional<KnnQuery> similarityQuery(@NotNull String text, List<PropertyDef
continue;
}

String similarityPropFieldName = FieldNames.createSimilarityFieldName(pd.name);
String similarityPropFieldName = FieldNames.createSimilarityFieldName(
ElasticIndexUtils.fieldName(pd.name));
KnnQuery knnQuery = baseKnnQueryBuilder(similarityPropFieldName, bytes, pd).build();
return Optional.of(knnQuery);
}
Expand Down Expand Up @@ -648,13 +649,20 @@ private ObjectBuilder<BoolQuery> inference(BoolQuery.Builder b, String propertyN
}

private Stream<NestedQuery> dynamicScoreQueries(String text) {
return elasticIndexDefinition.getDynamicBoostProperties().stream().map(pd -> NestedQuery.of(n -> n
.path(pd.nodeName)
.query(q -> q.functionScore(s -> s
return elasticIndexDefinition.getDynamicBoostProperties().stream()
.map(pd -> {
String field = ElasticIndexUtils.fieldName(pd.nodeName);
return NestedQuery.of(n -> n
.path(field)
.query(q -> q.functionScore(s -> s
.boost(DYNAMIC_BOOST_WEIGHT)
.query(fq -> fq.match(m -> m.field(pd.nodeName + ".value").query(FieldValue.of(text))))
.functions(f -> f.fieldValueFactor(fv -> fv.field(pd.nodeName + ".boost")))))
.scoreMode(ChildScoreMode.Avg))
.query(fq -> fq.match(m -> m.field(
field + ".value").
query(FieldValue.of(text))))
.functions(f -> f.fieldValueFactor(fv -> fv.field(
field + ".boost")))))
.scoreMode(ChildScoreMode.Avg));
}
);
}

Expand Down Expand Up @@ -889,8 +897,8 @@ private static QueryStringQuery.Builder fullTextQuery(String text, String fieldN
.type(TextQueryType.CrossFields)
.tieBreaker(0.5d);
if (FieldNames.FULLTEXT.equals(fieldName)) {
for(PropertyDefinition pd: pr.indexingRule.getNodeScopeAnalyzedProps()) {
qsqBuilder.fields(pd.name + "^" + pd.boost);
for (PropertyDefinition pd : pr.indexingRule.getNodeScopeAnalyzedProps()) {
qsqBuilder.fields(ElasticIndexUtils.fieldName(pd.name) + "^" + pd.boost);
}
// dynamic boost is included only for :fulltext field
if (includeDynamicBoostedValues) {
Expand Down Expand Up @@ -951,6 +959,11 @@ private String getElasticFulltextFieldName(@Nullable String propertyName) {
if (planResult.isPathTransformed()) {
propertyName = PathUtils.getName(propertyName);
}
return propertyName;
if ("*".equals(propertyName)) {
// elasticsearch does support the pseudo-field "*" meaning all fields,
// but (arguably) what we really want is the field ":fulltext".
return FieldNames.FULLTEXT;
}
return ElasticIndexUtils.fieldName(propertyName);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticRequestHandler;
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticResponseHandler;
import org.apache.jackrabbit.oak.plugins.index.elastic.query.async.ElasticResponseListener;
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -44,6 +45,7 @@ class ElasticSecureFacetAsyncProvider implements ElasticFacetProvider, ElasticRe
private static final Logger LOG = LoggerFactory.getLogger(ElasticSecureFacetAsyncProvider.class);

private final Set<String> facetFields;
private final Set<String> elasticFieldNames;
private final Map<String, Map<String, Integer>> accessibleFacetCounts = new ConcurrentHashMap<>();
private final ElasticResponseHandler elasticResponseHandler;
private final Predicate<String> isAccessible;
Expand All @@ -58,11 +60,14 @@ class ElasticSecureFacetAsyncProvider implements ElasticFacetProvider, ElasticRe
this.elasticResponseHandler = elasticResponseHandler;
this.isAccessible = isAccessible;
this.facetFields = elasticRequestHandler.facetFields().collect(Collectors.toSet());
this.elasticFieldNames = elasticRequestHandler.facetFields().
map(p -> ElasticIndexUtils.fieldName(p)).
collect(Collectors.toSet());
}

@Override
public Set<String> sourceFields() {
return facetFields;
return elasticFieldNames;
}

@Override
Expand All @@ -75,7 +80,8 @@ public boolean on(Hit<ObjectNode> searchHit) {
final String path = elasticResponseHandler.getPath(searchHit);
if (path != null && isAccessible.test(path)) {
for (String field: facetFields) {
JsonNode value = searchHit.source().get(field);
String elasticField = ElasticIndexUtils.fieldName(field);
JsonNode value = searchHit.source().get(elasticField);
if (value != null) {
accessibleFacetCounts.compute(field, (column, facetValues) -> {
if (facetValues == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticRequestHandler;
import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticResponseHandler;
import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
import org.slf4j.Logger;
Expand All @@ -48,6 +49,7 @@
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* An {@link ElasticFacetProvider} extension that performs random sampling on the result set to compute facets.
Expand All @@ -74,11 +76,15 @@ public class ElasticStatisticalFacetAsyncProvider implements ElasticFacetProvide

this.elasticResponseHandler = elasticResponseHandler;
this.isAccessible = isAccessible;
this.facetFields = elasticRequestHandler.facetFields().collect(Collectors.toSet());
this.facetFields = elasticRequestHandler.facetFields().
collect(Collectors.toSet());
Set<String> elasticFieldNames = facetFields.stream().
map(ElasticIndexUtils::fieldName).
collect(Collectors.toSet());

SearchRequest searchRequest = SearchRequest.of(srb -> srb.index(indexDefinition.getIndexAlias())
.trackTotalHits(thb -> thb.enabled(true))
.source(SourceConfig.of(scf -> scf.filter(ff -> ff.includes(FieldNames.PATH).includes(new ArrayList<>(facetFields)))))
.source(SourceConfig.of(scf -> scf.filter(ff -> ff.includes(FieldNames.PATH).includes(new ArrayList<>(elasticFieldNames)))))
.query(Query.of(qb -> qb.bool(elasticRequestHandler.baseQueryBuilder().build())))
.aggregations(elasticRequestHandler.aggregations())
.size(sampleSize)
Expand Down Expand Up @@ -135,7 +141,7 @@ private void processHit(Hit<ObjectNode> searchHit) {
final String path = elasticResponseHandler.getPath(searchHit);
if (path != null && isAccessible.test(path)) {
for (String field : facetFields) {
JsonNode value = searchHit.source().get(field);
JsonNode value = searchHit.source().get(ElasticIndexUtils.fieldName(field));
if (value != null) {
accessibleFacetCounts.compute(field, (column, facetValues) -> {
if (facetValues == null) {
Expand Down
Loading
Loading