Skip to content

Commit 673572f

Browse files
OAK-11352 (oak-search-elastic) add useInFullTextQuery flag for dynamic boosted values (#1949)
* OAK-11352 (oak-search-elastic) add useInFullTextQuery flag for dynamic boosted values
* OAK-11352 (minor) make useInFullTextQuery final
* OAK-11352 (test) fix dynamicBoostNotIncludedInFullText
1 parent 6049f7a commit 673572f

File tree

6 files changed

+76
-10
lines changed

6 files changed

+76
-10
lines changed

oak-doc/src/site/markdown/query/elastic.md

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,19 @@ however there are differences:
4848
* `useInExcerpt` does not support regexp relative properties.
4949
* For property definitions, `sync` and `unique` are ignored.
5050
Synchronous indexing and enforcing uniqueness constraints are not currently supported in Elastic indexes.
51-
* The behavior for `dynamicBoost` is slightly different:
52-
For Lucene indexes, boosting is done in indexing, while for Elastic it is done at query time.
53-
* The behavior for `suggest` is slightly different:
51+
* The behavior of `dynamicBoost` differs slightly between Lucene and Elasticsearch:
52+
- **Lucene**: Boosting is applied at indexing time.
53+
- **Elasticsearch**: Boosting is applied at query time.
54+
55+
Full-text queries automatically use dynamically boosted values to match relevant results, but this behavior may not always be desirable.
56+
To use these values exclusively for influencing relevance without affecting matching, configure the property definition as follows:
57+
```json
58+
{
59+
"dynamicBoost": true,
60+
"useInFullTextQuery": false
61+
}
62+
```
63+
* The behavior of `suggest` is slightly different:
5464
For Lucene indexes, the suggestor is updated every 10 minutes by default and the frequency
5565
can be changed by `suggestUpdateFrequencyMinutes` property in suggestion node under the index definition node.
5666
In Elastic indexes, there is no such delay and thus no need for the above config property. This is an improvement of Elastic over Lucene.

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ public class ElasticIndexDefinition extends IndexDefinition {
187187
public final InferenceDefinition inferenceDefinition;
188188

189189
private final Map<String, List<PropertyDefinition>> propertiesByName;
190-
private final List<PropertyDefinition> dynamicBoostProperties;
190+
private final List<ElasticPropertyDefinition> dynamicBoostProperties;
191191
private final List<PropertyDefinition> similarityProperties;
192192
private final List<PropertyDefinition> similarityTagsProperties;
193193
private final String[] similarityTagsFields;
@@ -238,6 +238,7 @@ public ElasticIndexDefinition(NodeState root, NodeState defn, String indexPath,
238238
.stream()
239239
.flatMap(IndexingRule::getNamePatternsProperties)
240240
.filter(pd -> pd.dynamicBoost)
241+
.map(pd -> (ElasticPropertyDefinition) pd)
241242
.collect(Collectors.toList());
242243

243244
this.similarityProperties = getDefinedRules()
@@ -278,7 +279,7 @@ public Map<String, List<PropertyDefinition>> getPropertiesByName() {
278279
return propertiesByName;
279280
}
280281

281-
public List<PropertyDefinition> getDynamicBoostProperties() {
282+
public List<ElasticPropertyDefinition> getDynamicBoostProperties() {
282283
return dynamicBoostProperties;
283284
}
284285

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ public class ElasticPropertyDefinition extends PropertyDefinition {
3434
private static final int DEFAULT_CANDIDATES = 500;
3535
private KnnSearchParameters knnSearchParameters;
3636

37+
/**
38+
* Whether dynamically boosted values are also used for matching in full-text queries. Defaults to true; only effective when dynamicBoost is enabled.
39+
*/
40+
private static final String PROP_USE_IN_FULL_TEXT_QUERY = "useInFullTextQuery";
41+
private final boolean useInFullTextQuery;
42+
3743
public ElasticPropertyDefinition(IndexDefinition.IndexingRule idxDefn, String nodeName, NodeState defn) {
3844
super(idxDefn, nodeName, defn);
3945
if (this.useInSimilarity) {
@@ -43,12 +49,17 @@ public ElasticPropertyDefinition(IndexDefinition.IndexingRule idxDefn, String no
4349
getOptionalValue(defn, PROP_K, DEFAULT_K),
4450
getOptionalValue(defn, PROP_CANDIDATES, DEFAULT_CANDIDATES));
4551
}
52+
this.useInFullTextQuery = this.dynamicBoost && getOptionalValue(defn, PROP_USE_IN_FULL_TEXT_QUERY, true);
4653
}
4754

4855
public KnnSearchParameters getKnnSearchParameters() {
4956
return knnSearchParameters;
5057
}
5158

59+
public boolean useInFullTextQuery() {
60+
return useInFullTextQuery;
61+
}
62+
5263
/**
5364
* Class for defining parameters of approximate knn search on dense_vector fields
5465
* <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html">...</a> and

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -556,13 +556,17 @@ private boolean visitTerm(String propertyName, String text, String boost, boolea
556556
QueryStringQuery.Builder qsqBuilder = fullTextQuery(text, ElasticIndexDefinition.DYNAMIC_PROPERTIES + ".value", pr, false);
557557
bqBuilder.must(m -> m.nested(nf -> nf.path(ElasticIndexDefinition.DYNAMIC_PROPERTIES).query(Query.of(q -> q.queryString(qsqBuilder.build())))));
558558
} else {
559-
boolean dbEnabled = !elasticIndexDefinition.getDynamicBoostProperties().isEmpty();
559+
// TODO: we include dynamic boosted values in the full-text query if there is at least one dynamic property with useInFullTextQuery set to true
560+
// This might not be ideal when there are multiple dynamic properties with different useInFullTextQuery settings (very unlikely)
561+
// A better approach would be to include the values at index time (requires a refactoring of the DocumentMaker to access the ElasticIndexDefinition)
562+
boolean includeDynamicBoostedValues = !elasticIndexDefinition.getDynamicBoostProperties().isEmpty() &&
563+
elasticIndexDefinition.getDynamicBoostProperties().stream().anyMatch(ElasticPropertyDefinition::useInFullTextQuery);
560564

561565
// Experimental support for inference queries
562566
if (elasticIndexDefinition.inferenceDefinition != null && elasticIndexDefinition.inferenceDefinition.queries != null) {
563-
bqBuilder.must(m -> m.bool(b -> inference(b, propertyName, text, pr, dbEnabled)));
567+
bqBuilder.must(m -> m.bool(b -> inference(b, propertyName, text, pr, includeDynamicBoostedValues)));
564568
} else {
565-
QueryStringQuery.Builder qsqBuilder = fullTextQuery(text, getElasticFieldName(propertyName), pr, dbEnabled);
569+
QueryStringQuery.Builder qsqBuilder = fullTextQuery(text, getElasticFieldName(propertyName), pr, includeDynamicBoostedValues);
566570
bqBuilder.must(m -> m.queryString(qsqBuilder.build()));
567571
}
568572
}
@@ -878,7 +882,7 @@ private static Query referenceConstraint(String uuid) {
878882
return Query.of(q -> q.multiMatch(m -> m.fields(uuid)));
879883
}
880884

881-
private static QueryStringQuery.Builder fullTextQuery(String text, String fieldName, PlanResult pr, boolean dynamicBoostEnabled) {
885+
private static QueryStringQuery.Builder fullTextQuery(String text, String fieldName, PlanResult pr, boolean includeDynamicBoostedValues) {
882886
LOG.debug("fullTextQuery for text: '{}', fieldName: '{}'", text, fieldName);
883887
QueryStringQuery.Builder qsqBuilder = new QueryStringQuery.Builder()
884888
.query(FulltextIndex.rewriteQueryText(text))
@@ -890,7 +894,7 @@ private static QueryStringQuery.Builder fullTextQuery(String text, String fieldN
890894
qsqBuilder.fields(pd.name + "^" + pd.boost);
891895
}
892896
// dynamic boost is included only for :fulltext field
893-
if (dynamicBoostEnabled) {
897+
if (includeDynamicBoostedValues) {
894898
qsqBuilder.fields(ElasticIndexDefinition.DYNAMIC_BOOST_FULLTEXT + "^" + DYNAMIC_BOOST_WEIGHT);
895899
}
896900
}

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,39 @@ public void dynamicBoostAnalyzed() throws Exception {
8484
List.of("/test/asset2", "/test/asset1"));
8585
});
8686
}
87+
88+
@Test
89+
public void dynamicBoostNotIncludedInFullText() throws Exception {
90+
createAssetsIndexAndProperties(false, false, false);
91+
92+
Tree testParent = createNodeWithType(root.getTree("/"), "test", JcrConstants.NT_UNSTRUCTURED, "");
93+
94+
Tree predicted1 = createAssetNodeWithPredicted(testParent, "asset1", "flower with a lot of red and a bit of blue");
95+
createPredictedTag(predicted1, "fooTag", 100.0);
96+
createPredictedTag(predicted1, "barTag", 1.0);
97+
createPredictedTag(predicted1, "red", 9.0);
98+
createPredictedTag(predicted1, "blue", 1.0);
99+
100+
Tree predicted2 = createAssetNodeWithPredicted(testParent, "asset2", "flower with a lot of blue and a bit of red");
101+
createPredictedTag(predicted2, "fooTag", 1.0);
102+
createPredictedTag(predicted2, "barTag", 100.0);
103+
createPredictedTag(predicted2, "red", 1.0);
104+
createPredictedTag(predicted2, "blue", 9.0);
105+
106+
Tree predicted3 = createAssetNodeWithPredicted(testParent, "asset3", "this is a not matching asset");
107+
createPredictedTag(predicted3, "fooTag", 1.0);
108+
createPredictedTag(predicted3, "barTag", 1.0);
109+
110+
root.commit();
111+
112+
assertEventually(() -> {
113+
// with this test we are checking that the dynamic boost is not included in the fulltext search
114+
assertQuery("//element(*, dam:Asset)[jcr:contains(., 'fooTag')]", XPATH, List.of());
115+
assertOrderedQuery("select [jcr:path] from [dam:Asset] where contains(*, 'flower OR fooTag')",
116+
List.of("/test/asset1", "/test/asset2"));
117+
assertOrderedQuery("select [jcr:path] from [dam:Asset] where contains(*, 'flower OR barTag')",
118+
List.of("/test/asset2", "/test/asset1"));
119+
});
120+
121+
}
87122
}

oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/DynamicBoostCommonTest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,13 +230,18 @@ protected void createPredictedTag(Tree parent, String tagName, double confidence
230230
}
231231

232232
protected void createAssetsIndexAndProperties(boolean lite, boolean similarityTags) throws Exception {
233+
createAssetsIndexAndProperties(lite, similarityTags, true);
234+
}
235+
236+
protected void createAssetsIndexAndProperties(boolean lite, boolean similarityTags, boolean useInFullTextQuery) throws Exception {
233237
NodeTypeRegistry.register(root, new ByteArrayInputStream(ASSET_NODE_TYPE.getBytes()), "test nodeType");
234238
Tree indexRuleProps = createIndex("dam:Asset", lite);
235239

236240
Tree predictedTagsDynamicBoost = createNodeWithType(indexRuleProps, "predictedTagsDynamicBoost", JcrConstants.NT_UNSTRUCTURED, "");
237241
predictedTagsDynamicBoost.setProperty("name", "jcr:content/metadata/predictedTags/.*");
238242
predictedTagsDynamicBoost.setProperty("isRegexp", true);
239243
predictedTagsDynamicBoost.setProperty("dynamicBoost", true);
244+
predictedTagsDynamicBoost.setProperty("useInFullTextQuery", useInFullTextQuery);
240245

241246
if (similarityTags) {
242247
Tree predictedTags = createNodeWithType(indexRuleProps, "predictedTags", JcrConstants.NT_UNSTRUCTURED, "");

0 commit comments

Comments
 (0)