Skip to content

Commit 0992d27

Browse files
committed
OAK-11721: experimental and inferenceConfig implementation should be backward compatible + resolve default config implementation
1 parent: 189b9ad; commit: 0992d27

File tree

4 files changed (+106 -63 lines changed)

4 files changed (+106 -63 lines changed)

oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/fulltext/VectorQuery.java

Lines changed: 36 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -41,60 +41,57 @@ public VectorQuery(@NotNull String text) {
4141
}
4242

4343
private String[] parseText(String inputText) {
44+
String jsonPart = null;
45+
String queryTextPart = null;
4446
String text = inputText.trim();
45-
// Remove the first delimiter
46-
if (text.startsWith(INFERENCE_QUERY_CONFIG_PREFIX) && text.charAt(INFERENCE_QUERY_CONFIG_PREFIX.length()) == '{') {
47+
if (text.startsWith(INFERENCE_QUERY_CONFIG_PREFIX)) {
4748
text = text.substring(INFERENCE_QUERY_CONFIG_PREFIX.length());
48-
49-
// Try to find the end of the JSON part by parsing incrementally
50-
int possibleEndIndex = 0;
51-
String jsonPart = null;
52-
String queryTextPart;
53-
int jsonEndDelimiterIndex = -1;
54-
55-
while (possibleEndIndex < text.length()) {
56-
possibleEndIndex = text.indexOf(INFERENCE_QUERY_CONFIG_PREFIX, possibleEndIndex + 1);
57-
if (possibleEndIndex == -1) {
58-
// If we reach here, it means we couldn't find a valid JSON part
59-
jsonPart = "";
60-
LOG.warn("Query starts with inference prefix {}, but without valid json part," +
61-
" if case this prefix is a valid fulltext query prefix, please update system property {} with different prefix value",
62-
INFERENCE_QUERY_CONFIG_PREFIX, INFERENCE_QUERY_CONFIG_PREFIX_KEY);
63-
break;
49+
if (text.charAt(0) == '{') {
50+
// Try to find the end of the JSON part by parsing incrementally
51+
int possibleEndIndex = 0;
52+
int jsonEndDelimiterIndex = -1;
53+
while (possibleEndIndex < text.length()) {
54+
possibleEndIndex = text.indexOf(INFERENCE_QUERY_CONFIG_PREFIX, possibleEndIndex + INFERENCE_QUERY_CONFIG_PREFIX.length());
55+
if (possibleEndIndex == -1) {
56+
// If we reach here, it means we couldn't find a valid JSON part
57+
jsonPart = "{}";
58+
// we should now use text string as queryText
59+
jsonEndDelimiterIndex = 0;
60+
break;
61+
}
62+
String candidateJson = text.substring(0, possibleEndIndex);
63+
// Verify if this is valid JSON using Oak's JsopTokenizer
64+
if (JsonUtils.isValidJson(candidateJson, false)) {
65+
jsonPart = candidateJson;
66+
jsonEndDelimiterIndex = possibleEndIndex;
67+
break;
68+
}
6469
}
65-
String candidateJson = text.substring(0, possibleEndIndex);
66-
// Verify if this is valid JSON using Oak's JsopTokenizer
67-
if (JsonUtils.isValidJson(candidateJson, false)) {
68-
jsonPart = candidateJson;
69-
jsonEndDelimiterIndex = possibleEndIndex;
70-
break;
70+
text = text.substring(jsonEndDelimiterIndex);
71+
if (text.startsWith(INFERENCE_QUERY_CONFIG_PREFIX)) {
72+
// Remove the second delimiter
73+
text = text.substring(INFERENCE_QUERY_CONFIG_PREFIX.length());
7174
}
72-
}
73-
// If we found a valid JSON part, extract it
74-
if (jsonPart == null) {
75-
// If we reach here, it means we couldn't find a valid JSON part
76-
jsonPart = "";
7775
queryTextPart = text;
78-
LOG.warn("Query starts with InferenceQueryPrefix: {}, but without valid json part," +
79-
" if case this prefix is a valid fulltext query prefix, please update {} with different prefix value",
80-
INFERENCE_QUERY_CONFIG_PREFIX, INFERENCE_QUERY_CONFIG_PREFIX_KEY);
81-
8276
} else {
83-
// Extract query text part (everything after the JSON part delimiter)
84-
queryTextPart = text.substring(jsonEndDelimiterIndex + 1).trim();
85-
77+
// No JSON part present but starts with prefix
78+
//we return "{}" to be compatible with experimental inference queries
79+
jsonPart = "{}";
80+
queryTextPart = text;
8681
}
87-
return new String[]{jsonPart, queryTextPart};
8882
} else {
89-
return new String[]{"", text};
83+
// If the text doesn't start with the prefix, return empty config and the original text
84+
jsonPart = "";
85+
queryTextPart = text;
9086
}
87+
return new String[]{jsonPart, queryTextPart};
9188
}
9289

9390
public String getQueryInferenceConfig() {
9491
return queryInferenceConfig;
9592
}
9693

9794
public String getQueryText() {
98-
return queryText;
95+
return queryText.trim();
9996
}
10097
}

oak-query-spi/src/test/java/org/apache/jackrabbit/oak/spi/query/fulltext/VectorQueryTest.java

Lines changed: 41 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -26,67 +26,96 @@ public class VectorQueryTest {
2626

2727
@Test
2828
public void testBasicQuery() {
29+
// Input string: "simple query"
2930
VectorQuery query = new VectorQuery("simple query");
3031
assertEquals("", query.getQueryInferenceConfig());
3132
assertEquals("simple query", query.getQueryText());
3233
}
3334

3435
@Test
3536
public void testQueryWithInferenceConfig() {
36-
VectorQuery query = new VectorQuery("?{\"model\":\"gpt-4\"}?search for oak trees");
37+
// Input string: "?{"model":"gpt-4"}?search for oak trees"
38+
VectorQuery query = new VectorQuery(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"{\"model\":\"gpt-4\"}"+VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"search for oak trees");
3739
assertEquals("{\"model\":\"gpt-4\"}", query.getQueryInferenceConfig());
3840
assertEquals("search for oak trees", query.getQueryText());
3941
}
4042

4143
@Test
4244
public void testQueryWithComplexInferenceConfig() {
45+
// Input string: "?{"model":"gpt-4","temperature":0.7,"options":{"filter":true}}?oak trees"
4346
VectorQuery query = new VectorQuery(
44-
"?{\"model\":\"gpt-4\",\"temperature\":0.7,\"options\":{\"filter\":true}}?oak trees");
47+
VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"{\"model\":\"gpt-4\",\"temperature\":0.7,\"options\":{\"filter\":true}}"+VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"oak trees");
4548
assertEquals("{\"model\":\"gpt-4\",\"temperature\":0.7,\"options\":{\"filter\":true}}",
4649
query.getQueryInferenceConfig());
4750
assertEquals("oak trees", query.getQueryText());
4851
}
4952

5053
@Test
5154
public void testQueryWithQuestionMarksInText() {
52-
VectorQuery query = new VectorQuery("?{\"model\":\"gpt-4\"}?what are oak trees?");
55+
// Input string: "?{"model":"gpt-4"}?what are oak trees?"
56+
VectorQuery query = new VectorQuery(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"{\"model\":\"gpt-4\"}"+VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"what are oak trees?");
5357
assertEquals("{\"model\":\"gpt-4\"}", query.getQueryInferenceConfig());
5458
assertEquals("what are oak trees?", query.getQueryText());
5559
}
5660

5761
@Test
5862
public void testQueryWithoutInferencePrefix() {
59-
VectorQuery query = new VectorQuery("{\"model\":\"gpt-4\"}?query");
63+
// Input string: "{"model":"gpt-4"}?query"
64+
VectorQuery query = new VectorQuery("{\"model\":\"gpt-4\"}"+VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"query");
6065
assertEquals("", query.getQueryInferenceConfig());
61-
assertEquals("{\"model\":\"gpt-4\"}?query", query.getQueryText());
66+
assertEquals("{\"model\":\"gpt-4\"}"+VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"query", query.getQueryText());
6267
}
6368

6469
@Test
6570
public void testQueryWithInvalidJson() {
66-
VectorQuery query = new VectorQuery("?{invalid json}?query");
67-
assertEquals("", query.getQueryInferenceConfig());
68-
assertEquals("{invalid json}?query", query.getQueryText());
71+
// Input string: "?{invalid json}?query"
72+
VectorQuery query = new VectorQuery(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"{invalid json}"+VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"query");
73+
assertEquals("{}", query.getQueryInferenceConfig());
74+
assertEquals("{invalid json}"+VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"query", query.getQueryText());
6975
}
7076

7177
@Test
7278
public void testQueryWithEmptyConfig() {
73-
VectorQuery query = new VectorQuery("??query text");
74-
assertEquals("", query.getQueryInferenceConfig());
75-
assertEquals("??query text", query.getQueryText());
79+
// Input string: "??query text"
80+
String inputString = VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"query text";
81+
VectorQuery query = new VectorQuery(inputString);
82+
83+
assertEquals("{}", query.getQueryInferenceConfig());
84+
assertEquals(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"query text", query.getQueryText());
7685
}
7786

7887
@Test
7988
public void testQueryWithWhitespace() {
80-
VectorQuery query = new VectorQuery(" ?{\"model\":\"gpt-4\"}? search query ");
89+
// Input string: " ?{"model":"gpt-4"}? search query "
90+
VectorQuery query = new VectorQuery(" "+VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"{\"model\":\"gpt-4\"}"+VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+" search query ");
8191
assertEquals("{\"model\":\"gpt-4\"}", query.getQueryInferenceConfig());
8292
assertEquals("search query", query.getQueryText());
8393
}
8494

8595
@Test
8696
public void testEmptyQuery() {
97+
// Input string: ""
8798
VectorQuery query = new VectorQuery("");
8899
assertEquals("", query.getQueryInferenceConfig());
89100
assertEquals("", query.getQueryText());
90101
}
91102

103+
@Test
104+
public void testPrefixOnlyQuery() {
105+
// Input string: "?query text"
106+
VectorQuery query = new VectorQuery(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"query text");
107+
assertEquals("{}", query.getQueryInferenceConfig());
108+
// With the implementation fix, the prefix should now be correctly stripped
109+
assertEquals("query text", query.getQueryText());
110+
}
111+
112+
@Test
113+
public void testNoJsonEndDelimiterQuery() {
114+
// Input string: "?{"model":"gpt-4"query text"
115+
VectorQuery query = new VectorQuery(VectorQuery.INFERENCE_QUERY_CONFIG_PREFIX+"{\"model\":\"gpt-4\"query text");
116+
assertEquals("{}", query.getQueryInferenceConfig());
117+
// With the implementation fix, the prefix should now be correctly stripped
118+
assertEquals("{\"model\":\"gpt-4\"query text", query.getQueryText());
119+
}
120+
92121
}

oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -641,7 +641,7 @@ private boolean visitTerm(String propertyName, String text, String boost, boolea
641641
}
642642
// Experimental support for inference queries
643643
else if (elasticIndexDefinition.inferenceDefinition != null && elasticIndexDefinition.inferenceDefinition.queries != null) {
644-
bqBuilder.must(m -> m.bool(b -> inference(b, propertyName, queryText, pr, includeDynamicBoostedValues)));
644+
bqBuilder.must(m -> m.bool(b -> inference(b, propertyName, text, pr, includeDynamicBoostedValues)));
645645
} else {
646646
QueryStringQuery.Builder qsqBuilder = fullTextQuery(queryText, getElasticFulltextFieldName(propertyName), pr, includeDynamicBoostedValues);
647647
bqBuilder.must(m -> m.queryString(qsqBuilder.build()));
@@ -685,19 +685,20 @@ private ObjectBuilder<BoolQuery> inferenceConfigQuery(BoolQuery.Builder b, Strin
685685
inferenceModelConfig.getMinTerms(), vectorQuery.getQueryText());
686686
return b.must(mm -> mm.queryString(qsqBuilder.build()));
687687
} else if (inferenceModelConfig.isEnabled() && inferenceModelConfig.getMinTerms() <= vectorQuery.getQueryText().split("\\s+").length) {
688+
String inferenceModelConfigName = inferenceModelConfig.getInferenceModelConfigName();
688689
InferenceService inferenceService = InferenceServiceManager
689690
.getInstance(inferenceModelConfig);
690691
List<Float> embeddings = inferenceService.embeddings(vectorQuery.getQueryText(), (int) inferenceModelConfig.getTimeoutMillis());
691692
if (embeddings != null) {
692693
KnnQuery.Builder knnQueryBuilder = new KnnQuery.Builder();
693-
knnQueryBuilder.field(InferenceConstants.VECTOR_SPACES + "." + inferenceQueryModelName + "." + InferenceConstants.VECTOR);
694+
knnQueryBuilder.field(InferenceConstants.VECTOR_SPACES + "." + inferenceModelConfigName + "." + InferenceConstants.VECTOR);
694695
knnQueryBuilder.numCandidates(inferenceModelConfig.getNumCandidates());
695696
knnQueryBuilder.queryVector(embeddings);
696697

697698
KnnQuery knnQuery = knnQueryBuilder.build();
698699

699700
NestedQuery.Builder nestedQueryBuilder = new NestedQuery.Builder()
700-
.path(InferenceConstants.VECTOR_SPACES + "." + inferenceQueryModelName)
701+
.path(InferenceConstants.VECTOR_SPACES + "." + inferenceModelConfigName)
701702
.query(Query.of(q2 -> q2.knn(knnQuery)));
702703

703704
b.should(s -> s.nested(nestedQueryBuilder.build()));

oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java

Lines changed: 25 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -212,9 +212,25 @@ private void setupInferenceModelConfig(NodeBuilder inferenceIndexConfig,
212212
}
213213

214214
@Test
215-
public void hybridSearch() throws Exception {
215+
public void testHybridSearchWithVectorQueryConfigJson() throws Exception {
216+
// Test hybrid search with inference configuration
217+
hybridSearch("?{\"inferenceModelConfig\": \"ada-test-model\"}?");
218+
}
219+
220+
@Test
221+
public void testHybridSearchWithEmptyVectorQueryConfigJson() throws Exception {
222+
// Test hybrid search with empty inference configuration
223+
hybridSearch("?{}?");
224+
}
225+
226+
@Test
227+
public void testHybridSearchWithExperimentalPrefix() throws Exception {
228+
// Test hybrid search with experimental inference query prefix
229+
hybridSearch("?");
230+
}
231+
232+
private void hybridSearch(String inferenceConfigInQuery) throws Exception {
216233
String jcrIndexName = UUID.randomUUID().toString();
217-
String inferenceConfigInQuery = "{\"inferenceModelConfig\": \"ada-test-model\"}";
218234
String inferenceServiceUrl = "http://localhost:" + wireMock.port() + "/v1/embeddings";
219235
String inferenceModelConfigName = "ada-test-model";
220236
String inferenceModelName = "text-embedding-ada-002";
@@ -449,8 +465,8 @@ private void verifyQueryResults(Map<String, String> queryResults, String inferen
449465
String expectedPath = entry.getValue();
450466

451467
// Test with inference config
452-
String queryPath = "select [jcr:path] from [nt:base] where ISDESCENDANTNODE('/content') and contains(*, '?"
453-
+ inferenceConfigInQuery + "?" + query + "')";
468+
String queryPath = "select [jcr:path] from [nt:base] where ISDESCENDANTNODE('/content') and contains(*, '"
469+
+ inferenceConfigInQuery + query + "')";
454470
List<String> results = executeQuery(queryPath, SQL2, true, true);
455471
assertEquals(expectedPath, results.get(0));
456472

@@ -465,13 +481,13 @@ private void verifyQueryResults(Map<String, String> queryResults, String inferen
465481
*/
466482
private void verifyErrorHandling(String jcrIndexName, String inferenceConfigInQuery) {
467483
// Test server error handling
468-
String queryPath3 = "select [jcr:path] from [nt:base] where ISDESCENDANTNODE('/content') and contains(*, '?"
469-
+ inferenceConfigInQuery + "?" + "machine learning')";
484+
String queryPath3 = "select [jcr:path] from [nt:base] where ISDESCENDANTNODE('/content') and contains(*, '"
485+
+ inferenceConfigInQuery + "machine learning')";
470486
assertQuery(queryPath3, List.of("/content/ml", "/content/programming"));
471487

472488
// Test timeout handling
473-
String queryPath4 = "select [jcr:path] from [nt:base] where ISDESCENDANTNODE('/content') and contains(*, '?"
474-
+ inferenceConfigInQuery + "?" + "farming practices')";
489+
String queryPath4 = "select [jcr:path] from [nt:base] where ISDESCENDANTNODE('/content') and contains(*, '"
490+
+ inferenceConfigInQuery + "farming practices')";
475491
assertQuery(queryPath4, List.of("/content/farm"));
476492
}
477493

@@ -687,8 +703,8 @@ public void testEnricherStatusOnReinitialization() throws Exception {
687703
// Add content
688704
Tree content = root.getTree("/").addChild("content");
689705
Tree document = content.addChild("document");
690-
document.setProperty("title", "Test Document for Reinitialization");
691706
Tree document2 = content.addChild("document2");
707+
document.setProperty("title", "Test Document for Reinitialization");
692708
document2.setProperty("title", "Test Document for Reinitialization 2");
693709
root.commit();
694710

0 commit comments

Comments
 (0)