Skip to content

Commit e432161

Browse files
authored
Adjusts Seeded knn searches to clean up user and internal interfaces (apache#14170)
This is a bugfix and refactor for seeded kNN searches. First, since we are using collectors, we don't actually need unique queries for every input type. Consequently, I have collapsed the two individual seeded queries into a single query that delegates to a provided kNN query. Then the collector manager is simply wrapped, so that the entry points can be provided. Second, the interactions in the HNSW graph were not clear. Consequently, I did a minor refactor of HNSW searcher to have a "SeededSearcher", where instead of searching the graph for the entry points, it provides them directly. Third, instead of continually overloading collectors, I opted to add a new "searchstrategy" value to KnnCollector. This way various strategies can be executed with different options. I think Seeded could eventually be replaced with something.
1 parent 26dbc82 commit e432161

29 files changed

+881
-609
lines changed

lucene/CHANGES.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ API Changes
4949
New Features
5050
---------------------
5151

52-
* GITHUB#14084, GITHUB#13635, GITHUB#13634: Adds new `SeededKnnByteVectorQuery` and `SeededKnnFloatVectorQuery`
53-
queries. These queries allow for the vector search entry points to be initialized via a `seed` query. This follows
52+
* GITHUB#14084, GITHUB#13635, GITHUB#13634, GITHUB#14170: Adds new `SeededKnnVectorQuery` query.
53+
This query allows the vector search entry points to be initialized via a `seed` query. This follows
5454
the research provided via https://arxiv.org/abs/2307.16779. (Sean MacAvaney, Ben Trent).
5555

5656

lucene/core/src/java/org/apache/lucene/search/AbstractKnnCollector.java

+15-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
package org.apache.lucene.search;
1919

20+
import org.apache.lucene.search.knn.KnnSearchStrategy;
21+
2022
/**
2123
* AbstractKnnCollector is the default implementation for a knn collector used for gathering kNN
2224
* results and providing topDocs from the gathered neighbors
@@ -25,11 +27,17 @@ public abstract class AbstractKnnCollector implements KnnCollector {
2527

2628
protected long visitedCount;
2729
private final long visitLimit;
30+
private final KnnSearchStrategy searchStrategy;
2831
private final int k;
2932

30-
protected AbstractKnnCollector(int k, long visitLimit) {
31-
this.visitLimit = visitLimit;
33+
protected AbstractKnnCollector(int k, long visitLimit, KnnSearchStrategy searchStrategy) {
3234
this.k = k;
35+
this.searchStrategy = searchStrategy;
36+
this.visitLimit = visitLimit;
37+
}
38+
39+
protected AbstractKnnCollector(int k, long visitLimit) {
40+
this(k, visitLimit, null);
3341
}
3442

3543
@Override
@@ -68,4 +76,9 @@ public final int k() {
6876

6977
@Override
7078
public abstract TopDocs topDocs();
79+
80+
@Override
81+
public KnnSearchStrategy getSearchStrategy() {
82+
return searchStrategy;
83+
}
7184
}

lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ abstract class AbstractVectorSimilarityQuery extends Query {
6161
}
6262

6363
protected KnnCollectorManager getKnnCollectorManager() {
64-
return (visitedLimit, context) ->
65-
new VectorSimilarityCollector(traversalSimilarity, resultSimilarity, visitedLimit);
64+
return (visitLimit, searchStrategy, context) ->
65+
new VectorSimilarityCollector(traversalSimilarity, resultSimilarity, visitLimit);
6666
}
6767

6868
abstract VectorScorer createVectorScorer(LeafReaderContext context) throws IOException;

lucene/core/src/java/org/apache/lucene/search/ByteVectorSimilarityQuery.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ protected TopDocs approximateSearch(
113113
int visitLimit,
114114
KnnCollectorManager knnCollectorManager)
115115
throws IOException {
116-
KnnCollector collector = knnCollectorManager.newCollector(visitLimit, context);
116+
KnnCollector collector = knnCollectorManager.newCollector(visitLimit, null, context);
117117
context.reader().searchNearestVectors(field, target, collector, acceptDocs);
118118
return collector.topDocs();
119119
}

lucene/core/src/java/org/apache/lucene/search/FloatVectorSimilarityQuery.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ protected TopDocs approximateSearch(
115115
int visitLimit,
116116
KnnCollectorManager knnCollectorManager)
117117
throws IOException {
118-
KnnCollector collector = knnCollectorManager.newCollector(visitLimit, context);
118+
KnnCollector collector = knnCollectorManager.newCollector(visitLimit, null, context);
119119
context.reader().searchNearestVectors(field, target, collector, acceptDocs);
120120
return collector.topDocs();
121121
}

lucene/core/src/java/org/apache/lucene/search/KnnByteVectorQuery.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ protected TopDocs approximateSearch(
8383
int visitedLimit,
8484
KnnCollectorManager knnCollectorManager)
8585
throws IOException {
86-
KnnCollector knnCollector = knnCollectorManager.newCollector(visitedLimit, context);
86+
KnnCollector knnCollector = knnCollectorManager.newCollector(visitedLimit, null, context);
8787
LeafReader reader = context.reader();
8888
ByteVectorValues byteVectorValues = reader.getByteVectorValues(field);
8989
if (byteVectorValues == null) {

lucene/core/src/java/org/apache/lucene/search/KnnCollector.java

+15-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
package org.apache.lucene.search;
1919

20+
import org.apache.lucene.search.knn.KnnSearchStrategy;
21+
2022
/**
2123
* KnnCollector is a knn collector used for gathering kNN results and providing topDocs from the
2224
* gathered neighbors
@@ -86,14 +88,21 @@ public interface KnnCollector {
8688
*/
8789
TopDocs topDocs();
8890

91+
/**
92+
* @return the search strategy used by this collector, can be null
93+
*/
94+
default KnnSearchStrategy getSearchStrategy() {
95+
return null;
96+
}
97+
8998
/**
9099
* KnnCollector.Decorator is the base class for decorators of KnnCollector objects, which extend
91100
* the object with new behaviors.
92101
*
93102
* @lucene.experimental
94103
*/
95104
abstract class Decorator implements KnnCollector {
96-
private final KnnCollector collector;
105+
protected final KnnCollector collector;
97106

98107
public Decorator(KnnCollector collector) {
99108
this.collector = collector;
@@ -138,5 +147,10 @@ public float minCompetitiveSimilarity() {
138147
public TopDocs topDocs() {
139148
return collector.topDocs();
140149
}
150+
151+
@Override
152+
public KnnSearchStrategy getSearchStrategy() {
153+
return collector.getSearchStrategy();
154+
}
141155
}
142156
}

lucene/core/src/java/org/apache/lucene/search/KnnFloatVectorQuery.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ protected TopDocs approximateSearch(
8484
int visitedLimit,
8585
KnnCollectorManager knnCollectorManager)
8686
throws IOException {
87-
KnnCollector knnCollector = knnCollectorManager.newCollector(visitedLimit, context);
87+
KnnCollector knnCollector = knnCollectorManager.newCollector(visitedLimit, null, context);
8888
LeafReader reader = context.reader();
8989
FloatVectorValues floatVectorValues = reader.getFloatVectorValues(field);
9090
if (floatVectorValues == null) {

lucene/core/src/java/org/apache/lucene/search/SeededKnnByteVectorQuery.java

-97
This file was deleted.

lucene/core/src/java/org/apache/lucene/search/SeededKnnFloatVectorQuery.java

-97
This file was deleted.

0 commit comments

Comments
 (0)