Skip to content

Commit 2949386

Browse files
committed
Bump to Lucene 10.3.2
1 parent 81b8ada commit 2949386

File tree

6 files changed

+23
-112
lines changed

6 files changed

+23
-112
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
</developers>
2727

2828
<properties>
29-
<lucene.version>10.1.0</lucene.version>
29+
<lucene.version>10.3.2</lucene.version>
3030
<java.version>21</java.version>
3131
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
3232
</properties>

src/main/java/io/anserini/index/IndexFlatDenseVectors.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
import org.apache.lucene.codecs.KnnVectorsFormat;
2727
import org.apache.lucene.codecs.KnnVectorsReader;
2828
import org.apache.lucene.codecs.KnnVectorsWriter;
29-
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
29+
import org.apache.lucene.codecs.lucene103.Lucene103Codec;
3030
import org.apache.lucene.index.ConcurrentMergeScheduler;
3131
import org.apache.lucene.index.IndexWriter;
3232
import org.apache.lucene.index.IndexWriterConfig;
@@ -89,15 +89,15 @@ public IndexFlatDenseVectors(Args args) {
8989

9090
if (args.quantizeInt8) {
9191
config = new IndexWriterConfig().setCodec(
92-
new Lucene101Codec() {
92+
new Lucene103Codec() {
9393
@Override
9494
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
9595
return new DelegatingKnnVectorsFormat(new AnseriniLucene99ScalarQuantizedVectorsFormat(), 4096);
9696
}
9797
});
9898
} else {
9999
config = new IndexWriterConfig().setCodec(
100-
new Lucene101Codec() {
100+
new Lucene103Codec() {
101101
@Override
102102
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
103103
return new DelegatingKnnVectorsFormat(new AnseriniLucene99FlatVectorFormat(), 4096);

src/main/java/io/anserini/index/IndexHnswDenseVectors.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
import org.apache.lucene.codecs.KnnVectorsFormat;
2727
import org.apache.lucene.codecs.KnnVectorsReader;
2828
import org.apache.lucene.codecs.KnnVectorsWriter;
29-
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
29+
import org.apache.lucene.codecs.lucene103.Lucene103Codec;
3030
import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat;
3131
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
3232
import org.apache.lucene.index.ConcurrentMergeScheduler;
@@ -101,7 +101,7 @@ public IndexHnswDenseVectors(Args args) throws Exception {
101101

102102
if (args.quantizeInt8) {
103103
config = new IndexWriterConfig().setCodec(
104-
new Lucene101Codec() {
104+
new Lucene103Codec() {
105105
@Override
106106
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
107107
return new DelegatingKnnVectorsFormat(
@@ -110,7 +110,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
110110
});
111111
} else {
112112
config = new IndexWriterConfig().setCodec(
113-
new Lucene101Codec() {
113+
new Lucene103Codec() {
114114
@Override
115115
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
116116
return new DelegatingKnnVectorsFormat(

src/main/java/io/anserini/index/codecs/AnseriniLucene99FlatVectorFormat.java

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,8 @@
3838
import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector;
3939
import org.apache.lucene.util.hnsw.RandomVectorScorer;
4040

41+
import org.apache.lucene.search.AcceptDocs;
42+
4143
public class AnseriniLucene99FlatVectorFormat extends KnnVectorsFormat {
4244

4345
static final String NAME = "AnseriniLucene99FlatVectorFormat";
@@ -131,43 +133,33 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException {
131133
}
132134

133135
@Override
134-
public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException {
136+
public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException {
135137
FloatVectorValues vectors = reader.getFloatVectorValues(field);
136138
if (vectors == null) {
137139
return;
138140
}
139141
VectorScorer scorer = vectors.scorer(target);
140142
DocIdSetIterator it = scorer.iterator();
143+
Bits bits = acceptDocs == null ? null : acceptDocs.bits();
141144
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
142-
if (acceptDocs == null || acceptDocs.get(doc)) {
145+
if (bits == null || bits.get(doc)) {
143146
knnCollector.collect(doc, scorer.score());
144147
}
145148
knnCollector.incVisitedCount(1);
146149
}
147150
}
148151

149-
private void collectAllMatchingDocs(KnnCollector knnCollector, Bits acceptDocs, RandomVectorScorer scorer) throws IOException {
150-
OrdinalTranslatedKnnCollector collector = new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc);
151-
Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs);
152-
for (int i = 0; i < scorer.maxOrd(); i++) {
153-
if (acceptedOrds == null || acceptedOrds.get(i)) {
154-
collector.collect(i, scorer.score(i));
155-
collector.incVisitedCount(1);
156-
}
157-
}
158-
assert collector.earlyTerminated() == false;
159-
}
160-
161152
@Override
162-
public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException {
153+
public void search(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException {
163154
ByteVectorValues vectors = reader.getByteVectorValues(field);
164155
if (vectors == null) {
165156
return;
166157
}
167158
VectorScorer scorer = vectors.scorer(target);
168159
DocIdSetIterator it = scorer.iterator();
160+
Bits bits = acceptDocs == null ? null : acceptDocs.bits();
169161
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
170-
if (acceptDocs == null || acceptDocs.get(doc)) {
162+
if (bits == null || bits.get(doc)) {
171163
knnCollector.collect(doc, scorer.score());
172164
}
173165
knnCollector.incVisitedCount(1);

src/main/java/io/anserini/index/codecs/AnseriniLucene99ScalarQuantizedVectorsFormat.java

Lines changed: 7 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
import org.apache.lucene.index.SegmentWriteState;
3131
import org.apache.lucene.index.Sorter;
3232
import org.apache.lucene.index.VectorSimilarityFunction;
33+
import org.apache.lucene.search.AcceptDocs;
3334
import org.apache.lucene.search.DocIdSetIterator;
3435
import org.apache.lucene.search.KnnCollector;
3536
import org.apache.lucene.util.Bits;
@@ -131,16 +132,17 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException {
131132
}
132133

133134
@Override
134-
public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException {
135+
public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException {
135136
FloatVectorValues vectors = reader.getFloatVectorValues(field);
136137
if (vectors == null) {
137138
return;
138139
}
139140
VectorSimilarityFunction similarity = VectorSimilarityFunction.DOT_PRODUCT;
140141
FloatVectorValues vectorValues = vectors.copy();
141142
KnnVectorValues.DocIndexIterator it = vectorValues.iterator();
143+
Bits bits = acceptDocs == null ? null : acceptDocs.bits();
142144
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
143-
if (acceptDocs == null || acceptDocs.get(doc)) {
145+
if (bits == null || bits.get(doc)) {
144146
int ord = it.index();
145147
float score = similarity.compare(target, vectorValues.vectorValue(ord));
146148
knnCollector.collect(doc, score);
@@ -149,29 +151,18 @@ public void search(String field, float[] target, KnnCollector knnCollector, Bits
149151
}
150152
}
151153

152-
private void collectAllMatchingDocs(KnnCollector knnCollector, Bits acceptDocs, RandomVectorScorer scorer) throws IOException {
153-
OrdinalTranslatedKnnCollector collector = new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc);
154-
Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs);
155-
for (int i = 0; i < scorer.maxOrd(); i++) {
156-
if (acceptedOrds == null || acceptedOrds.get(i)) {
157-
collector.collect(i, scorer.score(i));
158-
collector.incVisitedCount(1);
159-
}
160-
}
161-
assert collector.earlyTerminated() == false;
162-
}
163-
164154
@Override
165-
public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException {
155+
public void search(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException {
166156
ByteVectorValues vectors = reader.getByteVectorValues(field);
167157
if (vectors == null) {
168158
return;
169159
}
170160
VectorSimilarityFunction similarity = VectorSimilarityFunction.DOT_PRODUCT;
171161
ByteVectorValues vectorValues = vectors.copy();
172162
KnnVectorValues.DocIndexIterator it = vectorValues.iterator();
163+
Bits bits = acceptDocs == null ? null : acceptDocs.bits();
173164
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
174-
if (acceptDocs == null || acceptDocs.get(doc)) {
165+
if (bits == null || bits.get(doc)) {
175166
int ord = it.index();
176167
float score = similarity.compare(target, vectorValues.vectorValue(ord));
177168
knnCollector.collect(doc, score);

src/main/java/io/anserini/search/FlatDenseSearcher.java

Lines changed: 1 addition & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -200,9 +200,6 @@ public ScoredDoc[] search(float[] query, int k) throws IOException {
200200
public ScoredDoc[] search(@Nullable K qid, float[] query, int k) throws IOException {
201201
KnnFloatVectorQuery vectorQuery = new KnnFloatVectorQuery(Constants.VECTOR, query, DUMMY_EF_SEARCH);
202202
TopDocs topDocs = getIndexSearcher().search(vectorQuery, k, BREAK_SCORE_TIES_BY_DOCID, true);
203-
if (topDocs.scoreDocs.length == 0 && reader.numDocs() > 0) {
204-
topDocs = bruteForceSearch(query, k);
205-
}
206203

207204
return super.processLuceneTopDocs(qid, topDocs);
208205
}
@@ -239,78 +236,9 @@ public ScoredDoc[] search(@Nullable K qid, String query, int k) throws IOExcepti
239236

240237
KnnFloatVectorQuery vectorQuery = generator.buildQuery(Constants.VECTOR, query, DUMMY_EF_SEARCH);
241238
TopDocs topDocs = getIndexSearcher().search(vectorQuery, k, BREAK_SCORE_TIES_BY_DOCID, true);
242-
if (topDocs.scoreDocs.length == 0 && reader.numDocs() > 0) {
243-
topDocs = bruteForceSearch(vectorQuery.getTargetCopy(), k);
244-
}
245-
246-
return super.processLuceneTopDocs(qid, topDocs);
247-
}
248239

249-
private TopDocs bruteForceSearch(float[] query, int k) throws IOException {
250-
List<ScoredDocInfo> scored = new ArrayList<>();
251240

252-
for (LeafReaderContext ctx : reader.leaves()) {
253-
LeafReader leaf = ctx.reader();
254-
FieldInfo fieldInfo = leaf.getFieldInfos().fieldInfo(Constants.VECTOR);
255-
if (fieldInfo == null) {
256-
continue;
257-
}
258-
VectorSimilarityFunction similarity = fieldInfo.getVectorSimilarityFunction();
259-
260-
var floatVectors = leaf.getFloatVectorValues(Constants.VECTOR);
261-
if (floatVectors != null) {
262-
if (floatVectors.getClass().getName().contains("QuantizedVectorValues")) {
263-
VectorScorer scorer = floatVectors.scorer(query);
264-
DocIdSetIterator it = scorer.iterator();
265-
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
266-
float score = scorer.score();
267-
int globalDoc = doc + ctx.docBase;
268-
scored.add(new ScoredDocInfo(globalDoc, score,
269-
getIndexSearcher().storedFields().document(globalDoc).get(Constants.ID)));
270-
}
271-
} else {
272-
var it = floatVectors.iterator();
273-
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
274-
int ord = it.index();
275-
float score = similarity.compare(query, floatVectors.vectorValue(ord));
276-
int globalDoc = doc + ctx.docBase;
277-
scored.add(new ScoredDocInfo(globalDoc, score,
278-
getIndexSearcher().storedFields().document(globalDoc).get(Constants.ID)));
279-
}
280-
}
281-
continue;
282-
}
283-
284-
}
285-
286-
scored.sort((a, b) -> {
287-
int scoreCmp = Float.compare(b.score, a.score);
288-
if (scoreCmp != 0) {
289-
return scoreCmp;
290-
}
291-
return a.docid.compareTo(b.docid);
292-
});
293-
294-
int hits = Math.min(k, scored.size());
295-
ScoreDoc[] scoreDocs = new ScoreDoc[hits];
296-
for (int i = 0; i < hits; i++) {
297-
ScoredDocInfo info = scored.get(i);
298-
scoreDocs[i] = new ScoreDoc(info.luceneDocid, info.score);
299-
}
300-
301-
return new TopDocs(new TotalHits(scored.size(), TotalHits.Relation.EQUAL_TO), scoreDocs);
302-
}
303-
304-
private static final class ScoredDocInfo {
305-
private final int luceneDocid;
306-
private final float score;
307-
private final String docid;
308-
309-
private ScoredDocInfo(int luceneDocid, float score, String docid) {
310-
this.luceneDocid = luceneDocid;
311-
this.score = score;
312-
this.docid = docid;
313-
}
241+
return super.processLuceneTopDocs(qid, topDocs);
314242
}
315243

316244
@Override

0 commit comments

Comments
 (0)