-
Notifications
You must be signed in to change notification settings - Fork 25.2k
ES|QL dense vector field type support #126456
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 38 commits
6439422
c7e48a0
9e40cd4
03f6a92
d84c8dd
9ff5ed8
2754697
d196be0
975b4db
ae9fa5f
9896adf
dfa420e
d983495
5707e25
a8c8a6a
d827c6d
d8e139d
0539aff
7bbe7ee
7d1f8b7
8870fef
ad84f13
2619fe6
b23d5a9
6bb8e62
8013e47
e0e34f4
c30f5d9
a2fbb13
4833ce5
e8878a0
93d45fc
a6b0f6c
cd462b8
0675a42
7f95d6a
2cff4e7
efc3fb6
f8741ca
c951ee7
ba0a6b9
f5889b2
4b2126e
afe79f7
8049a58
217ce84
74fe676
af965a1
53ec29c
60da0fe
06cbea9
8f07e08
53bcbc7
fe82007
a63ca2c
c5d6ddc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,13 +11,16 @@ | |
|
||
import org.apache.lucene.index.BinaryDocValues; | ||
import org.apache.lucene.index.DocValues; | ||
import org.apache.lucene.index.FloatVectorValues; | ||
import org.apache.lucene.index.KnnVectorValues; | ||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.index.NumericDocValues; | ||
import org.apache.lucene.index.SortedDocValues; | ||
import org.apache.lucene.index.SortedNumericDocValues; | ||
import org.apache.lucene.index.SortedSetDocValues; | ||
import org.apache.lucene.util.BytesRef; | ||
import org.elasticsearch.common.io.stream.ByteArrayStreamInput; | ||
import org.elasticsearch.index.IndexVersion; | ||
import org.elasticsearch.index.mapper.BlockLoader.BlockFactory; | ||
import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder; | ||
import org.elasticsearch.index.mapper.BlockLoader.Builder; | ||
|
@@ -26,6 +29,7 @@ | |
import org.elasticsearch.index.mapper.BlockLoader.DoubleBuilder; | ||
import org.elasticsearch.index.mapper.BlockLoader.IntBuilder; | ||
import org.elasticsearch.index.mapper.BlockLoader.LongBuilder; | ||
import org.elasticsearch.index.mapper.vectors.VectorEncoderDecoder; | ||
import org.elasticsearch.search.fetch.StoredFieldsSpec; | ||
|
||
import java.io.IOException; | ||
|
@@ -504,6 +508,81 @@ public String toString() { | |
} | ||
} | ||
|
||
public static class DenseVectorBlockLoader extends DocValuesBlockLoader { | ||
private final String fieldName; | ||
|
||
public DenseVectorBlockLoader(String fieldName) { | ||
this.fieldName = fieldName; | ||
} | ||
|
||
@Override | ||
public Builder builder(BlockFactory factory, int expectedCount) { | ||
return factory.floats(expectedCount); | ||
} | ||
|
||
@Override | ||
public AllReader reader(LeafReaderContext context) throws IOException { | ||
FloatVectorValues floatVectorValues = context.reader().getFloatVectorValues(fieldName); | ||
if (floatVectorValues != null) { | ||
return new FloatVectorValuesBlockReader(floatVectorValues); | ||
} | ||
return new ConstantNullsReader(); | ||
} | ||
} | ||
|
||
private static class FloatVectorValuesBlockReader extends BlockDocValuesReader { | ||
private final FloatVectorValues floatVectorValues; | ||
private final KnnVectorValues.DocIndexIterator iterator; | ||
|
||
FloatVectorValuesBlockReader(FloatVectorValues floatVectorValues) { | ||
this.floatVectorValues = floatVectorValues; | ||
iterator = floatVectorValues.iterator(); | ||
} | ||
|
||
@Override | ||
public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { | ||
// Doubles from doc values ensures that the values are in order | ||
try (BlockLoader.FloatBuilder builder = factory.floatsFromDocValues(docs.count())) { | ||
for (int i = 0; i < docs.count(); i++) { | ||
int doc = docs.get(i); | ||
if (doc < iterator.docID()) { | ||
throw new IllegalStateException("docs within same block must be in order"); | ||
} | ||
read(doc, builder); | ||
} | ||
return builder.build(); | ||
} | ||
} | ||
|
||
@Override | ||
public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { | ||
read(docId, (BlockLoader.FloatBuilder) builder); | ||
} | ||
|
||
private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException { | ||
if (iterator.advance(doc) == doc) { | ||
builder.beginPositionEntry(); | ||
float[] floats = floatVectorValues.vectorValue(iterator.index()); | ||
for (float aFloat : floats) { | ||
builder.appendFloat(aFloat); | ||
} | ||
builder.endPositionEntry(); | ||
} else { | ||
builder.appendNull(); | ||
} | ||
} | ||
|
||
@Override | ||
public int docId() { | ||
return iterator.docID(); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "BlockDocValuesReader.FloatVectorValuesBlockReader"; | ||
} | ||
} | ||
|
||
public static class BytesRefsFromOrdsBlockLoader extends DocValuesBlockLoader { | ||
private final String fieldName; | ||
|
||
|
@@ -752,6 +831,92 @@ public String toString() { | |
} | ||
} | ||
|
||
public static class DenseVectorFromBinaryBlockLoader extends DocValuesBlockLoader { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This one reads from binary docvalues |
||
private final String fieldName; | ||
private final int dims; | ||
private final IndexVersion indexVersion; | ||
|
||
/**
 * @param fieldName    name of the field whose binary doc values are read
 * @param dims         handed to each per-segment reader together with {@code indexVersion}
 * @param indexVersion handed to each per-segment reader together with {@code dims}
 */
public DenseVectorFromBinaryBlockLoader(String fieldName, int dims, IndexVersion indexVersion) {
    this.indexVersion = indexVersion;
    this.dims = dims;
    this.fieldName = fieldName;
}
|
||
@Override | ||
public Builder builder(BlockFactory factory, int expectedCount) { | ||
return factory.floats(expectedCount); | ||
} | ||
|
||
@Override | ||
public AllReader reader(LeafReaderContext context) throws IOException { | ||
BinaryDocValues docValues = context.reader().getBinaryDocValues(fieldName); | ||
if (docValues == null) { | ||
return new ConstantNullsReader(); | ||
} | ||
return new DenseVectorFromBinary(docValues, dims, indexVersion); | ||
} | ||
} | ||
|
||
private static class DenseVectorFromBinary extends BlockDocValuesReader { | ||
private final BinaryDocValues docValues; | ||
private final IndexVersion indexVersion; | ||
private final float[] scratch; | ||
|
||
private int docID = -1; | ||
|
||
DenseVectorFromBinary(BinaryDocValues docValues, int dims, IndexVersion indexVersion) { | ||
this.docValues = docValues; | ||
this.scratch = new float[dims]; | ||
this.indexVersion = indexVersion; | ||
} | ||
|
||
@Override | ||
public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { | ||
try (BlockLoader.FloatBuilder builder = factory.floats(docs.count())) { | ||
for (int i = 0; i < docs.count(); i++) { | ||
int doc = docs.get(i); | ||
if (doc < docID) { | ||
throw new IllegalStateException("docs within same block must be in order"); | ||
} | ||
read(doc, builder); | ||
} | ||
return builder.build(); | ||
} | ||
} | ||
|
||
@Override | ||
public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { | ||
read(docId, (BlockLoader.FloatBuilder) builder); | ||
} | ||
|
||
private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException { | ||
this.docID = doc; | ||
if (false == docValues.advanceExact(doc)) { | ||
builder.appendNull(); | ||
return; | ||
} | ||
BytesRef bytesRef = docValues.binaryValue(); | ||
assert bytesRef.length > 0; | ||
VectorEncoderDecoder.decodeDenseVector(indexVersion, bytesRef, scratch); | ||
|
||
builder.beginPositionEntry(); | ||
for (float value : scratch) { | ||
builder.appendFloat(value); | ||
} | ||
builder.endPositionEntry(); | ||
} | ||
|
||
@Override | ||
public int docId() { | ||
return docID; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "DenseVectorFromBinary.Bytes"; | ||
} | ||
} | ||
|
||
public static class BooleansBlockLoader extends DocValuesBlockLoader { | ||
private final String fieldName; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -301,6 +301,46 @@ public String toString() { | |
} | ||
} | ||
|
||
/** | ||
* Load {@code float}s from {@code _source}. | ||
*/ | ||
public static class FloatsBlockLoader extends SourceBlockLoader { | ||
/**
 * Passes {@code fetcher} and {@code lookup} unchanged to the {@code SourceBlockLoader} constructor.
 */
public FloatsBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup) {
    super(fetcher, lookup);
}
|
||
@Override | ||
public Builder builder(BlockFactory factory, int expectedCount) { | ||
return factory.floats(expectedCount); | ||
} | ||
|
||
@Override | ||
public RowStrideReader rowStrideReader(LeafReaderContext context, DocIdSetIterator iter) { | ||
return new Floats(fetcher, iter); | ||
} | ||
|
||
@Override | ||
protected String name() { | ||
return "Floats"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again - just checking if this is the value we want to return given that the class is called There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch, I renamed this multiple times but didn't follow on this method. Thanks! afe79f7 |
||
} | ||
} | ||
|
||
private static class Floats extends BlockSourceReader { | ||
/**
 * Passes {@code fetcher} and {@code iter} unchanged to the {@code BlockSourceReader} constructor.
 */
Floats(ValueFetcher fetcher, DocIdSetIterator iter) {
    super(fetcher, iter);
}
|
||
@Override | ||
protected void append(BlockLoader.Builder builder, Object v) { | ||
((BlockLoader.FloatBuilder) builder).appendFloat(((Number) v).floatValue()); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "BlockSourceReader.Floats"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ++, afe79f7 |
||
} | ||
} | ||
|
||
/** | ||
* Load {@code int}s from {@code _source}. | ||
*/ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a BlockLoader for dense vectors, that uses
FloatVectorValues
to retrieve indexed vector data.