Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
8cc11a8
Adding base64 indexing for vector values
benwtrent Oct 23, 2025
c2c5d27
Update docs/changelog/137072.yaml
benwtrent Oct 23, 2025
ba4793d
Merge remote-tracking branch 'upstream/main' into add-base64-encoded-…
benwtrent Oct 27, 2025
5e9baed
Switching to BIG ENDIAN, adding more tests
benwtrent Oct 27, 2025
bf3de81
iter
benwtrent Oct 27, 2025
2c86500
iter
benwtrent Oct 27, 2025
ddd52eb
iter
benwtrent Oct 28, 2025
24a9d8e
Merge remote-tracking branch 'upstream/main' into add-base64-encoded-…
benwtrent Oct 28, 2025
c62be9a
iter
benwtrent Oct 28, 2025
abfefb5
[CI] Auto commit changes from spotless
Oct 28, 2025
25bbdce
fixing formatting
benwtrent Oct 30, 2025
44862b4
iter
benwtrent Oct 31, 2025
49c117b
Merge remote-tracking branch 'upstream/main' into add-base64-encoded-…
benwtrent Oct 31, 2025
24d33ce
Adding further tests and support
benwtrent Oct 31, 2025
0113501
Merge remote-tracking branch 'upstream/main' into add-base64-encoded-…
benwtrent Oct 31, 2025
44dde19
Merge branch 'main' into add-base64-encoded-float32-support
benwtrent Oct 31, 2025
38cac73
Merge remote-tracking branch 'upstream/main' into add-base64-encoded-…
benwtrent Nov 3, 2025
3652c36
addressing PR comments
benwtrent Nov 3, 2025
fcd9a28
Merge branch 'add-base64-encoded-float32-support' of github.com:benwt…
benwtrent Nov 3, 2025
c77d674
iter
benwtrent Nov 3, 2025
fe782c4
Merge branch 'main' into add-base64-encoded-float32-support
benwtrent Nov 3, 2025
372b76a
Merge branch 'main' into add-base64-encoded-float32-support
iverase Nov 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/137072.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 137072
summary: Adding base64 indexing for vector values
area: Vector Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ setup:

# [-128, 127, 10] - is encoded as '807f0a'
- do:
catch: /Failed to parse object./
catch: bad_request
index:
index: knn_hex_vector_index
id: "5"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
setup:
- requires:
cluster_features: "mapper.base64_dense_vectors"
reason: 'base64 encoding for vectors feature required'

- do:
indices.create:
index: knn_base64_vector_index
body:
settings:
number_of_shards: 1
mappings:
dynamic: false
properties:
my_vector_byte:
type: dense_vector
dims: 3
index : true
similarity : l2_norm
element_type: byte
my_vector_float:
type: dense_vector
dims: 3
index: true
element_type: float
similarity : l2_norm

# [0.8837743, 0.6310808, 0.7800066] - is encoded as 'P2I/CD8hjoM/R66D'
# [-128, 127, 10] - is encoded as 'gH8K'
- do:
index:
index: knn_base64_vector_index
id: "1"
body:
my_vector_float: "P2I/CD8hjoM/R66D"
my_vector_byte: "gH8K"


# [0.27721548, 0.9202792, 0.46455473] - is encoded as 'Po3vMD9rl2s+7doe'
# [0, 1, 0] - is encoded as 'AAEA'
- do:
index:
index: knn_base64_vector_index
id: "2"
body:
my_vector_float: "Po3vMD9rl2s+7doe"
my_vector_byte: "AAEA"

- do:
index:
index: knn_base64_vector_index
id: "3"
body:
my_vector_float: [0.2509804, -0.039215684, -0.11764706]
my_vector_byte: [64, -10, -30]

- do:
indices.refresh: {}

---
"Fail to index hex-encoded vector on float field":

# [-128, 127, 10] - is hex-encoded as '807f0a'; sending this hex string to the float field must be rejected
- do:
catch: bad_request
index:
index: knn_base64_vector_index
id: "5"
body:
my_vector_float: "807f0a"

---
"Knn retrieve base64 encoded vectors" :
- do:
get:
index: knn_base64_vector_index
id: "1"
_source_exclude_vectors: false

- match: { _source.my_vector_float: [0.8837743, 0.6310808, 0.7800066] }
- match: { _source.my_vector_byte: [-128, 127, 10] }
---
"Base64 bytes infers the dimensions correctly":
- do:
indices.create:
index: knn_base64_vector_index_infer_dims
body:
settings:
number_of_shards: 1
mappings:
dynamic: false
properties:
my_vector_byte:
type: dense_vector
index : true
similarity : l2_norm
element_type: byte

# [-128, 127, 10, 0] - is encoded as 'gH8KAA=='
- do:
index:
index: knn_base64_vector_index_infer_dims
id: "1"
body:
my_vector_byte: "gH8KAA=="

- do:
cluster.health:
wait_for_events: languid

- do:
indices.get_mapping:
index: knn_base64_vector_index_infer_dims

# sanity-check the mapping type and index flag, then confirm dims was inferred as 4 from the base64 payload
- match: { knn_base64_vector_index_infer_dims.mappings.properties.my_vector_byte.type: dense_vector }
- match: { knn_base64_vector_index_infer_dims.mappings.properties.my_vector_byte.index: true }
- match: { knn_base64_vector_index_infer_dims.mappings.properties.my_vector_byte.dims: 4 }
---
"Base64 floats infers the dimensions correctly":
- do:
indices.create:
index: knn_base64_vector_index_infer_dims
body:
settings:
number_of_shards: 1
mappings:
dynamic: false
properties:
my_vector_byte:
type: dense_vector
index : true
similarity : l2_norm
element_type: float

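# [0.8837743, 0.6310808, 0.7800066, 0.0] - is encoded as 'P2I/CD8hjoM/R66DAAAAAA=='
# NOTE(review): the field is still named my_vector_byte although it is mapped with element_type: float - consider renaming it to my_vector_float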
- do:
index:
index: knn_base64_vector_index_infer_dims
id: "1"
body:
my_vector_byte: "P2I/CD8hjoM/R66DAAAAAA=="

- do:
cluster.health:
wait_for_events: languid
- do:
indices.get_mapping:
index: knn_base64_vector_index_infer_dims

# sanity-check the mapping type and index flag, then confirm dims was inferred as 4 from the base64 payload
- match: { knn_base64_vector_index_infer_dims.mappings.properties.my_vector_byte.type: dense_vector }
- match: { knn_base64_vector_index_infer_dims.mappings.properties.my_vector_byte.index: true }
- match: { knn_base64_vector_index_infer_dims.mappings.properties.my_vector_byte.dims: 4 }
---
"Retrieve Base64 encoded vectors when exclude vectors from source is false":
- do:
indices.create:
index: knn_base64_vector_index_with_source_vectors
body:
settings:
number_of_shards: 1
index:
mapping:
exclude_source_vectors: false
mappings:
dynamic: false
properties:
my_vector_byte:
type: dense_vector
dims: 3
index : true
similarity : l2_norm
element_type: byte
my_vector_float:
type: dense_vector
dims: 3
index: true
element_type: float
similarity : l2_norm

- do:
index:
index: knn_base64_vector_index_with_source_vectors
id: "1"
body:
my_vector_float: "P2I/CD8hjoM/R66D"
my_vector_byte: "gH8K"

- do:
index:
index: knn_base64_vector_index_with_source_vectors
id: "3"
body:
my_vector_float: [0.2509804, -0.039215684, -0.11764706]
my_vector_byte: [64, -10, -30]

- do:
indices.refresh: {}

- do:
search:
index: knn_base64_vector_index_with_source_vectors
body:
query:
ids:
values: ["1"]
_source: false
fields:
- my_vector_float
- my_vector_byte

- match: { hits.hits.0.fields.my_vector_float: ["P2I/CD8hjoM/R66D"] }
- match: { hits.hits.0.fields.my_vector_byte: ["gH8K"] }

- do:
search:
index: knn_base64_vector_index_with_source_vectors
body:
query:
ids:
values: ["3"]
_source: false
fields:
- my_vector_float
- my_vector_byte

- match: { hits.hits.0.fields.my_vector_float: [0.2509804, -0.039215684, -0.11764706] }
- match: { hits.hits.0.fields.my_vector_byte: [64, -10, -30] }
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ setup:

# [-128, 127, 10] - is encoded as '807f0a'
- do:
catch: /Failed to parse object./
catch: bad_request
index:
index: knn_hex_vector_index
id: "5"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public class MapperFeatures implements FeatureSpecification {
"mapper.ignore_dynamic_field_names_beyond_limit"
);
static final NodeFeature EXCLUDE_VECTORS_DOCVALUE_BUGFIX = new NodeFeature("mapper.exclude_vectors_docvalue_bugfix");
static final NodeFeature BASE64_DENSE_VECTORS = new NodeFeature("mapper.base64_dense_vectors");

@Override
public Set<NodeFeature> getTestFeatures() {
Expand Down Expand Up @@ -99,7 +100,8 @@ public Set<NodeFeature> getTestFeatures() {
DISKBBQ_ON_DISK_RESCORING,
PROVIDE_INDEX_SORT_SETTING_DEFAULTS,
INDEX_MAPPING_IGNORE_DYNAMIC_BEYOND_FIELD_NAME_LIMIT,
EXCLUDE_VECTORS_DOCVALUE_BUGFIX
EXCLUDE_VECTORS_DOCVALUE_BUGFIX,
BASE64_DENSE_VECTORS
);
}
}
Loading