Skip to content

Commit cd4fcbf

Browse files
Update Default value of Oversample for bbq (#127134)
* Unit test to validate default behavior
* adding default value to oversample for bbq
* Fix code style issue
* Update docs/changelog/127134.yaml
* Update changelog
* Adding index version to support only new indices
* Update index version name to better match
* Adding a simple yaml test to verify the yaml functionality for oversample value
* Refactor knn float to add rescore vector by default when index type is one of bbq
* adding yaml tests to verify oversample default value
* Fixing format issue for not_exists
1 parent 352db86 commit cd4fcbf

File tree

10 files changed

+132
-2
lines changed

10 files changed

+132
-2
lines changed

docs/changelog/127134.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 127134
2+
summary: Define a default oversample value for dense vectors with bbq_hnsw/bbq_flat
3+
area: Vector Search
4+
type: enhancement
5+
issues: []

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml

+11
Original file line numberDiff line numberDiff line change
@@ -568,3 +568,14 @@ setup:
568568
- match: { hits.hits.1._score: $default_rescore1 }
569569
- match: { hits.hits.2._score: $override_score2 }
570570
- match: { hits.hits.2._score: $default_rescore2 }
571+
572+
---
573+
"default oversample value":
574+
- requires:
575+
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
576+
reason: "Needs default_oversample_value_for_bbq feature"
577+
- do:
578+
indices.get_mapping:
579+
index: bbq_hnsw
580+
581+
- match: { bbq_hnsw.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 }

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml

+11
Original file line numberDiff line numberDiff line change
@@ -339,3 +339,14 @@ setup:
339339
- match: { hits.hits.0._score: $rescore_score0 }
340340
- match: { hits.hits.1._score: $rescore_score1 }
341341
- match: { hits.hits.2._score: $rescore_score2 }
342+
343+
---
344+
"default oversample value":
345+
- requires:
346+
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
347+
reason: "Needs default_oversample_value_for_bbq feature"
348+
- do:
349+
indices.get_mapping:
350+
index: bbq_flat
351+
352+
- match: { bbq_flat.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 }

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat.yml

+11
Original file line numberDiff line numberDiff line change
@@ -495,3 +495,14 @@ setup:
495495
- match: { hits.hits.0._score: $rescore_score0 }
496496
- match: { hits.hits.1._score: $rescore_score1 }
497497
- match: { hits.hits.2._score: $rescore_score2 }
498+
499+
---
500+
"no default oversample value":
501+
- requires:
502+
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
503+
reason: "Needs default_oversample_value_for_bbq feature"
504+
- do:
505+
indices.get_mapping:
506+
index: int4_flat
507+
508+
- not_exists: int4_flat.mappings.properties.vector.index_options.rescore_vector

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat.yml

+11
Original file line numberDiff line numberDiff line change
@@ -436,3 +436,14 @@ setup:
436436
- match: { hits.hits.0._score: $rescore_score0 }
437437
- match: { hits.hits.1._score: $rescore_score1 }
438438
- match: { hits.hits.2._score: $rescore_score2 }
439+
440+
---
441+
"no default oversample value":
442+
- requires:
443+
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
444+
reason: "Needs default_oversample_value_for_bbq feature"
445+
- do:
446+
indices.get_mapping:
447+
index: int8_flat
448+
449+
- not_exists: int8_flat.mappings.properties.vector.index_options.rescore_vector

server/src/main/java/org/elasticsearch/index/IndexVersions.java

+1
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ private static Version parseUnchecked(String version) {
161161
public static final IndexVersion USE_LUCENE101_POSTINGS_FORMAT = def(9_021_0_00, Version.LUCENE_10_1_0);
162162
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_0 = def(9_022_00_0, Version.LUCENE_10_2_0);
163163
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_1 = def(9_023_00_0, Version.LUCENE_10_2_1);
164+
public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = def(9_024_0_00, Version.LUCENE_10_2_1);
164165
/*
165166
* STOP! READ THIS FIRST! No, really,
166167
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING;
1818
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING;
19+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ;
1920

2021
/**
2122
* Spec for mapper-related features.
@@ -66,7 +67,8 @@ public Set<NodeFeature> getTestFeatures() {
6667
RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING,
6768
DateFieldMapper.INVALID_DATE_FIX,
6869
NPE_ON_DIMS_UPDATE_FIX,
69-
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING
70+
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING,
71+
USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ
7072
);
7173
}
7274
}

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

+15
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,15 @@ public static boolean isNotUnitVector(float magnitude) {
117117
public static final IndexVersion ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS = IndexVersions.ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS;
118118
public static final IndexVersion RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS =
119119
IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS;
120+
public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = IndexVersions.DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ;
120121

121122
public static final NodeFeature RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature("mapper.dense_vector.rescore_vector");
122123
public static final NodeFeature RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature(
123124
"mapper.dense_vector.rescore_zero_vector"
124125
);
126+
public static final NodeFeature USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = new NodeFeature(
127+
"mapper.dense_vector.default_oversample_value_for_bbq"
128+
);
125129

126130
public static final String CONTENT_TYPE = "dense_vector";
127131
public static final short MAX_DIMS_COUNT = 4096; // maximum allowed number of dimensions
@@ -131,6 +135,7 @@ public static boolean isNotUnitVector(float magnitude) {
131135
// vector
132136
public static final int MAGNITUDE_BYTES = 4;
133137
public static final int OVERSAMPLE_LIMIT = 10_000; // Max oversample allowed
138+
public static final float DEFAULT_OVERSAMPLE = 3.0F; // Default oversample value
134139

135140
private static DenseVectorFieldMapper toType(FieldMapper in) {
136141
return (DenseVectorFieldMapper) in;
@@ -1462,6 +1467,9 @@ public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOpti
14621467
RescoreVector rescoreVector = null;
14631468
if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) {
14641469
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
1470+
if (rescoreVector == null && indexVersion.onOrAfter(DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ)) {
1471+
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
1472+
}
14651473
}
14661474
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
14671475
return new BBQHnswIndexOptions(m, efConstruction, rescoreVector);
@@ -1483,6 +1491,9 @@ public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOpti
14831491
RescoreVector rescoreVector = null;
14841492
if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) {
14851493
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
1494+
if (rescoreVector == null && indexVersion.onOrAfter(DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ)) {
1495+
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
1496+
}
14861497
}
14871498
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
14881499
return new BBQFlatIndexOptions(rescoreVector);
@@ -2311,6 +2322,10 @@ int getVectorDimensions() {
23112322
ElementType getElementType() {
23122323
return elementType;
23132324
}
2325+
2326+
IndexOptions getIndexOptions() {
2327+
return indexOptions;
2328+
}
23142329
}
23152330

23162331
private final IndexOptions indexOptions;

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

+54
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,60 @@ public void testInvalidRescoreVector() {
10221022
}
10231023
}
10241024

1025+
public void testDefaultOversampleValue() throws IOException {
1026+
{
1027+
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
1028+
b.field("type", "dense_vector");
1029+
b.field("dims", 128);
1030+
b.field("index", true);
1031+
b.field("similarity", "dot_product");
1032+
b.startObject("index_options");
1033+
b.field("type", "bbq_hnsw");
1034+
b.endObject();
1035+
}));
1036+
1037+
DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
1038+
DenseVectorFieldMapper.BBQHnswIndexOptions indexOptions = (DenseVectorFieldMapper.BBQHnswIndexOptions) denseVectorFieldMapper
1039+
.fieldType()
1040+
.getIndexOptions();
1041+
assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F);
1042+
}
1043+
{
1044+
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
1045+
b.field("type", "dense_vector");
1046+
b.field("dims", 128);
1047+
b.field("index", true);
1048+
b.field("similarity", "dot_product");
1049+
b.startObject("index_options");
1050+
b.field("type", "bbq_flat");
1051+
b.endObject();
1052+
}));
1053+
1054+
DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
1055+
DenseVectorFieldMapper.BBQFlatIndexOptions indexOptions = (DenseVectorFieldMapper.BBQFlatIndexOptions) denseVectorFieldMapper
1056+
.fieldType()
1057+
.getIndexOptions();
1058+
assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F);
1059+
}
1060+
{
1061+
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
1062+
b.field("type", "dense_vector");
1063+
b.field("dims", 128);
1064+
b.field("index", true);
1065+
b.field("similarity", "dot_product");
1066+
b.startObject("index_options");
1067+
b.field("type", "int8_hnsw");
1068+
b.endObject();
1069+
}));
1070+
1071+
DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
1072+
DenseVectorFieldMapper.Int8HnswIndexOptions indexOptions = (DenseVectorFieldMapper.Int8HnswIndexOptions) denseVectorFieldMapper
1073+
.fieldType()
1074+
.getIndexOptions();
1075+
assertNull(indexOptions.rescoreVector);
1076+
}
1077+
}
1078+
10251079
public void testDims() {
10261080
{
10271081
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {

server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java

+10-1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import java.util.stream.Collectors;
4747
import java.util.stream.Stream;
4848

49+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE;
4950
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.OVERSAMPLE_LIMIT;
5051
import static org.elasticsearch.search.SearchService.DEFAULT_SIZE;
5152
import static org.hamcrest.Matchers.containsString;
@@ -144,7 +145,7 @@ protected KnnVectorQueryBuilder doCreateTestQueryBuilder() {
144145
fieldName,
145146
k,
146147
numCands,
147-
randomRescoreVectorBuilder(),
148+
isIndextypeBBQ() ? randomBBQRescoreVectorBuilder() : randomRescoreVectorBuilder(),
148149
randomFloat()
149150
);
150151

@@ -161,6 +162,14 @@ protected KnnVectorQueryBuilder doCreateTestQueryBuilder() {
161162
return queryBuilder;
162163
}
163164

165+
private boolean isIndextypeBBQ() {
166+
return indexType.equals("bbq_hnsw") || indexType.equals("bbq_flat");
167+
}
168+
169+
protected RescoreVectorBuilder randomBBQRescoreVectorBuilder() {
170+
return new RescoreVectorBuilder(randomBoolean() ? DEFAULT_OVERSAMPLE : randomFloatBetween(1.0f, 10.0f, false));
171+
}
172+
164173
protected RescoreVectorBuilder randomRescoreVectorBuilder() {
165174
if (randomBoolean()) {
166175
return null;

0 commit comments

Comments
 (0)