Skip to content

Commit 538adbc

Browse files
Samiul-TheSoccerFan authored and benwtrent committed
Update Default value of Oversample for bbq (elastic#127134)
* Unit test to validate default behavior * Adding default value to oversample for bbq * Fix code style issue * Update docs/changelog/127134.yaml * Update changelog * Adding index version to support only new indices * Update index version name to better match * Adding a simple yaml test to verify the yaml functionality for oversample value * Refactor knn float to add rescore vector by default when index type is one of bbq * Adding yaml tests to verify oversample default value * Fixing format issue for not_exists (cherry picked from commit cd4fcbf)
1 parent b763932 commit 538adbc

File tree

9 files changed

+128
-25
lines changed

9 files changed

+128
-25
lines changed

docs/changelog/127134.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 127134
2+
summary: Define a default oversample value for dense vectors with bbq_hnsw/bbq_flat
3+
area: Vector Search
4+
type: enhancement
5+
issues: []

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -572,28 +572,13 @@ setup:
572572
- match: { hits.hits.2._score: $override_score2 }
573573
- match: { hits.hits.2._score: $default_rescore2 }
574574

575+
---
576+
"default oversample value":
577+
- requires:
578+
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
579+
reason: "Needs default_oversample_value_for_bbq feature"
575580
- do:
576-
headers:
577-
Content-Type: application/json
578-
search:
579-
rest_total_hits_as_int: true
580-
index: bbq_rescore_zero_hnsw
581-
body:
582-
knn:
583-
field: vector
584-
query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393,
585-
0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015,
586-
0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259,
587-
-0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 ,
588-
-0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232,
589-
-0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034,
590-
-0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582,
591-
-0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158]
592-
k: 3
593-
num_candidates: 3
581+
indices.get_mapping:
582+
index: bbq_hnsw
594583

595-
# Compare scores as hit IDs may change depending on how things are distributed
596-
- match: { hits.total: 3 }
597-
- match: { hits.hits.0._score: $raw_score0 }
598-
- match: { hits.hits.1._score: $raw_score1 }
599-
- match: { hits.hits.2._score: $raw_score2 }
584+
- match: { bbq_hnsw.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 }

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,3 +342,14 @@ setup:
342342
- match: { hits.hits.0._score: $rescore_score0 }
343343
- match: { hits.hits.1._score: $rescore_score1 }
344344
- match: { hits.hits.2._score: $rescore_score2 }
345+
346+
---
347+
"default oversample value":
348+
- requires:
349+
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
350+
reason: "Needs default_oversample_value_for_bbq feature"
351+
- do:
352+
indices.get_mapping:
353+
index: bbq_flat
354+
355+
- match: { bbq_flat.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 }

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,3 +498,14 @@ setup:
498498
- match: { hits.hits.0._score: $rescore_score0 }
499499
- match: { hits.hits.1._score: $rescore_score1 }
500500
- match: { hits.hits.2._score: $rescore_score2 }
501+
502+
---
503+
"no default oversample value":
504+
- requires:
505+
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
506+
reason: "Needs default_oversample_value_for_bbq feature"
507+
- do:
508+
indices.get_mapping:
509+
index: int4_flat
510+
511+
- not_exists: int4_flat.mappings.properties.vector.index_options.rescore_vector

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,3 +436,14 @@ setup:
436436
- match: { hits.hits.0._score: $rescore_score0 }
437437
- match: { hits.hits.1._score: $rescore_score1 }
438438
- match: { hits.hits.2._score: $rescore_score2 }
439+
440+
---
441+
"no default oversample value":
442+
- requires:
443+
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
444+
reason: "Needs default_oversample_value_for_bbq feature"
445+
- do:
446+
indices.get_mapping:
447+
index: int8_flat
448+
449+
- not_exists: int8_flat.mappings.properties.vector.index_options.rescore_vector

server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING;
2121
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING;
22+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ;
2223

2324
/**
2425
* Spec for mapper-related features.
@@ -95,7 +96,8 @@ public Set<NodeFeature> getTestFeatures() {
9596
DateFieldMapper.INVALID_DATE_FIX,
9697
NPE_ON_DIMS_UPDATE_FIX,
9798
RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING,
98-
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING
99+
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING,
100+
USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ
99101
);
100102
}
101103
}

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,11 +128,15 @@ private static boolean allowsZeroRescore(IndexVersion version) {
128128
public static final IndexVersion LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION = IndexVersions.V_8_9_0;
129129
public static final IndexVersion RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS =
130130
IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS;
131+
public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = IndexVersions.DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ;
131132

132133
public static final NodeFeature RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature("mapper.dense_vector.rescore_vector");
133134
public static final NodeFeature RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature(
134135
"mapper.dense_vector.rescore_zero_vector"
135136
);
137+
public static final NodeFeature USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = new NodeFeature(
138+
"mapper.dense_vector.default_oversample_value_for_bbq"
139+
);
136140

137141
public static final String CONTENT_TYPE = "dense_vector";
138142
public static short MAX_DIMS_COUNT = 4096; // maximum allowed number of dimensions
@@ -141,6 +145,7 @@ private static boolean allowsZeroRescore(IndexVersion version) {
141145
public static short MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING = 128; // minimum number of dims for floats to be dynamically mapped to vector
142146
public static final int MAGNITUDE_BYTES = 4;
143147
public static final int OVERSAMPLE_LIMIT = 10_000; // Max oversample allowed
148+
public static final float DEFAULT_OVERSAMPLE = 3.0F; // Default oversample value
144149

145150
private static DenseVectorFieldMapper toType(FieldMapper in) {
146151
return (DenseVectorFieldMapper) in;
@@ -1439,6 +1444,9 @@ public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOpti
14391444
RescoreVector rescoreVector = null;
14401445
if (hasRescoreIndexVersion(indexVersion)) {
14411446
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
1447+
if (rescoreVector == null && indexVersion.onOrAfter(DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ)) {
1448+
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
1449+
}
14421450
}
14431451
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
14441452
return new BBQHnswIndexOptions(m, efConstruction, rescoreVector);
@@ -1460,6 +1468,9 @@ public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOpti
14601468
RescoreVector rescoreVector = null;
14611469
if (hasRescoreIndexVersion(indexVersion)) {
14621470
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
1471+
if (rescoreVector == null && indexVersion.onOrAfter(DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ)) {
1472+
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
1473+
}
14631474
}
14641475
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
14651476
return new BBQFlatIndexOptions(rescoreVector);
@@ -2288,6 +2299,10 @@ int getVectorDimensions() {
22882299
ElementType getElementType() {
22892300
return elementType;
22902301
}
2302+
2303+
/**
 * Package-private accessor for the index options of this instance.
 *
 * @return the current {@code indexOptions} reference, returned as-is (no copy is made)
 */
IndexOptions getIndexOptions() {
2304+
return indexOptions;
2305+
}
22912306
}
22922307

22932308
private final IndexOptions indexOptions;

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,60 @@ public void testInvalidRescoreVector() {
10221022
}
10231023
}
10241024

1025+
public void testDefaultOversampleValue() throws IOException {
1026+
{
1027+
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
1028+
b.field("type", "dense_vector");
1029+
b.field("dims", 128);
1030+
b.field("index", true);
1031+
b.field("similarity", "dot_product");
1032+
b.startObject("index_options");
1033+
b.field("type", "bbq_hnsw");
1034+
b.endObject();
1035+
}));
1036+
1037+
DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
1038+
DenseVectorFieldMapper.BBQHnswIndexOptions indexOptions = (DenseVectorFieldMapper.BBQHnswIndexOptions) denseVectorFieldMapper
1039+
.fieldType()
1040+
.getIndexOptions();
1041+
assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F);
1042+
}
1043+
{
1044+
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
1045+
b.field("type", "dense_vector");
1046+
b.field("dims", 128);
1047+
b.field("index", true);
1048+
b.field("similarity", "dot_product");
1049+
b.startObject("index_options");
1050+
b.field("type", "bbq_flat");
1051+
b.endObject();
1052+
}));
1053+
1054+
DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
1055+
DenseVectorFieldMapper.BBQFlatIndexOptions indexOptions = (DenseVectorFieldMapper.BBQFlatIndexOptions) denseVectorFieldMapper
1056+
.fieldType()
1057+
.getIndexOptions();
1058+
assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F);
1059+
}
1060+
{
1061+
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
1062+
b.field("type", "dense_vector");
1063+
b.field("dims", 128);
1064+
b.field("index", true);
1065+
b.field("similarity", "dot_product");
1066+
b.startObject("index_options");
1067+
b.field("type", "int8_hnsw");
1068+
b.endObject();
1069+
}));
1070+
1071+
DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
1072+
DenseVectorFieldMapper.Int8HnswIndexOptions indexOptions = (DenseVectorFieldMapper.Int8HnswIndexOptions) denseVectorFieldMapper
1073+
.fieldType()
1074+
.getIndexOptions();
1075+
assertNull(indexOptions.rescoreVector);
1076+
}
1077+
}
1078+
10251079
public void testDims() {
10261080
{
10271081
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {

server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import java.util.stream.Collectors;
4747
import java.util.stream.Stream;
4848

49+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE;
4950
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.OVERSAMPLE_LIMIT;
5051
import static org.elasticsearch.search.SearchService.DEFAULT_SIZE;
5152
import static org.hamcrest.Matchers.containsString;
@@ -144,7 +145,7 @@ protected KnnVectorQueryBuilder doCreateTestQueryBuilder() {
144145
fieldName,
145146
k,
146147
numCands,
147-
randomRescoreVectorBuilder(),
148+
isIndextypeBBQ() ? randomBBQRescoreVectorBuilder() : randomRescoreVectorBuilder(),
148149
randomFloat()
149150
);
150151

@@ -161,6 +162,14 @@ protected KnnVectorQueryBuilder doCreateTestQueryBuilder() {
161162
return queryBuilder;
162163
}
163164

165+
private boolean isIndextypeBBQ() {
166+
return indexType.equals("bbq_hnsw") || indexType.equals("bbq_flat");
167+
}
168+
169+
protected RescoreVectorBuilder randomBBQRescoreVectorBuilder() {
170+
return new RescoreVectorBuilder(randomBoolean() ? DEFAULT_OVERSAMPLE : randomFloatBetween(1.0f, 10.0f, false));
171+
}
172+
164173
protected RescoreVectorBuilder randomRescoreVectorBuilder() {
165174
if (randomBoolean()) {
166175
return null;

0 commit comments

Comments
 (0)