Skip to content

Commit f5d50db

Browse files
[8.19] Update Default value of Oversample for bbq (#127134) (#127747)
* Update Default value of Oversample for bbq (#127134) * Unit test to validate default behavior * adding default value to oversample for bbq * Fix code style issue * Update docs/changelog/127134.yaml * Update changelog * Adding index version to support only new indices * Update index version name to better match * Adding a simple yaml test to verify the yaml functionality for oversample value * Refactor knn float to add rescore vector by default when index type is one of bbq * adding yaml tests to verify oversample default value * Fixing format issue for not_exists (cherry picked from commit cd4fcbf) * Adding backport index versions for PR #127134 (#127724) This adds backport index versions in preparation for backporting #127134 --------- Co-authored-by: Samiul Monir <[email protected]>
1 parent b763932 commit f5d50db

File tree

10 files changed

+135
-32
lines changed

10 files changed

+135
-32
lines changed

docs/changelog/127134.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 127134
2+
summary: Define a default oversample value for dense vectors with bbq_hnsw/bbq_flat
3+
area: Vector Search
4+
type: enhancement
5+
issues: []

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml

+8-23
Original file line numberDiff line numberDiff line change
@@ -572,28 +572,13 @@ setup:
572572
- match: { hits.hits.2._score: $override_score2 }
573573
- match: { hits.hits.2._score: $default_rescore2 }
574574

575+
---
576+
"default oversample value":
577+
- requires:
578+
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
579+
reason: "Needs default_oversample_value_for_bbq feature"
575580
- do:
576-
headers:
577-
Content-Type: application/json
578-
search:
579-
rest_total_hits_as_int: true
580-
index: bbq_rescore_zero_hnsw
581-
body:
582-
knn:
583-
field: vector
584-
query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393,
585-
0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015,
586-
0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259,
587-
-0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 ,
588-
-0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232,
589-
-0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034,
590-
-0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582,
591-
-0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158]
592-
k: 3
593-
num_candidates: 3
581+
indices.get_mapping:
582+
index: bbq_hnsw
594583

595-
# Compare scores as hit IDs may change depending on how things are distributed
596-
- match: { hits.total: 3 }
597-
- match: { hits.hits.0._score: $raw_score0 }
598-
- match: { hits.hits.1._score: $raw_score1 }
599-
- match: { hits.hits.2._score: $raw_score2 }
584+
- match: { bbq_hnsw.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 }

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_bbq_flat.yml

+11
Original file line numberDiff line numberDiff line change
@@ -342,3 +342,14 @@ setup:
342342
- match: { hits.hits.0._score: $rescore_score0 }
343343
- match: { hits.hits.1._score: $rescore_score1 }
344344
- match: { hits.hits.2._score: $rescore_score2 }
345+
346+
---
347+
"default oversample value":
348+
- requires:
349+
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
350+
reason: "Needs default_oversample_value_for_bbq feature"
351+
- do:
352+
indices.get_mapping:
353+
index: bbq_flat
354+
355+
- match: { bbq_flat.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 }

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int4_flat.yml

+11
Original file line numberDiff line numberDiff line change
@@ -498,3 +498,14 @@ setup:
498498
- match: { hits.hits.0._score: $rescore_score0 }
499499
- match: { hits.hits.1._score: $rescore_score1 }
500500
- match: { hits.hits.2._score: $rescore_score2 }
501+
502+
---
503+
"no default oversample value":
504+
- requires:
505+
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
506+
reason: "Needs default_oversample_value_for_bbq feature"
507+
- do:
508+
indices.get_mapping:
509+
index: int4_flat
510+
511+
- not_exists: int4_flat.mappings.properties.vector.index_options.rescore_vector

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/42_knn_search_int8_flat.yml

+11
Original file line numberDiff line numberDiff line change
@@ -436,3 +436,14 @@ setup:
436436
- match: { hits.hits.0._score: $rescore_score0 }
437437
- match: { hits.hits.1._score: $rescore_score1 }
438438
- match: { hits.hits.2._score: $rescore_score2 }
439+
440+
---
441+
"no default oversample value":
442+
- requires:
443+
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
444+
reason: "Needs default_oversample_value_for_bbq feature"
445+
- do:
446+
indices.get_mapping:
447+
index: int8_flat
448+
449+
- not_exists: int8_flat.mappings.properties.vector.index_options.rescore_vector

server/src/main/java/org/elasticsearch/index/IndexVersions.java

+1
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ private static IndexVersion def(int id, Version luceneVersion) {
130130
public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY = def(8_527_0_00, Version.LUCENE_9_12_1);
131131
public static final IndexVersion ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS = def(8_528_0_00, Version.LUCENE_9_12_1);
132132
public static final IndexVersion RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS = def(8_529_0_00, Version.LUCENE_9_12_1);
133+
public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ_BACKPORT_8_X = def(8_530_0_00, Version.LUCENE_9_12_1);
133134
/*
134135
* STOP! READ THIS FIRST! No, really,
135136
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING;
2121
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING;
22+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ;
2223

2324
/**
2425
* Spec for mapper-related features.
@@ -95,7 +96,8 @@ public Set<NodeFeature> getTestFeatures() {
9596
DateFieldMapper.INVALID_DATE_FIX,
9697
NPE_ON_DIMS_UPDATE_FIX,
9798
RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING,
98-
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING
99+
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING,
100+
USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ
99101
);
100102
}
101103
}

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

+21-7
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,6 @@
9494

9595
import static org.elasticsearch.common.Strings.format;
9696
import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
97-
import static org.elasticsearch.index.IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW;
98-
import static org.elasticsearch.index.IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS;
9997

10098
/**
10199
* A {@link FieldMapper} for indexing a dense vector of floats.
@@ -118,21 +116,26 @@ private static boolean hasRescoreIndexVersion(IndexVersion version) {
118116
}
119117

120118
private static boolean allowsZeroRescore(IndexVersion version) {
121-
return version.onOrAfter(RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS);
119+
return version.onOrAfter(IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS);
120+
}
121+
122+
private static boolean defaultOversampleForBBQ(IndexVersion version) {
123+
return version.onOrAfter(IndexVersions.DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ_BACKPORT_8_X);
122124
}
123125

124126
public static final IndexVersion MAGNITUDE_STORED_INDEX_VERSION = IndexVersions.V_7_5_0;
125127
public static final IndexVersion INDEXED_BY_DEFAULT_INDEX_VERSION = IndexVersions.FIRST_DETACHED_INDEX_VERSION;
126128
public static final IndexVersion NORMALIZE_COSINE = IndexVersions.NORMALIZED_VECTOR_COSINE;
127-
public static final IndexVersion DEFAULT_TO_INT8 = DEFAULT_DENSE_VECTOR_TO_INT8_HNSW;
129+
public static final IndexVersion DEFAULT_TO_INT8 = IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW;
128130
public static final IndexVersion LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION = IndexVersions.V_8_9_0;
129-
public static final IndexVersion RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS =
130-
IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS;
131131

132132
public static final NodeFeature RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature("mapper.dense_vector.rescore_vector");
133133
public static final NodeFeature RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature(
134134
"mapper.dense_vector.rescore_zero_vector"
135135
);
136+
public static final NodeFeature USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = new NodeFeature(
137+
"mapper.dense_vector.default_oversample_value_for_bbq"
138+
);
136139

137140
public static final String CONTENT_TYPE = "dense_vector";
138141
public static short MAX_DIMS_COUNT = 4096; // maximum allowed number of dimensions
@@ -141,6 +144,7 @@ private static boolean allowsZeroRescore(IndexVersion version) {
141144
public static short MIN_DIMS_FOR_DYNAMIC_FLOAT_MAPPING = 128; // minimum number of dims for floats to be dynamically mapped to vector
142145
public static final int MAGNITUDE_BYTES = 4;
143146
public static final int OVERSAMPLE_LIMIT = 10_000; // Max oversample allowed
147+
public static final float DEFAULT_OVERSAMPLE = 3.0F; // Default oversample value
144148

145149
private static DenseVectorFieldMapper toType(FieldMapper in) {
146150
return (DenseVectorFieldMapper) in;
@@ -196,7 +200,7 @@ public Builder(String name, IndexVersion indexVersionCreated) {
196200
super(name);
197201
this.indexVersionCreated = indexVersionCreated;
198202
final boolean indexedByDefault = indexVersionCreated.onOrAfter(INDEXED_BY_DEFAULT_INDEX_VERSION);
199-
final boolean defaultInt8Hnsw = indexVersionCreated.onOrAfter(DEFAULT_DENSE_VECTOR_TO_INT8_HNSW);
203+
final boolean defaultInt8Hnsw = indexVersionCreated.onOrAfter(IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW);
200204
this.indexed = Parameter.indexParam(m -> toType(m).fieldType().indexed, indexedByDefault);
201205
if (indexedByDefault) {
202206
// Only serialize on newer index versions to prevent breaking existing indices when upgrading
@@ -1439,6 +1443,9 @@ public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOpti
14391443
RescoreVector rescoreVector = null;
14401444
if (hasRescoreIndexVersion(indexVersion)) {
14411445
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
1446+
if (rescoreVector == null && defaultOversampleForBBQ(indexVersion)) {
1447+
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
1448+
}
14421449
}
14431450
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
14441451
return new BBQHnswIndexOptions(m, efConstruction, rescoreVector);
@@ -1460,6 +1467,9 @@ public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOpti
14601467
RescoreVector rescoreVector = null;
14611468
if (hasRescoreIndexVersion(indexVersion)) {
14621469
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
1470+
if (rescoreVector == null && defaultOversampleForBBQ(indexVersion)) {
1471+
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
1472+
}
14631473
}
14641474
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
14651475
return new BBQFlatIndexOptions(rescoreVector);
@@ -2288,6 +2298,10 @@ int getVectorDimensions() {
22882298
ElementType getElementType() {
22892299
return elementType;
22902300
}
2301+
2302+
IndexOptions getIndexOptions() {
2303+
return indexOptions;
2304+
}
22912305
}
22922306

22932307
private final IndexOptions indexOptions;

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java

+54
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,60 @@ public void testInvalidRescoreVector() {
10221022
}
10231023
}
10241024

1025+
public void testDefaultOversampleValue() throws IOException {
1026+
{
1027+
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
1028+
b.field("type", "dense_vector");
1029+
b.field("dims", 128);
1030+
b.field("index", true);
1031+
b.field("similarity", "dot_product");
1032+
b.startObject("index_options");
1033+
b.field("type", "bbq_hnsw");
1034+
b.endObject();
1035+
}));
1036+
1037+
DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
1038+
DenseVectorFieldMapper.BBQHnswIndexOptions indexOptions = (DenseVectorFieldMapper.BBQHnswIndexOptions) denseVectorFieldMapper
1039+
.fieldType()
1040+
.getIndexOptions();
1041+
assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F);
1042+
}
1043+
{
1044+
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
1045+
b.field("type", "dense_vector");
1046+
b.field("dims", 128);
1047+
b.field("index", true);
1048+
b.field("similarity", "dot_product");
1049+
b.startObject("index_options");
1050+
b.field("type", "bbq_flat");
1051+
b.endObject();
1052+
}));
1053+
1054+
DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
1055+
DenseVectorFieldMapper.BBQFlatIndexOptions indexOptions = (DenseVectorFieldMapper.BBQFlatIndexOptions) denseVectorFieldMapper
1056+
.fieldType()
1057+
.getIndexOptions();
1058+
assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F);
1059+
}
1060+
{
1061+
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
1062+
b.field("type", "dense_vector");
1063+
b.field("dims", 128);
1064+
b.field("index", true);
1065+
b.field("similarity", "dot_product");
1066+
b.startObject("index_options");
1067+
b.field("type", "int8_hnsw");
1068+
b.endObject();
1069+
}));
1070+
1071+
DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
1072+
DenseVectorFieldMapper.Int8HnswIndexOptions indexOptions = (DenseVectorFieldMapper.Int8HnswIndexOptions) denseVectorFieldMapper
1073+
.fieldType()
1074+
.getIndexOptions();
1075+
assertNull(indexOptions.rescoreVector);
1076+
}
1077+
}
1078+
10251079
public void testDims() {
10261080
{
10271081
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {

server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java

+10-1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import java.util.stream.Collectors;
4747
import java.util.stream.Stream;
4848

49+
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE;
4950
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.OVERSAMPLE_LIMIT;
5051
import static org.elasticsearch.search.SearchService.DEFAULT_SIZE;
5152
import static org.hamcrest.Matchers.containsString;
@@ -144,7 +145,7 @@ protected KnnVectorQueryBuilder doCreateTestQueryBuilder() {
144145
fieldName,
145146
k,
146147
numCands,
147-
randomRescoreVectorBuilder(),
148+
isIndextypeBBQ() ? randomBBQRescoreVectorBuilder() : randomRescoreVectorBuilder(),
148149
randomFloat()
149150
);
150151

@@ -161,6 +162,14 @@ protected KnnVectorQueryBuilder doCreateTestQueryBuilder() {
161162
return queryBuilder;
162163
}
163164

165+
private boolean isIndextypeBBQ() {
166+
return indexType.equals("bbq_hnsw") || indexType.equals("bbq_flat");
167+
}
168+
169+
protected RescoreVectorBuilder randomBBQRescoreVectorBuilder() {
170+
return new RescoreVectorBuilder(randomBoolean() ? DEFAULT_OVERSAMPLE : randomFloatBetween(1.0f, 10.0f, false));
171+
}
172+
164173
protected RescoreVectorBuilder randomRescoreVectorBuilder() {
165174
if (randomBoolean()) {
166175
return null;

0 commit comments

Comments
 (0)