Skip to content

Update Default value of Oversample for bbq #127134

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/127134.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 127134
summary: Define a default oversample value for dense vectors with bbq_hnsw/bbq_flat
area: Vector Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -568,3 +568,14 @@ setup:
- match: { hits.hits.1._score: $default_rescore1 }
- match: { hits.hits.2._score: $override_score2 }
- match: { hits.hits.2._score: $default_rescore2 }

---
# Requires the mapper.dense_vector.default_oversample_value_for_bbq cluster feature.
# Fetches the mapping of the bbq_hnsw index and expects rescore_vector.oversample to be 3.0
# (presumably the setup mapping does not set oversample explicitly - confirm against the setup section).
"default oversample value":
- requires:
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
reason: "Needs default_oversample_value_for_bbq feature"
- do:
indices.get_mapping:
index: bbq_hnsw

- match: { bbq_hnsw.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 }
Original file line number Diff line number Diff line change
Expand Up @@ -339,3 +339,14 @@ setup:
- match: { hits.hits.0._score: $rescore_score0 }
- match: { hits.hits.1._score: $rescore_score1 }
- match: { hits.hits.2._score: $rescore_score2 }

---
# Requires the mapper.dense_vector.default_oversample_value_for_bbq cluster feature.
# Fetches the mapping of the bbq_flat index and expects rescore_vector.oversample to be 3.0
# (presumably the setup mapping does not set oversample explicitly - confirm against the setup section).
"default oversample value":
- requires:
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
reason: "Needs default_oversample_value_for_bbq feature"
- do:
indices.get_mapping:
index: bbq_flat

- match: { bbq_flat.mappings.properties.vector.index_options.rescore_vector.oversample: 3.0 }
Original file line number Diff line number Diff line change
Expand Up @@ -495,3 +495,14 @@ setup:
- match: { hits.hits.0._score: $rescore_score0 }
- match: { hits.hits.1._score: $rescore_score1 }
- match: { hits.hits.2._score: $rescore_score2 }

---
# Requires the mapper.dense_vector.default_oversample_value_for_bbq cluster feature.
# Fetches the mapping of the int4_flat index and expects no rescore_vector entry under index_options,
# i.e. this index type is not given an oversample value in the returned mapping.
"no default oversample value":
- requires:
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
reason: "Needs default_oversample_value_for_bbq feature"
- do:
indices.get_mapping:
index: int4_flat

- not_exists: int4_flat.mappings.properties.vector.index_options.rescore_vector
Original file line number Diff line number Diff line change
Expand Up @@ -436,3 +436,14 @@ setup:
- match: { hits.hits.0._score: $rescore_score0 }
- match: { hits.hits.1._score: $rescore_score1 }
- match: { hits.hits.2._score: $rescore_score2 }

---
# Requires the mapper.dense_vector.default_oversample_value_for_bbq cluster feature.
# Fetches the mapping of the int8_flat index and expects no rescore_vector entry under index_options,
# i.e. this index type is not given an oversample value in the returned mapping.
"no default oversample value":
- requires:
cluster_features: ["mapper.dense_vector.default_oversample_value_for_bbq"]
reason: "Needs default_oversample_value_for_bbq feature"
- do:
indices.get_mapping:
index: int8_flat

- not_exists: int8_flat.mappings.properties.vector.index_options.rescore_vector
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion USE_LUCENE101_POSTINGS_FORMAT = def(9_021_0_00, Version.LUCENE_10_1_0);
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_0 = def(9_022_00_0, Version.LUCENE_10_2_0);
public static final IndexVersion UPGRADE_TO_LUCENE_10_2_1 = def(9_023_00_0, Version.LUCENE_10_2_1);
// Gate for the default BBQ oversample: the dense_vector mapper checks indexVersion.onOrAfter(...) against this
// version before filling in a rescore_vector for bbq_hnsw / bbq_flat index options that did not specify one.
public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = def(9_024_0_00, Version.LUCENE_10_2_1);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ;

/**
* Spec for mapper-related features.
Expand Down Expand Up @@ -66,7 +67,8 @@ public Set<NodeFeature> getTestFeatures() {
RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING,
DateFieldMapper.INVALID_DATE_FIX,
NPE_ON_DIMS_UPDATE_FIX,
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING
RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING,
USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,15 @@ public static boolean isNotUnitVector(float magnitude) {
// Index version from which BBQ index-option parsing reads rescore settings via RescoreVector.fromIndexOptions.
public static final IndexVersion ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS = IndexVersions.ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS;
public static final IndexVersion RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS =
IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS;
// Local alias of IndexVersions.DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ; on or after this version, bbq_hnsw / bbq_flat
// index options without a rescore_vector get new RescoreVector(DEFAULT_OVERSAMPLE).
public static final IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = IndexVersions.DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ;

public static final NodeFeature RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature("mapper.dense_vector.rescore_vector");
public static final NodeFeature RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature(
"mapper.dense_vector.rescore_zero_vector"
);
// Feature id that the "default oversample value" YAML REST tests list under requires.cluster_features.
public static final NodeFeature USE_DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = new NodeFeature(
"mapper.dense_vector.default_oversample_value_for_bbq"
);

public static final String CONTENT_TYPE = "dense_vector";
public static final short MAX_DIMS_COUNT = 4096; // maximum allowed number of dimensions
Expand All @@ -131,6 +135,7 @@ public static boolean isNotUnitVector(float magnitude) {
// vector
public static final int MAGNITUDE_BYTES = 4;
public static final int OVERSAMPLE_LIMIT = 10_000; // Max oversample allowed
public static final float DEFAULT_OVERSAMPLE = 3.0F; // Default oversample value

private static DenseVectorFieldMapper toType(FieldMapper in) {
return (DenseVectorFieldMapper) in;
Expand Down Expand Up @@ -1462,6 +1467,9 @@ public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOpti
RescoreVector rescoreVector = null;
if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) {
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
if (rescoreVector == null && indexVersion.onOrAfter(DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ)) {
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
}
}
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
return new BBQHnswIndexOptions(m, efConstruction, rescoreVector);
Expand All @@ -1483,6 +1491,9 @@ public IndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOpti
RescoreVector rescoreVector = null;
if (indexVersion.onOrAfter(ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS)) {
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
if (rescoreVector == null && indexVersion.onOrAfter(DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ)) {
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
}
}
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
return new BBQFlatIndexOptions(rescoreVector);
Expand Down Expand Up @@ -2311,6 +2322,10 @@ int getVectorDimensions() {
/** Returns the {@code elementType} value held by this object. */
ElementType getElementType() {
return elementType;
}

/**
 * Returns the {@code indexOptions} value held by this object.
 * Declared without an access modifier, like the neighbouring {@code getElementType()} accessor.
 */
IndexOptions getIndexOptions() {
return indexOptions;
}
}

private final IndexOptions indexOptions;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,60 @@ public void testInvalidRescoreVector() {
}
}

public void testDefaultOversampleValue() throws IOException {
{
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
b.field("type", "dense_vector");
b.field("dims", 128);
b.field("index", true);
b.field("similarity", "dot_product");
b.startObject("index_options");
b.field("type", "bbq_hnsw");
b.endObject();
}));

DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
DenseVectorFieldMapper.BBQHnswIndexOptions indexOptions = (DenseVectorFieldMapper.BBQHnswIndexOptions) denseVectorFieldMapper
.fieldType()
.getIndexOptions();
assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F);
}
{
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
b.field("type", "dense_vector");
b.field("dims", 128);
b.field("index", true);
b.field("similarity", "dot_product");
b.startObject("index_options");
b.field("type", "bbq_flat");
b.endObject();
}));

DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
DenseVectorFieldMapper.BBQFlatIndexOptions indexOptions = (DenseVectorFieldMapper.BBQFlatIndexOptions) denseVectorFieldMapper
.fieldType()
.getIndexOptions();
assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F);
}
{
DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> {
b.field("type", "dense_vector");
b.field("dims", 128);
b.field("index", true);
b.field("similarity", "dot_product");
b.startObject("index_options");
b.field("type", "int8_hnsw");
b.endObject();
}));

DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) mapperService.mappers().getMapper("field");
DenseVectorFieldMapper.Int8HnswIndexOptions indexOptions = (DenseVectorFieldMapper.Int8HnswIndexOptions) denseVectorFieldMapper
.fieldType()
.getIndexOptions();
assertNull(indexOptions.rescoreVector);
}
}

public void testDims() {
{
Exception e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE;
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.OVERSAMPLE_LIMIT;
import static org.elasticsearch.search.SearchService.DEFAULT_SIZE;
import static org.hamcrest.Matchers.containsString;
Expand Down Expand Up @@ -144,7 +145,7 @@ protected KnnVectorQueryBuilder doCreateTestQueryBuilder() {
fieldName,
k,
numCands,
randomRescoreVectorBuilder(),
isIndextypeBBQ() ? randomBBQRescoreVectorBuilder() : randomRescoreVectorBuilder(),
randomFloat()
);

Expand All @@ -161,6 +162,14 @@ protected KnnVectorQueryBuilder doCreateTestQueryBuilder() {
return queryBuilder;
}

/** Returns true when {@code indexType} equals "bbq_hnsw" or "bbq_flat". */
private boolean isIndextypeBBQ() {
    final boolean hnswVariant = indexType.equals("bbq_hnsw");
    return hnswVariant || indexType.equals("bbq_flat");
}

/**
 * Creates a rescore builder for BBQ index types. Unlike {@code randomRescoreVectorBuilder()},
 * this never returns null: a coin flip picks between {@code DEFAULT_OVERSAMPLE} and the
 * result of {@code randomFloatBetween(1.0f, 10.0f, false)}.
 */
protected RescoreVectorBuilder randomBBQRescoreVectorBuilder() {
    final float oversample;
    if (randomBoolean()) {
        oversample = DEFAULT_OVERSAMPLE;
    } else {
        // Drawn only on this branch, so the sequence of random calls is unchanged.
        oversample = randomFloatBetween(1.0f, 10.0f, false);
    }
    return new RescoreVectorBuilder(oversample);
}

protected RescoreVectorBuilder randomRescoreVectorBuilder() {
if (randomBoolean()) {
return null;
Expand Down
Loading