diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b6dfd57c0..74f891cc59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ All notable changes to this project are documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). See the [CONTRIBUTING guide](./CONTRIBUTING.md#Changelog) for instructions on how to add changelog entries. ## [Unreleased 3.3](https://github.com/opensearch-project/k-NN/compare/main...HEAD) + +### Features +* Integrates Lucene's better binary quantization [#2838](https://github.com/opensearch-project/k-NN/pull/2838) + ### Refactoring * Refactored the KNN Stat files for better readability. diff --git a/qa/restart-upgrade/build.gradle b/qa/restart-upgrade/build.gradle index 4188232485..7bde8b5e96 100644 --- a/qa/restart-upgrade/build.gradle +++ b/qa/restart-upgrade/build.gradle @@ -318,6 +318,12 @@ testClusters { } } + if (knn_bwc_version.startsWith("1.") || knn_bwc_version.startsWith("2.") || knn_bwc_version.startsWith("3.0")) { + filter { + excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testKNNIndexLuceneBBQ" + } + } + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/IndexingIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/IndexingIT.java index 707c005c0f..b03d26bb0b 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/IndexingIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/IndexingIT.java @@ -24,7 +24,6 @@ import java.io.IOException; import java.util.Collections; -import java.io.IOException; import java.util.List; import java.util.Map; @@ -51,6 +50,7 @@ 
import static org.opensearch.knn.common.KNNConstants.MODE_PARAMETER; import static org.opensearch.knn.common.KNNConstants.NAME; import static org.opensearch.knn.common.KNNConstants.PARAMETERS; +import static org.opensearch.knn.common.KNNConstants.ENCODER_BBQ; public class IndexingIT extends AbstractRestartUpgradeTestCase { private static final String TEST_FIELD = "test-field"; @@ -659,4 +659,70 @@ public void testRandomRotationBWC() throws Exception { deleteKNNIndex(newIndex); } } + + public void testKNNIndexLuceneBBQ() throws Exception { + waitForClusterHealthGreen(NODES_BWC_CLUSTER); + + // Skip test if BBQ encoder is not supported in the old cluster version + if (isBBQEncoderSupported(getBWCVersion()) == false) { + logger.info("Skipping testKNNIndexLuceneBBQ as BBQ encoder is not supported in version: {}", getBWCVersion()); + return; + } + + int k = 4; + int dimension = 2; + + if (isRunningAgainstOldCluster()) { + String mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject(TEST_FIELD) + .field(VECTOR_TYPE, KNN_VECTOR) + .field(DIMENSION, dimension) + .startObject(KNN_METHOD) + .field(NAME, METHOD_HNSW) + .field(METHOD_PARAMETER_SPACE_TYPE, SpaceType.INNER_PRODUCT.getValue()) + .field(KNN_ENGINE, LUCENE_NAME) + .startObject(PARAMETERS) + .startObject(METHOD_ENCODER_PARAMETER) + .field(NAME, ENCODER_BBQ) + .endObject() + .field(METHOD_PARAMETER_EF_CONSTRUCTION, 256) + .field(METHOD_PARAMETER_M, 16) + .endObject() + .endObject() + .endObject() + .endObject() + .endObject() + .toString(); + createKnnIndex(testIndex, getKNNDefaultIndexSettings(), mapping); + + Float[] vector1 = { -10.6f, 25.48f }; + Float[] vector2 = { -10.8f, 25.48f }; + Float[] vector3 = { -11.0f, 25.48f }; + Float[] vector4 = { -11.2f, 25.48f }; + addKnnDoc(testIndex, "1", TEST_FIELD, vector1); + addKnnDoc(testIndex, "2", TEST_FIELD, vector2); + addKnnDoc(testIndex, "3", TEST_FIELD, vector3); + addKnnDoc(testIndex, "4", TEST_FIELD, vector4); + + 
float[] queryVector = { -10.5f, 25.48f }; + Response searchResponse = searchKNNIndex(testIndex, new KNNQueryBuilder(TEST_FIELD, queryVector, k), k); + List results = parseSearchResponse(EntityUtils.toString(searchResponse.getEntity()), TEST_FIELD); + assertEquals(k, results.size()); + for (int i = 0; i < k; i++) { + assertEquals(k - i, Integer.parseInt(results.get(i).getDocId())); + } + } else { + float[] queryVector = { -10.5f, 25.48f }; + Response searchResponse = searchKNNIndex(testIndex, new KNNQueryBuilder(TEST_FIELD, queryVector, k), k); + List results = parseSearchResponse(EntityUtils.toString(searchResponse.getEntity()), TEST_FIELD); + assertEquals(k, results.size()); + for (int i = 0; i < k; i++) { + assertEquals(k - i, Integer.parseInt(results.get(i).getDocId())); + } + deleteKNNIndex(testIndex); + } + } + } diff --git a/src/main/java/org/opensearch/knn/common/KNNConstants.java b/src/main/java/org/opensearch/knn/common/KNNConstants.java index c137651fdb..bf7106ba28 100644 --- a/src/main/java/org/opensearch/knn/common/KNNConstants.java +++ b/src/main/java/org/opensearch/knn/common/KNNConstants.java @@ -102,6 +102,9 @@ public class KNNConstants { public static final double MAXIMUM_CONFIDENCE_INTERVAL = 1.0; public static final String LUCENE_SQ_BITS = "bits"; public static final int LUCENE_SQ_DEFAULT_BITS = 7; + public static final String ENCODER_BBQ = "binary"; + public static final int LUCENE_BBQ_DEFAULT_BITS = 1; + public static final String LUCENE_BBQ_BITS = "bits"; // nmslib specific constants @Deprecated(since = "2.19.0", forRemoval = true) diff --git a/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java index faf044399d..059af4f830 100644 --- a/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java +++ b/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java @@ -16,6 +16,7 @@ import 
org.opensearch.knn.index.KNNSettings; import org.opensearch.knn.index.codec.KNN990Codec.NativeEngines990KnnVectorsFormat; import org.opensearch.knn.index.codec.nativeindex.NativeIndexBuildStrategyFactory; +import org.opensearch.knn.index.codec.params.KNNBBQVectorsFormatParams; import org.opensearch.knn.index.codec.params.KNNScalarQuantizedVectorsFormatParams; import org.opensearch.knn.index.codec.params.KNNVectorsFormatParams; import org.opensearch.knn.index.engine.KNNEngine; @@ -44,7 +45,8 @@ public abstract class BasePerFieldKnnVectorsFormat extends PerFieldKnnVectorsFor private final int defaultBeamWidth; private final Supplier defaultFormatSupplier; private final Function vectorsFormatSupplier; - private Function scalarQuantizedVectorsFormatSupplier; + private final Function scalarQuantizedVectorsFormatSupplier; + private final Function bbqVectorsFormatSupplier; private final NativeIndexBuildStrategyFactory nativeIndexBuildStrategyFactory; private static final String MAX_CONNECTIONS = "max_connections"; private static final String BEAM_WIDTH = "beam_width"; @@ -56,7 +58,7 @@ public BasePerFieldKnnVectorsFormat( Supplier defaultFormatSupplier, Function vectorsFormatSupplier ) { - this(mapperService, defaultMaxConnections, defaultBeamWidth, defaultFormatSupplier, vectorsFormatSupplier, null); + this(mapperService, defaultMaxConnections, defaultBeamWidth, defaultFormatSupplier, vectorsFormatSupplier, null, null); } public BasePerFieldKnnVectorsFormat( @@ -65,7 +67,8 @@ public BasePerFieldKnnVectorsFormat( int defaultBeamWidth, Supplier defaultFormatSupplier, Function vectorsFormatSupplier, - Function scalarQuantizedVectorsFormatSupplier + Function scalarQuantizedVectorsFormatSupplier, + Function bbqVectorsFormatSupplier ) { this( mapperService, @@ -74,6 +77,7 @@ public BasePerFieldKnnVectorsFormat( defaultFormatSupplier, vectorsFormatSupplier, scalarQuantizedVectorsFormatSupplier, + bbqVectorsFormatSupplier, new NativeIndexBuildStrategyFactory() ); } @@ -110,6 
+114,19 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) { if (engine == KNNEngine.LUCENE) { if (params != null && params.containsKey(METHOD_ENCODER_PARAMETER)) { + KNNBBQVectorsFormatParams bbqParams = new KNNBBQVectorsFormatParams(params, defaultMaxConnections, defaultBeamWidth); + if (bbqParams.validate(params)) { + log.debug( + "Initialize KNN vector format for field [{}] with binary quantization, params [{}] = \"{}\", [{}] = \"{}\"", + field, + MAX_CONNECTIONS, + bbqParams.getMaxConnections(), + BEAM_WIDTH, + bbqParams.getBeamWidth() + ); + return bbqVectorsFormatSupplier.apply(bbqParams); + } + KNNScalarQuantizedVectorsFormatParams knnScalarQuantizedVectorsFormatParams = new KNNScalarQuantizedVectorsFormatParams( params, defaultMaxConnections, @@ -117,7 +134,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) { ); if (knnScalarQuantizedVectorsFormatParams.validate(params)) { log.debug( - "Initialize KNN vector format for field [{}] with params [{}] = \"{}\", [{}] = \"{}\", [{}] = \"{}\", [{}] = \"{}\"", + "Initialize KNN vector format for field [{}] with scalar quantization, params [{}] = \"{}\", [{}] = \"{}\", [{}] = \"{}\", [{}] = \"{}\"", field, MAX_CONNECTIONS, knnScalarQuantizedVectorsFormatParams.getMaxConnections(), diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120PerFieldKnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120PerFieldKnnVectorsFormat.java index 82064b1131..ecbb99f234 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120PerFieldKnnVectorsFormat.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120PerFieldKnnVectorsFormat.java @@ -5,6 +5,7 @@ package org.opensearch.knn.index.codec.KNN9120Codec; +import org.apache.lucene.codecs.lucene102.Lucene102HnswBinaryQuantizedVectorsFormat; import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat; import 
org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.opensearch.common.collect.Tuple; @@ -76,6 +77,15 @@ public KNN9120PerFieldKnnVectorsFormat( mergeThreadCountAndExecutorService.v2() ); }, + knnBBQVectorsFormatParams -> { + final Tuple mergeThreadCountAndExecutorService = getMergeThreadCountAndExecutorService(); + return new Lucene102HnswBinaryQuantizedVectorsFormat( + knnBBQVectorsFormatParams.getMaxConnections(), + knnBBQVectorsFormatParams.getBeamWidth(), + mergeThreadCountAndExecutorService.v1(), + mergeThreadCountAndExecutorService.v2() + ); + }, nativeIndexBuildStrategyFactory ); } diff --git a/src/main/java/org/opensearch/knn/index/codec/backward_codecs/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/backward_codecs/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java index 39a3c5bc8d..97fb084c18 100644 --- a/src/main/java/org/opensearch/knn/index/codec/backward_codecs/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java +++ b/src/main/java/org/opensearch/knn/index/codec/backward_codecs/KNN990Codec/KNN990PerFieldKnnVectorsFormat.java @@ -5,6 +5,7 @@ package org.opensearch.knn.index.codec.backward_codecs.KNN990Codec; +import org.apache.lucene.codecs.lucene102.Lucene102HnswBinaryQuantizedVectorsFormat; import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.opensearch.index.mapper.MapperService; @@ -37,6 +38,12 @@ public KNN990PerFieldKnnVectorsFormat(final Optional mapperServic knnScalarQuantizedVectorsFormatParams.isCompressFlag(), knnScalarQuantizedVectorsFormatParams.getConfidenceInterval(), null + ), + knnBBQVectorsFormatParams -> new Lucene102HnswBinaryQuantizedVectorsFormat( + knnBBQVectorsFormatParams.getMaxConnections(), + knnBBQVectorsFormatParams.getBeamWidth(), + NUM_MERGE_WORKERS, + null ) ); } diff --git 
a/src/main/java/org/opensearch/knn/index/codec/params/KNNBBQVectorsFormatParams.java b/src/main/java/org/opensearch/knn/index/codec/params/KNNBBQVectorsFormatParams.java
new file mode 100644
index 0000000000..eb0d2a4577
--- /dev/null
+++ b/src/main/java/org/opensearch/knn/index/codec/params/KNNBBQVectorsFormatParams.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.knn.index.codec.params;
+
+import org.opensearch.knn.index.engine.MethodComponentContext;
+
+import java.util.Map;
+
+import static org.opensearch.knn.common.KNNConstants.ENCODER_BBQ;
+import static org.opensearch.knn.common.KNNConstants.METHOD_ENCODER_PARAMETER;
+
+/**
+ * Params for Lucene102HnswBinaryQuantizedVectorsFormat. Max connections and beam width are inherited from
+ * {@link KNNVectorsFormatParams}; this class only adds the check that the configured encoder is BBQ.
+ */
+public class KNNBBQVectorsFormatParams extends KNNVectorsFormatParams {
+
+    /**
+     * Builds the params. Callers construct this object before calling {@link #validate(Map)}, so this
+     * constructor must not cast or dereference the encoder entry itself; that is left to {@code validate}.
+     *
+     * @param params method parameter map, expected to hold the encoder under {@code METHOD_ENCODER_PARAMETER}
+     * @param defaultMaxConnections forwarded to the superclass as the max-connections default
+     * @param defaultBeamWidth forwarded to the superclass as the beam-width default
+     */
+    public KNNBBQVectorsFormatParams(Map<String, Object> params, int defaultMaxConnections, int defaultBeamWidth) {
+        super(params, defaultMaxConnections, defaultBeamWidth);
+    }
+
+    /**
+     * Tells whether {@code params} selects the BBQ encoder.
+     *
+     * @param params method parameter map to inspect
+     * @return {@code true} only when the encoder entry is a {@link MethodComponentContext} named {@code ENCODER_BBQ}
+     */
+    @Override
+    public boolean validate(Map<String, Object> params) {
+        // instanceof is false for null, so a missing encoder entry is rejected by the same check
+        final Object encoder = params.get(METHOD_ENCODER_PARAMETER);
+        if ((encoder instanceof MethodComponentContext) == false) {
+            return false;
+        }
+        return ENCODER_BBQ.equals(((MethodComponentContext) encoder).getName());
+    }
+}
diff --git a/src/main/java/org/opensearch/knn/index/engine/lucene/LuceneBBQEncoder.java b/src/main/java/org/opensearch/knn/index/engine/lucene/LuceneBBQEncoder.java
new file mode 100644
index 0000000000..b498ec9a44
--- /dev/null
+++ b/src/main/java/org/opensearch/knn/index/engine/lucene/LuceneBBQEncoder.java
@@ -0,0 +1,46 @@
+/*
+ *
Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.engine.lucene; + +import com.google.common.collect.ImmutableSet; +import org.opensearch.knn.index.VectorDataType; +import org.opensearch.knn.index.engine.*; +import org.opensearch.knn.index.mapper.CompressionLevel; + +import java.util.List; +import java.util.Set; + +import static org.opensearch.knn.common.KNNConstants.*; + +/** + * Lucene BBQ (Better Binary Quantization) encoder + */ +public class LuceneBBQEncoder implements Encoder { + private static final Set SUPPORTED_DATA_TYPES = ImmutableSet.of(VectorDataType.FLOAT); + + private final static List LUCENE_BBQ_BITS_SUPPORTED = List.of(1); + + private final static MethodComponent METHOD_COMPONENT = MethodComponent.Builder.builder(ENCODER_BBQ) + .addSupportedDataTypes(SUPPORTED_DATA_TYPES) + .addParameter( + LUCENE_BBQ_BITS, + new Parameter.IntegerParameter(LUCENE_BBQ_BITS, LUCENE_BBQ_DEFAULT_BITS, (v, context) -> LUCENE_BBQ_BITS_SUPPORTED.contains(v)) + ) + .build(); + + @Override + public MethodComponent getMethodComponent() { + return METHOD_COMPONENT; + } + + @Override + public CompressionLevel calculateCompressionLevel( + MethodComponentContext methodComponentContext, + KNNMethodConfigContext knnMethodConfigContext + ) { + return CompressionLevel.x32; + } +} diff --git a/src/main/java/org/opensearch/knn/index/engine/lucene/LuceneHNSWMethod.java b/src/main/java/org/opensearch/knn/index/engine/lucene/LuceneHNSWMethod.java index 701f797683..2255be9bb9 100644 --- a/src/main/java/org/opensearch/knn/index/engine/lucene/LuceneHNSWMethod.java +++ b/src/main/java/org/opensearch/knn/index/engine/lucene/LuceneHNSWMethod.java @@ -45,7 +45,8 @@ public class LuceneHNSWMethod extends AbstractKNNMethod { ); final static Encoder SQ_ENCODER = new LuceneSQEncoder(); - final static Map SUPPORTED_ENCODERS = Map.of(SQ_ENCODER.getName(), SQ_ENCODER); + final static Encoder BBQ_ENCODER = new LuceneBBQEncoder(); + final 
static Map SUPPORTED_ENCODERS = Map.of(SQ_ENCODER.getName(), SQ_ENCODER, BBQ_ENCODER.getName(), BBQ_ENCODER); final static MethodComponent HNSW_METHOD_COMPONENT = initMethodComponent(); diff --git a/src/main/java/org/opensearch/knn/index/engine/lucene/LuceneMethodResolver.java b/src/main/java/org/opensearch/knn/index/engine/lucene/LuceneMethodResolver.java index 6546d9f933..64e74be1e2 100644 --- a/src/main/java/org/opensearch/knn/index/engine/lucene/LuceneMethodResolver.java +++ b/src/main/java/org/opensearch/knn/index/engine/lucene/LuceneMethodResolver.java @@ -24,11 +24,14 @@ import static org.opensearch.knn.common.KNNConstants.METHOD_ENCODER_PARAMETER; import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW; import static org.opensearch.knn.index.engine.lucene.LuceneHNSWMethod.HNSW_METHOD_COMPONENT; -import static org.opensearch.knn.index.engine.lucene.LuceneHNSWMethod.SQ_ENCODER; public class LuceneMethodResolver extends AbstractMethodResolver { - private static final Set SUPPORTED_COMPRESSION_LEVELS = Set.of(CompressionLevel.x1, CompressionLevel.x4); + private static final Set SUPPORTED_COMPRESSION_LEVELS = Set.of( + CompressionLevel.x1, + CompressionLevel.x4, + CompressionLevel.x32 + ); @Override public ResolvedMethodContext resolveMethod( @@ -69,10 +72,21 @@ protected void resolveEncoder(KNNMethodContext resolvedKNNMethodContext, KNNMeth } MethodComponentContext methodComponentContext = resolvedKNNMethodContext.getMethodComponentContext(); - MethodComponentContext encoderComponentContext = new MethodComponentContext(SQ_ENCODER.getName(), new HashMap<>()); + + String encoderName; + MethodComponent encoderComponent; + if (resolvedCompressionLevel == CompressionLevel.x32) { + encoderName = LuceneHNSWMethod.BBQ_ENCODER.getName(); + encoderComponent = LuceneHNSWMethod.BBQ_ENCODER.getMethodComponent(); + } else { + encoderName = LuceneHNSWMethod.SQ_ENCODER.getName(); + encoderComponent = LuceneHNSWMethod.SQ_ENCODER.getMethodComponent(); + } + + 
MethodComponentContext encoderComponentContext = new MethodComponentContext(encoderName, new HashMap<>()); Map resolvedParams = MethodComponent.getParameterMapWithDefaultsAdded( encoderComponentContext, - SQ_ENCODER.getMethodComponent(), + encoderComponent, knnMethodConfigContext ); encoderComponentContext.getParameters().putAll(resolvedParams); diff --git a/src/main/java/org/opensearch/knn/index/mapper/CompressionLevel.java b/src/main/java/org/opensearch/knn/index/mapper/CompressionLevel.java index dc312a232a..d04eb8014d 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/CompressionLevel.java +++ b/src/main/java/org/opensearch/knn/index/mapper/CompressionLevel.java @@ -10,6 +10,7 @@ import lombok.Getter; import org.opensearch.Version; import org.opensearch.core.common.Strings; +import org.opensearch.knn.index.engine.KNNEngine; import org.opensearch.knn.index.query.rescore.RescoreContext; import java.util.Collections; @@ -108,12 +109,23 @@ public static boolean isConfigured(CompressionLevel compressionLevel) { * is invalid. 
*/ public RescoreContext getDefaultRescoreContext(Mode mode, int dimension, Version version) { + return getDefaultRescoreContext(mode, dimension, version, null); + } + + @VisibleForTesting + RescoreContext getDefaultRescoreContext(Mode mode, int dimension) { + return getDefaultRescoreContext(mode, dimension, Version.CURRENT); + } + + // Add new method signature with KNNEngine parameter + public RescoreContext getDefaultRescoreContext(Mode mode, int dimension, Version version, KNNEngine engine) { // TODO move this to separate class called resolver to resolve rescore context if (modesForRescore.contains(mode)) { if (this == x4 && version.before(Version.V_3_1_0)) { // For index created before 3.1, context was always null and mode is empty return null; } + // Adjust RescoreContext based on dimension except for 4x compression if (this != x4 && dimension <= RescoreContext.DIMENSION_THRESHOLD) { // For dimensions <= 1000, return a RescoreContext with 5.0f oversample factor @@ -124,12 +136,22 @@ public RescoreContext getDefaultRescoreContext(Mode mode, int dimension, Version } return defaultRescoreContext; } - return null; - } - @VisibleForTesting - RescoreContext getDefaultRescoreContext(Mode mode, int dimension) { - return getDefaultRescoreContext(mode, dimension, Version.CURRENT); + // Special handling for Lucene BBQ (x32 compression) + if (this == x32 && engine == KNNEngine.LUCENE && version.onOrAfter(Version.V_3_3_0)) { + if (dimension <= RescoreContext.DIMENSION_THRESHOLD) { + return RescoreContext.builder() + .oversampleFactor(RescoreContext.OVERSAMPLE_FACTOR_BELOW_DIMENSION_THRESHOLD) + .userProvided(false) + .build(); + } else { + return RescoreContext.builder() + .oversampleFactor(RescoreContext.OVERSAMPLE_FACTOR_ABOVE_DIMENSION_THRESHOLD) + .userProvided(false) + .build(); + } + } + return null; } } diff --git a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldType.java b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldType.java index 
8bd5d3ab11..3229bf3367 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldType.java +++ b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldType.java @@ -20,6 +20,7 @@ import org.opensearch.index.query.QueryShardException; import org.opensearch.knn.index.KNNVectorIndexFieldData; import org.opensearch.knn.index.VectorDataType; +import org.opensearch.knn.index.engine.KNNEngine; import org.opensearch.knn.index.engine.KNNMethodContext; import org.opensearch.knn.index.query.rescore.RescoreContext; import org.opensearch.knn.indices.ModelDao; @@ -125,7 +126,14 @@ public RescoreContext resolveRescoreContext(RescoreContext userProvidedContext) int dimension = knnMappingConfig.getDimension(); CompressionLevel compressionLevel = knnMappingConfig.getCompressionLevel(); Mode mode = knnMappingConfig.getMode(); - return compressionLevel.getDefaultRescoreContext(mode, dimension, knnMappingConfig.getIndexCreatedVersion()); + + KNNEngine engine = null; + Optional knnMethodContext = knnMappingConfig.getKnnMethodContext(); + if (knnMethodContext.isPresent()) { + engine = knnMethodContext.get().getKnnEngine(); + } + + return compressionLevel.getDefaultRescoreContext(mode, dimension, knnMappingConfig.getIndexCreatedVersion(), engine); } /** diff --git a/src/main/java/org/opensearch/knn/index/query/rescore/RescoreContext.java b/src/main/java/org/opensearch/knn/index/query/rescore/RescoreContext.java index 4e89b1b04f..72babec4ea 100644 --- a/src/main/java/org/opensearch/knn/index/query/rescore/RescoreContext.java +++ b/src/main/java/org/opensearch/knn/index/query/rescore/RescoreContext.java @@ -23,6 +23,7 @@ public final class RescoreContext { public static final int MAX_FIRST_PASS_RESULTS = 10000; public static final int DIMENSION_THRESHOLD = 1000; public static final float OVERSAMPLE_FACTOR_BELOW_DIMENSION_THRESHOLD = 5.0f; + public static final float OVERSAMPLE_FACTOR_ABOVE_DIMENSION_THRESHOLD = 3.0f; // Dimension thresholds for adjusting oversample 
factor public static final int DIMENSION_THRESHOLD_1000 = 1000; diff --git a/src/test/java/org/opensearch/knn/index/LuceneEngineIT.java b/src/test/java/org/opensearch/knn/index/LuceneEngineIT.java index 2ccd13f451..e10aa1e4b1 100644 --- a/src/test/java/org/opensearch/knn/index/LuceneEngineIT.java +++ b/src/test/java/org/opensearch/knn/index/LuceneEngineIT.java @@ -35,6 +35,7 @@ import java.util.function.Function; import java.util.stream.Collectors; +import static org.opensearch.knn.common.KNNConstants.ENCODER_BBQ; import static org.opensearch.knn.common.KNNConstants.ENCODER_SQ; import static org.opensearch.knn.common.KNNConstants.LUCENE_SQ_BITS; import static org.opensearch.knn.common.KNNConstants.LUCENE_SQ_CONFIDENCE_INTERVAL; @@ -747,6 +748,124 @@ private void createKnnIndexMappingWithLuceneEngineAndSQEncoder( createKnnIndex(INDEX_NAME, mapping); } + @SneakyThrows + public void testBBQ_withInvalidParams_thenThrowException() { + // Use "byte" data_type with bbq encoder which throws an exception + expectThrows( + ResponseException.class, + () -> createKnnIndexMappingWithLuceneEngineAndBBQEncoder(DIMENSION, SpaceType.L2, VectorDataType.BYTE) + ); + } + + @SneakyThrows + public void testAddDocWithBBQEncoder() { + createKnnIndexMappingWithLuceneEngineAndBBQEncoder(DIMENSION, SpaceType.L2, VectorDataType.FLOAT); + Float[] vector = new Float[] { 2.0f, 4.5f, 6.5f }; + addKnnDoc(INDEX_NAME, DOC_ID, FIELD_NAME, vector); + + refreshIndex(INDEX_NAME); + assertEquals(1, getDocCount(INDEX_NAME)); + } + + @SneakyThrows + public void testUpdateDocWithBBQEncoder() { + createKnnIndexMappingWithLuceneEngineAndBBQEncoder(DIMENSION, SpaceType.INNER_PRODUCT, VectorDataType.FLOAT); + Float[] vector = { 6.0f, 6.0f, 7.0f }; + addKnnDoc(INDEX_NAME, DOC_ID, FIELD_NAME, vector); + + Float[] updatedVector = { 8.0f, 8.0f, 8.0f }; + updateKnnDoc(INDEX_NAME, DOC_ID, FIELD_NAME, updatedVector); + + refreshIndex(INDEX_NAME); + assertEquals(1, getDocCount(INDEX_NAME)); + } + + @SneakyThrows + 
public void testDeleteDocWithBBQEncoder() { + createKnnIndexMappingWithLuceneEngineAndBBQEncoder(DIMENSION, SpaceType.INNER_PRODUCT, VectorDataType.FLOAT); + Float[] vector = { 6.0f, 6.0f, 7.0f }; + addKnnDoc(INDEX_NAME, DOC_ID, FIELD_NAME, vector); + + deleteKnnDoc(INDEX_NAME, DOC_ID); + + refreshIndex(INDEX_NAME); + assertEquals(0, getDocCount(INDEX_NAME)); + } + + @SneakyThrows + public void testIndexingAndQueryingWithBBQEncoder() { + createKnnIndexMappingWithLuceneEngineAndBBQEncoder(DIMENSION, SpaceType.INNER_PRODUCT, VectorDataType.FLOAT); + + int numDocs = 10; + for (int i = 0; i < numDocs; i++) { + float[] indexVector = new float[DIMENSION]; + Arrays.fill(indexVector, (float) i); + addKnnDocWithAttributes(INDEX_NAME, Integer.toString(i), FIELD_NAME, indexVector, ImmutableMap.of("rating", String.valueOf(i))); + } + + refreshIndex(INDEX_NAME); + assertEquals(numDocs, getDocCount(INDEX_NAME)); + + float[] queryVector = new float[DIMENSION]; + Arrays.fill(queryVector, (float) numDocs); + int k = 10; + + Response searchResponse = searchKNNIndex(INDEX_NAME, new KNNQueryBuilder(FIELD_NAME, queryVector, k), k); + List results = parseSearchResponse(EntityUtils.toString(searchResponse.getEntity()), FIELD_NAME); + assertEquals(k, results.size()); + for (int i = 0; i < k; i++) { + assertEquals(numDocs - i - 1, Integer.parseInt(results.get(i).getDocId())); + } + } + + public void testQueryWithFilterUsingBBQEncoder() throws Exception { + createKnnIndexMappingWithLuceneEngineAndBBQEncoder(DIMENSION, SpaceType.INNER_PRODUCT, VectorDataType.FLOAT); + + addKnnDocWithAttributes( + DOC_ID, + new float[] { 6.0f, 7.9f, 3.1f }, + ImmutableMap.of(COLOR_FIELD_NAME, "red", TASTE_FIELD_NAME, "sweet") + ); + addKnnDocWithAttributes(DOC_ID_2, new float[] { 3.2f, 2.1f, 4.8f }, ImmutableMap.of(COLOR_FIELD_NAME, "green")); + addKnnDocWithAttributes(DOC_ID_3, new float[] { 4.1f, 5.0f, 7.1f }, ImmutableMap.of(COLOR_FIELD_NAME, "red")); + + refreshIndex(INDEX_NAME); + + final float[] 
searchVector = { 6.0f, 6.0f, 4.1f }; + List expectedDocIdsKGreaterThanFilterResult = Arrays.asList(DOC_ID, DOC_ID_3); + List expectedDocIdsKLimitsFilterResult = Arrays.asList(DOC_ID); + validateQueryResultsWithFilters(searchVector, 5, 1, expectedDocIdsKGreaterThanFilterResult, expectedDocIdsKLimitsFilterResult); + } + + private void createKnnIndexMappingWithLuceneEngineAndBBQEncoder(int dimension, SpaceType spaceType, VectorDataType vectorDataType) + throws Exception { + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .startObject(PROPERTIES_FIELD_NAME) + .startObject(FIELD_NAME) + .field(TYPE_FIELD_NAME, KNN_VECTOR_TYPE) + .field(DIMENSION_FIELD_NAME, dimension) + .field(VECTOR_DATA_TYPE_FIELD, vectorDataType) + .startObject(KNNConstants.KNN_METHOD) + .field(KNNConstants.NAME, METHOD_HNSW) + .field(KNNConstants.METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue()) + .field(KNNConstants.KNN_ENGINE, KNNEngine.LUCENE.getName()) + .startObject(KNNConstants.PARAMETERS) + .field(KNNConstants.METHOD_PARAMETER_M, M) + .field(KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION, EF_CONSTRUCTION) + .startObject(METHOD_ENCODER_PARAMETER) + .field(NAME, ENCODER_BBQ) + .endObject() + .endObject() + .endObject() + .endObject() + .endObject() + .endObject(); + + String mapping = builder.toString(); + createKnnIndex(INDEX_NAME, mapping); + } + private void createKnnIndexMappingWithLuceneEngine(int dimension, SpaceType spaceType, VectorDataType vectorDataType) throws Exception { XContentBuilder builder = XContentFactory.jsonBuilder() .startObject() diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120PerFieldKnnVectorsFormatTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120PerFieldKnnVectorsFormatTests.java new file mode 100644 index 0000000000..228d533213 --- /dev/null +++ b/src/test/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120PerFieldKnnVectorsFormatTests.java @@ -0,0 +1,86 @@ +/* + * Copyright 
OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.KNN9120Codec; + +import junit.framework.TestCase; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.knn.index.codec.params.KNNBBQVectorsFormatParams; +import org.opensearch.knn.index.codec.params.KNNScalarQuantizedVectorsFormatParams; +import org.opensearch.knn.index.codec.params.KNNVectorsFormatParams; +import org.opensearch.knn.index.engine.MethodComponentContext; +import org.mockito.Mockito; + +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +import static org.opensearch.knn.common.KNNConstants.ENCODER_BBQ; +import static org.opensearch.knn.common.KNNConstants.METHOD_ENCODER_PARAMETER; +import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION; +import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_M; + +public class KNN9120PerFieldKnnVectorsFormatTests extends TestCase { + + private MapperService mockMapperService; + private KNN9120PerFieldKnnVectorsFormat format; + + @Override + public void setUp() throws Exception { + super.setUp(); + mockMapperService = Mockito.mock(MapperService.class); + format = new KNN9120PerFieldKnnVectorsFormat(Optional.of(mockMapperService)); + } + + public void testConstructor_whenCalled_thenFormatCreated() { + assertNotNull(format); + } + + public void testVectorsFormatParams_whenCalled_thenReturnValidParams() { + // Regular format params: m and ef_construction are exposed as max connections and beam width + Map regularParams = new HashMap<>(); + regularParams.put(METHOD_PARAMETER_M, 16); + regularParams.put(METHOD_PARAMETER_EF_CONSTRUCTION, 100); + + KNNVectorsFormatParams params = new KNNVectorsFormatParams(regularParams, 16, 100); + assertEquals(16, params.getMaxConnections()); + assertEquals(100, params.getBeamWidth()); + + // Scalar quantized format params, built with an "sq" encoder context + Map encoderParams = new HashMap<>(); + MethodComponentContext sqContext = new MethodComponentContext("sq", encoderParams); + + Map
sqParams = new HashMap<>(); + sqParams.put(METHOD_ENCODER_PARAMETER, sqContext); + sqParams.put(METHOD_PARAMETER_M, 16); + sqParams.put(METHOD_PARAMETER_EF_CONSTRUCTION, 100); + + KNNScalarQuantizedVectorsFormatParams sqFormatParams = new KNNScalarQuantizedVectorsFormatParams(sqParams, 16, 100); + assertEquals(16, sqFormatParams.getMaxConnections()); + assertEquals(100, sqFormatParams.getBeamWidth()); + } + + public void testBBQParameterValidation_whenCalled_thenValidateCorrectly() { + Map encoderParams = new HashMap<>(); + MethodComponentContext bbqEncoder = new MethodComponentContext(ENCODER_BBQ, encoderParams); + + // Valid BBQ parameters: explicit m / ef_construction win over the constructor defaults (16, 100) + Map validParams = new HashMap<>(); + validParams.put(METHOD_ENCODER_PARAMETER, bbqEncoder); + validParams.put(METHOD_PARAMETER_M, 64); + validParams.put(METHOD_PARAMETER_EF_CONSTRUCTION, 256); + + KNNBBQVectorsFormatParams bbqParams = new KNNBBQVectorsFormatParams(validParams, 16, 100); + assertTrue(bbqParams.validate(validParams)); + assertEquals(64, bbqParams.getMaxConnections()); + assertEquals(256, bbqParams.getBeamWidth()); + + // Invalid parameters (SQ encoder instead of BBQ) + MethodComponentContext sqEncoder = new MethodComponentContext("sq", encoderParams); + Map invalidParams = new HashMap<>(); + invalidParams.put(METHOD_ENCODER_PARAMETER, sqEncoder); + assertFalse(bbqParams.validate(invalidParams)); + } +} diff --git a/src/test/java/org/opensearch/knn/index/codec/backward_codecs/KNN990Codec/KNN990PerFieldKnnVectorsFormatTests.java b/src/test/java/org/opensearch/knn/index/codec/backward_codecs/KNN990Codec/KNN990PerFieldKnnVectorsFormatTests.java new file mode 100644 index 0000000000..e5fbf42597 --- /dev/null +++ b/src/test/java/org/opensearch/knn/index/codec/backward_codecs/KNN990Codec/KNN990PerFieldKnnVectorsFormatTests.java @@ -0,0 +1,60 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.backward_codecs.KNN990Codec; + +import
junit.framework.TestCase; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.knn.index.codec.params.KNNBBQVectorsFormatParams; +import org.opensearch.knn.index.engine.MethodComponentContext; +import org.mockito.Mockito; + +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +import static org.opensearch.knn.common.KNNConstants.ENCODER_BBQ; +import static org.opensearch.knn.common.KNNConstants.METHOD_ENCODER_PARAMETER; +import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION; +import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_M; + +public class KNN990PerFieldKnnVectorsFormatTests extends TestCase { + + private MapperService mockMapperService; + private KNN990PerFieldKnnVectorsFormat format; + + @Override + public void setUp() throws Exception { + super.setUp(); + mockMapperService = Mockito.mock(MapperService.class); + format = new KNN990PerFieldKnnVectorsFormat(Optional.of(mockMapperService)); + } + + public void testConstructor_whenCalled_thenFormatCreated() { + assertNotNull(format); + } + + public void testBBQVectorsFormatParams_whenCalled_thenReturnValidParams() { + Map encoderParams = new HashMap<>(); + MethodComponentContext encoderContext = new MethodComponentContext(ENCODER_BBQ, encoderParams); + + Map params = new HashMap<>(); + params.put(METHOD_ENCODER_PARAMETER, encoderContext); + params.put(METHOD_PARAMETER_M, 16); + params.put(METHOD_PARAMETER_EF_CONSTRUCTION, 100); + + KNNBBQVectorsFormatParams bbqParams = new KNNBBQVectorsFormatParams(params, 16, 100); + + assertTrue(bbqParams.validate(params)); + assertEquals(16, bbqParams.getMaxConnections()); + assertEquals(100, bbqParams.getBeamWidth()); + + // validate() must reject an encoder whose name is not ENCODER_BBQ + MethodComponentContext invalidEncoder = new MethodComponentContext("invalid", encoderParams); + Map invalidParams = new HashMap<>(); + invalidParams.put(METHOD_ENCODER_PARAMETER, invalidEncoder); +
assertFalse(bbqParams.validate(invalidParams)); + } +} diff --git a/src/test/java/org/opensearch/knn/index/codec/params/KNNBBQVectorsFormatTests.java b/src/test/java/org/opensearch/knn/index/codec/params/KNNBBQVectorsFormatTests.java new file mode 100644 index 0000000000..94d393f0ed --- /dev/null +++ b/src/test/java/org/opensearch/knn/index/codec/params/KNNBBQVectorsFormatTests.java @@ -0,0 +1,106 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.params; + +import junit.framework.TestCase; +import org.junit.Assert; +import org.opensearch.knn.index.engine.MethodComponentContext; + +import java.util.HashMap; +import java.util.Map; + +import static org.opensearch.knn.common.KNNConstants.ENCODER_BBQ; +import static org.opensearch.knn.common.KNNConstants.METHOD_ENCODER_PARAMETER; +import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION; +import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_M; + +public class KNNBBQVectorsFormatTests extends TestCase { + private static final int DEFAULT_MAX_CONNECTIONS = 16; + private static final int DEFAULT_BEAM_WIDTH = 100; + + public void testInitParams_whenCalled_thenReturnDefaultParams() { + KNNBBQVectorsFormatParams knnBBQVectorsFormatParams = new KNNBBQVectorsFormatParams( + getDefaultParamsForConstructor(), + DEFAULT_MAX_CONNECTIONS, + DEFAULT_BEAM_WIDTH + ); + + assertEquals(DEFAULT_MAX_CONNECTIONS, knnBBQVectorsFormatParams.getMaxConnections()); + assertEquals(DEFAULT_BEAM_WIDTH, knnBBQVectorsFormatParams.getBeamWidth()); + } + + public void testInitParams_whenCalled_thenReturnParams() { + int m = 64; + int efConstruction = 128; + + Map encoderParams = new HashMap<>(); + MethodComponentContext encoderComponentContext = new MethodComponentContext(ENCODER_BBQ, encoderParams); + + Map params = new HashMap<>(); + params.put(METHOD_ENCODER_PARAMETER, encoderComponentContext); +
params.put(METHOD_PARAMETER_M, m); + params.put(METHOD_PARAMETER_EF_CONSTRUCTION, efConstruction); + + KNNBBQVectorsFormatParams knnBBQVectorsFormatParams = new KNNBBQVectorsFormatParams( + params, + DEFAULT_MAX_CONNECTIONS, + DEFAULT_BEAM_WIDTH + ); + + assertEquals(m, knnBBQVectorsFormatParams.getMaxConnections()); + assertEquals(efConstruction, knnBBQVectorsFormatParams.getBeamWidth()); + } + + public void testValidate_whenCalled_thenReturnTrue() { + Map params = getDefaultParamsForConstructor(); + KNNBBQVectorsFormatParams knnBBQVectorsFormatParams = new KNNBBQVectorsFormatParams( + params, + DEFAULT_MAX_CONNECTIONS, + DEFAULT_BEAM_WIDTH + ); + assertTrue(knnBBQVectorsFormatParams.validate(params)); + } + + public void testValidate_whenCalled_thenReturnFalse() { + KNNBBQVectorsFormatParams knnBBQVectorsFormatParams = new KNNBBQVectorsFormatParams( + getDefaultParamsForConstructor(), + DEFAULT_MAX_CONNECTIONS, + DEFAULT_BEAM_WIDTH + ); + Map params = new HashMap<>(); + + // Return false if encoder value is null + params.put(METHOD_ENCODER_PARAMETER, null); + assertFalse(knnBBQVectorsFormatParams.validate(params)); + + // Return false if encoder value is not an instance of MethodComponentContext + params.replace(METHOD_ENCODER_PARAMETER, "dummy string"); + assertFalse(knnBBQVectorsFormatParams.validate(params)); + + // Return false if encoder name is not the BBQ encoder name (ENCODER_BBQ) + MethodComponentContext encoderComponentContext = new MethodComponentContext("invalid encoder name", new HashMap<>()); + params.replace(METHOD_ENCODER_PARAMETER, encoderComponentContext); + assertFalse(knnBBQVectorsFormatParams.validate(params)); + } + + public void testValidate_whenNullParams_thenThrowException() { + KNNBBQVectorsFormatParams knnBBQVectorsFormatParams = new KNNBBQVectorsFormatParams( + getDefaultParamsForConstructor(), + DEFAULT_MAX_CONNECTIONS, + DEFAULT_BEAM_WIDTH + ); + + Assert.assertThrows(NullPointerException.class, () -> knnBBQVectorsFormatParams.validate(null)); + } + + private
Map getDefaultParamsForConstructor() { + Map encoderParams = new HashMap<>(); + MethodComponentContext encoderComponentContext = new MethodComponentContext(ENCODER_BBQ, encoderParams); + Map params = new HashMap<>(); + params.put(METHOD_ENCODER_PARAMETER, encoderComponentContext); + return params; + } +} diff --git a/src/test/java/org/opensearch/knn/index/engine/lucene/LuceneBBQEncoderTests.java b/src/test/java/org/opensearch/knn/index/engine/lucene/LuceneBBQEncoderTests.java new file mode 100644 index 0000000000..26ceddeddf --- /dev/null +++ b/src/test/java/org/opensearch/knn/index/engine/lucene/LuceneBBQEncoderTests.java @@ -0,0 +1,54 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.engine.lucene; + +import junit.framework.TestCase; +import org.opensearch.knn.index.engine.MethodComponent; +import org.opensearch.knn.index.engine.MethodComponentContext; +import org.opensearch.knn.index.mapper.CompressionLevel; + +import java.util.HashMap; +import java.util.Map; + +import static org.opensearch.knn.common.KNNConstants.ENCODER_BBQ; + +public class LuceneBBQEncoderTests extends TestCase { + + public void testGetMethodComponent_whenCalled_thenReturnConsistentComponent() { + LuceneBBQEncoder encoder1 = new LuceneBBQEncoder(); + LuceneBBQEncoder encoder2 = new LuceneBBQEncoder(); + + MethodComponent component1 = encoder1.getMethodComponent(); + MethodComponent component2 = encoder2.getMethodComponent(); + + assertNotNull(component1); + assertEquals(ENCODER_BBQ, component1.getName()); + assertSame(component1, component2); + } + + public void testCalculateCompressionLevel_whenCalled_thenReturnX32() { + LuceneBBQEncoder encoder = new LuceneBBQEncoder(); + + // Test with null parameters + assertEquals(CompressionLevel.x32, encoder.calculateCompressionLevel(null, null)); + + // Test with empty context + MethodComponentContext emptyContext = new MethodComponentContext(ENCODER_BBQ, new HashMap<>()); +
assertEquals(CompressionLevel.x32, encoder.calculateCompressionLevel(emptyContext, null)); + + // Test with populated context + Map params = new HashMap<>(); + params.put("param1", "value1"); + params.put("param2", 42); + MethodComponentContext populatedContext = new MethodComponentContext(ENCODER_BBQ, params); + assertEquals(CompressionLevel.x32, encoder.calculateCompressionLevel(populatedContext, null)); + } + + public void testGetName_whenCalled_thenReturnEncoderBBQ() { + LuceneBBQEncoder encoder = new LuceneBBQEncoder(); + assertEquals(ENCODER_BBQ, encoder.getName()); + } +} diff --git a/src/test/java/org/opensearch/knn/index/engine/lucene/LuceneMethodResolverTests.java b/src/test/java/org/opensearch/knn/index/engine/lucene/LuceneMethodResolverTests.java index 833d831354..a6ad030985 100644 --- a/src/test/java/org/opensearch/knn/index/engine/lucene/LuceneMethodResolverTests.java +++ b/src/test/java/org/opensearch/knn/index/engine/lucene/LuceneMethodResolverTests.java @@ -178,14 +178,14 @@ public void testResolveMethod_whenInvalid_thenThrow() { ) ); - // Invalid compression + // Invalid compression: x16 is rejected (x32 is no longer invalid now that Lucene 32x compression was added) expectThrows( ValidationException.class, () -> TEST_RESOLVER.resolveMethod( null, KNNMethodConfigContext.builder() .vectorDataType(VectorDataType.FLOAT) - .compressionLevel(CompressionLevel.x32) + .compressionLevel(CompressionLevel.x16) .versionCreated(Version.CURRENT) .build(), false, diff --git a/src/test/java/org/opensearch/knn/index/mapper/CompressionLevelTests.java b/src/test/java/org/opensearch/knn/index/mapper/CompressionLevelTests.java index 9c11653e01..fe62b3c3d3 100644 --- a/src/test/java/org/opensearch/knn/index/mapper/CompressionLevelTests.java +++ b/src/test/java/org/opensearch/knn/index/mapper/CompressionLevelTests.java @@ -7,6 +7,7 @@ import org.opensearch.core.common.Strings; import org.opensearch.knn.KNNTestCase; +import org.opensearch.knn.index.engine.KNNEngine; import
org.opensearch.knn.index.query.rescore.RescoreContext; public class CompressionLevelTests extends KNNTestCase { @@ -116,5 +117,29 @@ public void testGetDefaultRescoreContext() { // NOT_CONFIGURED with dimension <= 1000 should return a RescoreContext with an oversample factor of 5.0f rescoreContext = CompressionLevel.NOT_CONFIGURED.getDefaultRescoreContext(mode, belowThresholdDimension); assertNull(rescoreContext); + + // Verify the default rescore context for 32x compression (FAISS ADC/RR and Lucene BBQ): x32 with Lucene engine and dimension <= 1000 + rescoreContext = CompressionLevel.x32.getDefaultRescoreContext(mode, belowThresholdDimension, null, KNNEngine.LUCENE); + assertNotNull(rescoreContext); + assertEquals(5.0f, rescoreContext.getOversampleFactor(), 0.0f); + assertFalse(rescoreContext.isUserProvided()); + + // x32 with Lucene engine and dimension > 1000 + rescoreContext = CompressionLevel.x32.getDefaultRescoreContext(mode, aboveThresholdDimension, null, KNNEngine.LUCENE); + assertNotNull(rescoreContext); + assertEquals(3.0f, rescoreContext.getOversampleFactor(), 0.0f); + assertFalse(rescoreContext.isUserProvided()); + + // x32 with Faiss engine should return default behavior (not special Lucene handling) + rescoreContext = CompressionLevel.x32.getDefaultRescoreContext(mode, belowThresholdDimension, null, KNNEngine.FAISS); + assertNotNull(rescoreContext); + assertEquals(5.0f, rescoreContext.getOversampleFactor(), 0.0f); + assertFalse(rescoreContext.isUserProvided()); + + // x32 with null engine should return default behavior + rescoreContext = CompressionLevel.x32.getDefaultRescoreContext(mode, belowThresholdDimension, null, null); + assertNotNull(rescoreContext); + assertEquals(5.0f, rescoreContext.getOversampleFactor(), 0.0f); + assertFalse(rescoreContext.isUserProvided()); } } diff --git a/src/test/java/org/opensearch/knn/recall/RecallTestsIT.java b/src/test/java/org/opensearch/knn/recall/RecallTestsIT.java index 77ce654bfb..9346261934 100644 ---
a/src/test/java/org/opensearch/knn/recall/RecallTestsIT.java +++ b/src/test/java/org/opensearch/knn/recall/RecallTestsIT.java @@ -33,6 +33,7 @@ import static org.opensearch.knn.common.KNNConstants.ENCODER_PARAMETER_PQ_M; import static org.opensearch.knn.common.KNNConstants.ENCODER_PQ; import static org.opensearch.knn.common.KNNConstants.FAISS_NAME; +import static org.opensearch.knn.common.KNNConstants.LUCENE_NAME; import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE; import static org.opensearch.knn.common.KNNConstants.KNN_METHOD; import static org.opensearch.knn.common.KNNConstants.METHOD_ENCODER_PARAMETER; @@ -668,6 +669,59 @@ public void testRecall_whenFaissHNSWPQFP32_thenRecallAbove50percent() { } } + /** + * { + * "properties": { + * "{TEST_FIELD_NAME}": { + * "type": "knn_vector", + * "dimension": {TEST_DIMENSION}, + * "method": { + * "name":"hnsw", + * "engine":"lucene", + * "space_type": "{SPACE_TYPE}", + * "parameters":{ + * "m":{HNSW_M}, + * "ef_construction": {HNSW_EF_CONSTRUCTION}, + * "encoder": { + * "name": "binary" + * } + * } + * } + * } + * } + * } + */ + @SneakyThrows + public void testRecall_whenBBQ_thenRecallAbove60percent() { + List spaceTypes = List.of(SpaceType.L2, SpaceType.COSINESIMIL); + for (SpaceType spaceType : spaceTypes) { + String indexName = createIndexName(KNNEngine.LUCENE, spaceType) + "_binary"; + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .startObject(PROPERTIES_FIELD) + .startObject(TEST_FIELD_NAME) + .field(TYPE, TYPE_KNN_VECTOR) + .field(DIMENSION, TEST_DIMENSION) + .startObject(KNN_METHOD) + .field(METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue()) + .field(KNN_ENGINE, LUCENE_NAME) + .field(NAME, METHOD_HNSW) + .startObject(PARAMETERS) + .field(METHOD_PARAMETER_EF_CONSTRUCTION, HNSW_EF_CONSTRUCTION) + .field(METHOD_PARAMETER_M, HNSW_M) + .startObject(METHOD_ENCODER_PARAMETER) + .field(NAME, "binary") + .endObject() + .endObject() + .endObject() + .endObject() + .endObject() + .endObject(); +
createIndexAndIngestDocs(indexName, TEST_FIELD_NAME, getSettings(), builder.toString()); + assertRecall(indexName, spaceType, 0.4f); + } + } + @SneakyThrows private void assertRecall(String testIndexName, SpaceType spaceType, float acceptableRecallFromPerfect) { List> searchResults = bulkSearch(testIndexName, TEST_FIELD_NAME, QUERY_VECTORS, TEST_K); diff --git a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java index ff78fef07f..755785dcbd 100644 --- a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java +++ b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java @@ -2516,6 +2516,21 @@ protected boolean isApproximateThresholdSupported(final Optional bwcVers return version.onOrAfter(Version.V_2_18_0); } + /** + * BBQ encoder is only supported on or after V_3_3_0; an absent version is treated as unsupported and a trailing "-SNAPSHOT" qualifier is ignored + */ + protected boolean isBBQEncoderSupported(final Optional bwcVersion) { + if (bwcVersion.isEmpty()) { + return false; + } + String versionString = bwcVersion.get(); + if (versionString.endsWith("-SNAPSHOT")) { + versionString = versionString.substring(0, versionString.length() - 9); + } + final Version version = Version.fromString(versionString); + return version.onOrAfter(Version.V_3_3_0); + } + /** * Remote Index Build settings are only supported on or after V_3_0_0 */