-
Notifications
You must be signed in to change notification settings - Fork 169
Integrating Lucene's Better Binary Quantization #2838
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: feature/lucene-bbq
Are you sure you want to change the base?
Changes from 38 commits
c181d5c
f8b81e0
b4c1d89
d925212
5d15c2f
3307128
f68cd43
011fe7d
ad6c48e
f5d8a4a
b4b44f3
862bcf3
b68d0f1
7541f90
9274a76
a863a3c
ab86018
302a67d
4a477f3
b92dfbc
d4c859c
95eff40
1206a98
6785f10
459eb6a
509403c
49b5db9
a9052cf
78a5509
70f2e0c
83a2aa2
02b0cd7
6812655
79cdd79
0abc8dc
2a85559
379520b
a8a31c7
aa29c46
6e783da
e3bc2dc
ca8cb09
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,10 @@ All notable changes to this project are documented in this file. | |
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). See the [CONTRIBUTING guide](./CONTRIBUTING.md#Changelog) for instructions on how to add changelog entries. | ||
|
||
## [Unreleased 3.3](https://github.com/opensearch-project/k-NN/compare/main...HEAD) | ||
|
||
### Features | ||
* Integrated Lucene's better binary quantization [#2838](https://github.com/opensearch-project/k-NN/pull/2838) | ||
|
||
|
||
### Refactoring | ||
* Refactored the KNN Stat files for better readability. | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -252,6 +252,7 @@ testClusters { | |
knn_bwc_version.startsWith("2.15.")) { | ||
filter { | ||
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testKNNIndexLuceneQuantization" | ||
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testKNNIndexLuceneBBQ" | ||
adityamachiroutu marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
excludeTestsMatching "org.opensearch.knn.bwc.IndexingIT.testKNNIndexBinaryForceMerge" | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,6 @@ | |
|
||
import java.io.IOException; | ||
import java.util.Collections; | ||
import java.io.IOException; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
|
@@ -51,6 +50,7 @@ | |
import static org.opensearch.knn.common.KNNConstants.MODE_PARAMETER; | ||
import static org.opensearch.knn.common.KNNConstants.NAME; | ||
import static org.opensearch.knn.common.KNNConstants.PARAMETERS; | ||
import static org.opensearch.knn.common.KNNConstants.ENCODER_BBQ; | ||
|
||
public class IndexingIT extends AbstractRestartUpgradeTestCase { | ||
private static final String TEST_FIELD = "test-field"; | ||
|
@@ -64,6 +64,8 @@ public class IndexingIT extends AbstractRestartUpgradeTestCase { | |
private static final int NUM_DOCS = 10; | ||
private static int QUERY_COUNT = 0; | ||
|
||
private static final String ALGO = "hnsw"; | ||
|
||
|
||
// Default Legacy Field Mapping | ||
// space_type : "l2", engine : "nmslib", m : 16, ef_construction : 512 | ||
public void testKNNIndexDefaultLegacyFieldMapping() throws Exception { | ||
|
@@ -659,4 +661,75 @@ public void testRandomRotationBWC() throws Exception { | |
deleteKNNIndex(newIndex); | ||
} | ||
} | ||
|
||
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/k-NN/issues/2805") | ||
|
||
public void testKNNIndexLuceneBBQ() throws Exception { | ||
waitForClusterHealthGreen(NODES_BWC_CLUSTER); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't need to add the same condition to validate BWC version twice in if and else blocks, probably add it here after the cluster is green
|
||
int k = 4; | ||
int dimension = 2; | ||
|
||
if (isRunningAgainstOldCluster()) { | ||
// Skip test if BBQ encoder is not supported in the old cluster version | ||
if (!isBBQEncoderSupported(getBWCVersion())) { | ||
logger.info("Skipping testKNNIndexLuceneBBQ as BBQ encoder is not supported in version: {}", getBWCVersion()); | ||
return; | ||
} | ||
|
||
String mapping = XContentFactory.jsonBuilder() | ||
.startObject() | ||
.startObject("properties") | ||
.startObject(TEST_FIELD) | ||
.field(VECTOR_TYPE, KNN_VECTOR) | ||
.field(DIMENSION, dimension) | ||
.startObject(KNN_METHOD) | ||
.field(NAME, METHOD_HNSW) | ||
.field(METHOD_PARAMETER_SPACE_TYPE, SpaceType.INNER_PRODUCT.getValue()) | ||
.field(KNN_ENGINE, LUCENE_NAME) | ||
.startObject(PARAMETERS) | ||
.startObject(METHOD_ENCODER_PARAMETER) | ||
.field(NAME, ENCODER_BBQ) | ||
.endObject() | ||
.field(METHOD_PARAMETER_EF_CONSTRUCTION, 256) | ||
.field(METHOD_PARAMETER_M, 16) | ||
.endObject() | ||
.endObject() | ||
.endObject() | ||
.endObject() | ||
.endObject() | ||
.toString(); | ||
createKnnIndex(testIndex, getKNNDefaultIndexSettings(), mapping); | ||
|
||
Float[] vector1 = { -10.6f, 25.48f }; | ||
Float[] vector2 = { -10.8f, 25.48f }; | ||
Float[] vector3 = { -11.0f, 25.48f }; | ||
Float[] vector4 = { -11.2f, 25.48f }; | ||
addKnnDoc(testIndex, "1", TEST_FIELD, vector1); | ||
addKnnDoc(testIndex, "2", TEST_FIELD, vector2); | ||
addKnnDoc(testIndex, "3", TEST_FIELD, vector3); | ||
addKnnDoc(testIndex, "4", TEST_FIELD, vector4); | ||
|
||
float[] queryVector = { -10.5f, 25.48f }; | ||
Response searchResponse = searchKNNIndex(testIndex, new KNNQueryBuilder(TEST_FIELD, queryVector, k), k); | ||
List<KNNResult> results = parseSearchResponse(EntityUtils.toString(searchResponse.getEntity()), TEST_FIELD); | ||
assertEquals(k, results.size()); | ||
for (int i = 0; i < k; i++) { | ||
assertEquals(k - i, Integer.parseInt(results.get(i).getDocId())); | ||
} | ||
} else { | ||
// Skip test if BBQ encoder is not supported in the old cluster version | ||
if (!isBBQEncoderSupported(getBWCVersion())) { | ||
logger.info("Skipping testKNNIndexLuceneBBQ validation as BBQ encoder is not supported in version: {}", getBWCVersion()); | ||
return; | ||
} | ||
float[] queryVector = { -10.5f, 25.48f }; | ||
Response searchResponse = searchKNNIndex(testIndex, new KNNQueryBuilder(TEST_FIELD, queryVector, k), k); | ||
List<KNNResult> results = parseSearchResponse(EntityUtils.toString(searchResponse.getEntity()), TEST_FIELD); | ||
assertEquals(k, results.size()); | ||
for (int i = 0; i < k; i++) { | ||
assertEquals(k - i, Integer.parseInt(results.get(i).getDocId())); | ||
} | ||
deleteKNNIndex(testIndex); | ||
} | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
import org.opensearch.knn.index.KNNSettings; | ||
import org.opensearch.knn.index.codec.KNN990Codec.NativeEngines990KnnVectorsFormat; | ||
import org.opensearch.knn.index.codec.nativeindex.NativeIndexBuildStrategyFactory; | ||
import org.opensearch.knn.index.codec.params.KNNBBQVectorsFormatParams; | ||
import org.opensearch.knn.index.codec.params.KNNScalarQuantizedVectorsFormatParams; | ||
import org.opensearch.knn.index.codec.params.KNNVectorsFormatParams; | ||
import org.opensearch.knn.index.engine.KNNEngine; | ||
|
@@ -44,7 +45,8 @@ public abstract class BasePerFieldKnnVectorsFormat extends PerFieldKnnVectorsFor | |
private final int defaultBeamWidth; | ||
private final Supplier<KnnVectorsFormat> defaultFormatSupplier; | ||
private final Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier; | ||
private Function<KNNScalarQuantizedVectorsFormatParams, KnnVectorsFormat> scalarQuantizedVectorsFormatSupplier; | ||
private final Function<KNNScalarQuantizedVectorsFormatParams, KnnVectorsFormat> scalarQuantizedVectorsFormatSupplier; | ||
private final Function<KNNBBQVectorsFormatParams, KnnVectorsFormat> bbqVectorsFormatSupplier; | ||
private final NativeIndexBuildStrategyFactory nativeIndexBuildStrategyFactory; | ||
private static final String MAX_CONNECTIONS = "max_connections"; | ||
private static final String BEAM_WIDTH = "beam_width"; | ||
|
@@ -56,7 +58,7 @@ public BasePerFieldKnnVectorsFormat( | |
Supplier<KnnVectorsFormat> defaultFormatSupplier, | ||
Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier | ||
) { | ||
this(mapperService, defaultMaxConnections, defaultBeamWidth, defaultFormatSupplier, vectorsFormatSupplier, null); | ||
this(mapperService, defaultMaxConnections, defaultBeamWidth, defaultFormatSupplier, vectorsFormatSupplier, null, null); | ||
} | ||
|
||
public BasePerFieldKnnVectorsFormat( | ||
|
@@ -65,7 +67,8 @@ public BasePerFieldKnnVectorsFormat( | |
int defaultBeamWidth, | ||
Supplier<KnnVectorsFormat> defaultFormatSupplier, | ||
Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier, | ||
Function<KNNScalarQuantizedVectorsFormatParams, KnnVectorsFormat> scalarQuantizedVectorsFormatSupplier | ||
Function<KNNScalarQuantizedVectorsFormatParams, KnnVectorsFormat> scalarQuantizedVectorsFormatSupplier, | ||
Function<KNNBBQVectorsFormatParams, KnnVectorsFormat> bbqVectorsFormatSupplier | ||
) { | ||
this( | ||
mapperService, | ||
|
@@ -74,6 +77,7 @@ public BasePerFieldKnnVectorsFormat( | |
defaultFormatSupplier, | ||
vectorsFormatSupplier, | ||
scalarQuantizedVectorsFormatSupplier, | ||
bbqVectorsFormatSupplier, | ||
new NativeIndexBuildStrategyFactory() | ||
); | ||
} | ||
|
@@ -110,6 +114,11 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) { | |
|
||
if (engine == KNNEngine.LUCENE) { | ||
if (params != null && params.containsKey(METHOD_ENCODER_PARAMETER)) { | ||
KNNBBQVectorsFormatParams bbqParams = new KNNBBQVectorsFormatParams(params, defaultMaxConnections, defaultBeamWidth); | ||
if (bbqParams.validate(params)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we please add a debug log like the one below? |
||
return bbqVectorsFormatSupplier.apply(bbqParams); | ||
} | ||
|
||
KNNScalarQuantizedVectorsFormatParams knnScalarQuantizedVectorsFormatParams = new KNNScalarQuantizedVectorsFormatParams( | ||
params, | ||
defaultMaxConnections, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
|
||
package org.opensearch.knn.index.codec.backward_codecs.KNN990Codec; | ||
|
||
import org.apache.lucene.codecs.lucene102.Lucene102HnswBinaryQuantizedVectorsFormat; | ||
import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat; | ||
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; | ||
import org.opensearch.index.mapper.MapperService; | ||
|
@@ -37,6 +38,12 @@ public KNN990PerFieldKnnVectorsFormat(final Optional<MapperService> mapperServic | |
knnScalarQuantizedVectorsFormatParams.isCompressFlag(), | ||
knnScalarQuantizedVectorsFormatParams.getConfidenceInterval(), | ||
null | ||
), | ||
knnBBQVectorsFormatParams -> new Lucene102HnswBinaryQuantizedVectorsFormat( | ||
knnBBQVectorsFormatParams.getMaxConnections(), | ||
knnBBQVectorsFormatParams.getBeamWidth(), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there no constructor parameter for bits like above? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, Lucene does not have that constructor. |
||
NUM_MERGE_WORKERS, | ||
null | ||
) | ||
); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.index.codec.params; | ||
|
||
import org.opensearch.knn.index.engine.MethodComponentContext; | ||
import java.util.Map; | ||
import static org.opensearch.knn.common.KNNConstants.ENCODER_BBQ; | ||
import static org.opensearch.knn.common.KNNConstants.METHOD_ENCODER_PARAMETER; | ||
|
||
/** | ||
* Class provides params for Lucene102HnswBinaryQuantizedVectorsFormat | ||
*/ | ||
public class KNNBBQVectorsFormatParams extends KNNVectorsFormatParams { | ||
|
||
public KNNBBQVectorsFormatParams(Map<String, Object> params, int defaultMaxConnections, int defaultBeamWidth) { | ||
super(params, defaultMaxConnections, defaultBeamWidth); | ||
MethodComponentContext encoderMethodComponentContext = (MethodComponentContext) params.get(METHOD_ENCODER_PARAMETER); | ||
Map<String, Object> bbqEncoderParams = encoderMethodComponentContext.getParameters(); | ||
} | ||
|
||
@Override | ||
public boolean validate(Map<String, Object> params) { | ||
if (params.get(METHOD_ENCODER_PARAMETER) == null) { | ||
return false; | ||
} | ||
|
||
if (!(params.get(METHOD_ENCODER_PARAMETER) instanceof MethodComponentContext)) { | ||
|
||
return false; | ||
} | ||
|
||
MethodComponentContext encoderMethodComponentContext = (MethodComponentContext) params.get(METHOD_ENCODER_PARAMETER); | ||
return ENCODER_BBQ.equals(encoderMethodComponentContext.getName()); | ||
} | ||
|
||
/** | ||
* Check if BBQ is enabled | ||
* @return true if BBQ is enabled, false otherwise | ||
*/ | ||
public boolean isBBQEnabled() { | ||
|
||
// BBQ is enabled if this class is being used, which means the encoder parameter was validated | ||
return true; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.index.engine.lucene; | ||
|
||
import com.google.common.collect.ImmutableSet; | ||
import org.opensearch.knn.index.VectorDataType; | ||
import org.opensearch.knn.index.engine.Encoder; | ||
import org.opensearch.knn.index.engine.KNNMethodConfigContext; | ||
import org.opensearch.knn.index.engine.MethodComponent; | ||
import org.opensearch.knn.index.engine.MethodComponentContext; | ||
import org.opensearch.knn.index.mapper.CompressionLevel; | ||
|
||
import java.util.Set; | ||
|
||
import static org.opensearch.knn.common.KNNConstants.ENCODER_BBQ; | ||
|
||
/** | ||
* Lucene BBQ (Better Binary Quantization) encoder | ||
*/ | ||
public class LuceneBBQEncoder implements Encoder { | ||
private static final Set<VectorDataType> SUPPORTED_DATA_TYPES = ImmutableSet.of(VectorDataType.FLOAT); | ||
|
||
private final static MethodComponent METHOD_COMPONENT = MethodComponent.Builder.builder(ENCODER_BBQ) | ||
.addSupportedDataTypes(SUPPORTED_DATA_TYPES) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As discussed offline in the past, can you add support for bits parameter and set default to 1 bit (32x compression) such that in the future if Lucene supports 2 and 4 bits we can use this parameter. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the naming convention, pls keep it consistent with Faiss BQ |
||
.build(); | ||
|
||
@Override | ||
public MethodComponent getMethodComponent() { | ||
return METHOD_COMPONENT; | ||
} | ||
|
||
@Override | ||
public CompressionLevel calculateCompressionLevel( | ||
MethodComponentContext methodComponentContext, | ||
KNNMethodConfigContext knnMethodConfigContext | ||
) { | ||
return CompressionLevel.x32; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,11 +24,14 @@ | |
import static org.opensearch.knn.common.KNNConstants.METHOD_ENCODER_PARAMETER; | ||
import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW; | ||
import static org.opensearch.knn.index.engine.lucene.LuceneHNSWMethod.HNSW_METHOD_COMPONENT; | ||
import static org.opensearch.knn.index.engine.lucene.LuceneHNSWMethod.SQ_ENCODER; | ||
|
||
public class LuceneMethodResolver extends AbstractMethodResolver { | ||
|
||
private static final Set<CompressionLevel> SUPPORTED_COMPRESSION_LEVELS = Set.of(CompressionLevel.x1, CompressionLevel.x4); | ||
private static final Set<CompressionLevel> SUPPORTED_COMPRESSION_LEVELS = Set.of( | ||
CompressionLevel.x1, | ||
CompressionLevel.x4, | ||
CompressionLevel.x32 | ||
); | ||
|
||
@Override | ||
public ResolvedMethodContext resolveMethod( | ||
|
@@ -69,10 +72,18 @@ protected void resolveEncoder(KNNMethodContext resolvedKNNMethodContext, KNNMeth | |
} | ||
|
||
MethodComponentContext methodComponentContext = resolvedKNNMethodContext.getMethodComponentContext(); | ||
MethodComponentContext encoderComponentContext = new MethodComponentContext(SQ_ENCODER.getName(), new HashMap<>()); | ||
|
||
String encoderName = (resolvedCompressionLevel == CompressionLevel.x32) | ||
|
||
? LuceneHNSWMethod.BBQ_ENCODER.getName() | ||
: LuceneHNSWMethod.SQ_ENCODER.getName(); | ||
MethodComponent encoderComponent = (resolvedCompressionLevel == CompressionLevel.x32) | ||
? LuceneHNSWMethod.BBQ_ENCODER.getMethodComponent() | ||
: LuceneHNSWMethod.SQ_ENCODER.getMethodComponent(); | ||
|
||
MethodComponentContext encoderComponentContext = new MethodComponentContext(encoderName, new HashMap<>()); | ||
Map<String, Object> resolvedParams = MethodComponent.getParameterMapWithDefaultsAdded( | ||
encoderComponentContext, | ||
SQ_ENCODER.getMethodComponent(), | ||
encoderComponent, | ||
knnMethodConfigContext | ||
); | ||
encoderComponentContext.getParameters().putAll(resolvedParams); | ||
|
Uh oh!
There was an error while loading. Please reload this page.