feat: Add sparse vector query (#214)

spinscale · Alexander Reelsen · web-flow · commit 8e4477622112 · 2025-09-01T20:23:35.000+05:30
Sparse vector query is supported in Elasticsearch v8.15+.

---------

Co-authored-by: Alexander Reelsen <alexander.reelsen@real-digital.de>
diff --git a/src/index.d.ts b/src/index.d.ts
@@ -3768,6 +3768,96 @@ declare namespace esb {
         spanQry?: SpanQueryBase
     ): SpanFieldMaskingQuery;
 
+    /**
+     * The sparse vector query executes a query consisting of sparse vectors, such as those built by a learned sparse retrieval model.
+     *
+     * NOTE: Only available in Elasticsearch v8.15+
+     */
+    export class SparseVectorQuery extends Query {
+        constructor(field?: string);
+
+        /**
+         * Sets the field to query
+         *
+         * @param {string} field the field for the query
+         * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+         */
+        field(field: string): SparseVectorQuery;
+
+        /**
+         * Set model inference id
+         *
+         * @param {string} inferenceId The model inference ID
+         * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+         */
+        inferenceId(inferenceId: string): SparseVectorQuery;
+
+        /**
+         * Sets the input query. Set either the query or the query vector, but not both.
+         *
+         * @param {string} query The input query
+         * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+         */
+        query(query: string): SparseVectorQuery;
+
+        /**
+         * Sets the query vector to run, for use when you don't use inference. Set either the query or the query vector, but not both.
+         *
+         * @param {Object} queryVector The query vector to search with
+         * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+         */
+        queryVector(queryVector: object): SparseVectorQuery;
+
+        /**
+         * Enable pruning
+         *
+         * NOTE: Only available in Elasticsearch v9.0+
+         *
+         * @param {boolean} prune Whether pruning is enabled
+         * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+         */
+        prune(prune: boolean): SparseVectorQuery;
+
+        /**
+         * Set pruning config tokens_freq_ratio_threshold
+         *
+         * NOTE: Only available in Elasticsearch v9.0+
+         *
+         * @param {number} tokensFreqRatioThreshold Value for `pruning_config.tokens_freq_ratio_threshold`
+         * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+         */
+        tokensFreqRatioThreshold(tokensFreqRatioThreshold: number): SparseVectorQuery;
+
+        /**
+         * Set pruning config tokens_weight_threshold
+         *
+         * NOTE: Only available in Elasticsearch v9.0+
+         *
+         * @param {number} tokensWeightThreshold Value for `pruning_config.tokens_weight_threshold`
+         * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+         */
+        tokensWeightThreshold(tokensWeightThreshold: number): SparseVectorQuery;
+
+        /**
+         * Set pruning config only_score_pruned_tokens
+         *
+         * NOTE: Only available in Elasticsearch v9.0+
+         *
+         * @param {boolean} onlyScorePrunedTokens Value for `pruning_config.only_score_pruned_tokens`
+         * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+         */
+        onlyScorePrunedTokens(onlyScorePrunedTokens: boolean): SparseVectorQuery;
+    }
+
+    /**
+     * Factory function to instantiate a new SparseVectorQuery object.
+     * @param {string} [field] Optional field to query, same as the SparseVectorQuery constructor argument
+     * @returns {SparseVectorQuery}
+     */
+    export function sparseVectorQuery(
+        field?: string
+    ): SparseVectorQuery;
+
     /**
      * Knn performs k-nearest neighbor (KNN) searches.
      * This class allows configuring the KNN search with various parameters such as field, query vector,
diff --git a/src/index.js b/src/index.js
@@ -89,7 +89,8 @@ const {
         SpanContainingQuery,
         SpanWithinQuery,
         SpanFieldMaskingQuery
-    }
+    },
+    vectorQueries: { SparseVectorQuery }
 } = require('./queries');
 
 const {
@@ -345,6 +346,9 @@ exports.spanWithinQuery = constructorWrapper(SpanWithinQuery);
 exports.SpanFieldMaskingQuery = SpanFieldMaskingQuery;
 exports.spanFieldMaskingQuery = constructorWrapper(SpanFieldMaskingQuery);
 
+exports.SparseVectorQuery = SparseVectorQuery;
+exports.sparseVectorQuery = constructorWrapper(SparseVectorQuery);
+
 /* ============ ============ ============ */
 /* ======== KNN ======== */
 /* ============ ============ ============ */
diff --git a/src/queries/index.js b/src/queries/index.js
@@ -16,3 +16,5 @@ exports.geoQueries = require('./geo-queries');
 exports.specializedQueries = require('./specialized-queries');
 
 exports.spanQueries = require('./span-queries');
+
+exports.vectorQueries = require('./vector-queries');
diff --git a/src/queries/vector-queries/index.js b/src/queries/vector-queries/index.js
@@ -0,0 +1,3 @@
+'use strict';
+
+exports.SparseVectorQuery = require('./sparse-vector-query');
diff --git a/src/queries/vector-queries/sparse-vector-query.js b/src/queries/vector-queries/sparse-vector-query.js
@@ -0,0 +1,149 @@
+'use strict';
+
+const { Query } = require('../../core');
+
+/**
+ * The sparse vector query executes a query consisting of sparse vectors, such as
+ * those built by a learned sparse retrieval model.
+ *
+ * NOTE: Only available in Elasticsearch v8.15+
+ *
+ * [Elasticsearch reference](https://www.elastic.co/docs/reference/query-languages/query-dsl/query-dsl-sparse-vector-query)
+ *
+ * @example
+ * const qry = esb.sparseVectorQuery()
+ *     .field('ml_tokens')
+ *     .inferenceId('model_id')
+ *     .query('my query');
+ *
+ * @example
+ * // The field can also be passed to the constructor directly
+ * const qry = new esb.SparseVectorQuery('ml_tokens')
+ *     .inferenceId('model_id')
+ *     .query('my query');
+ *
+ * @param {string=} field The field to query
+ *
+ * @extends Query
+ */
+class SparseVectorQuery extends Query {
+    // eslint-disable-next-line require-jsdoc
+    constructor(field) {
+        super('sparse_vector');
+
+        // The typings declare an optional `field` argument; store it here
+        // so that a field passed at construction is not silently dropped.
+        if (field !== undefined && field !== null) {
+            this._queryOpts.field = field;
+        }
+    }
+
+    /**
+     * Sets the field to query
+     *
+     * @param {string} field the field for the query
+     * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+     */
+    field(field) {
+        this._queryOpts.field = field;
+        return this;
+    }
+
+    /**
+     * Set model inference id
+     *
+     * @param {string} inferenceId The model inference ID
+     * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+     */
+    inferenceId(inferenceId) {
+        this._queryOpts.inference_id = inferenceId;
+        return this;
+    }
+
+    /**
+     * Sets the input query.
+     * You should set either query or query vector, but not both
+     *
+     * @param {string} query The input query
+     * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+     */
+    query(query) {
+        this._queryOpts.query = query;
+        return this;
+    }
+
+    /**
+     * Sets the query vector to run, for use when you don't use inference.
+     * You should set either query or query vector, but not both
+     *
+     * @param {Object} queryVector The query vector to search with
+     * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+     */
+    queryVector(queryVector) {
+        this._queryOpts.query_vector = queryVector;
+        return this;
+    }
+
+    /**
+     * Enable pruning
+     *
+     * NOTE: Only available in Elasticsearch v9.0+
+     *
+     * @param {boolean} prune Whether pruning is enabled
+     * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+     */
+    prune(prune) {
+        this._queryOpts.prune = prune;
+        return this;
+    }
+
+    /**
+     * Set pruning config tokens_freq_ratio_threshold
+     *
+     * NOTE: Only available in Elasticsearch v9.0+
+     *
+     * @param {number} tokensFreqRatioThreshold Value for `pruning_config.tokens_freq_ratio_threshold`
+     * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+     */
+    tokensFreqRatioThreshold(tokensFreqRatioThreshold) {
+        if (!this._queryOpts.pruning_config) {
+            this._queryOpts.pruning_config = {};
+        }
+        this._queryOpts.pruning_config.tokens_freq_ratio_threshold = tokensFreqRatioThreshold;
+        return this;
+    }
+
+    /**
+     * Set pruning config tokens_weight_threshold
+     *
+     * NOTE: Only available in Elasticsearch v9.0+
+     *
+     * @param {number} tokensWeightThreshold Value for `pruning_config.tokens_weight_threshold`
+     * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+     */
+    tokensWeightThreshold(tokensWeightThreshold) {
+        if (!this._queryOpts.pruning_config) {
+            this._queryOpts.pruning_config = {};
+        }
+        this._queryOpts.pruning_config.tokens_weight_threshold = tokensWeightThreshold;
+        return this;
+    }
+
+    /**
+     * Set pruning config only_score_pruned_tokens
+     *
+     * NOTE: Only available in Elasticsearch v9.0+
+     *
+     * @param {boolean} onlyScorePrunedTokens Value for `pruning_config.only_score_pruned_tokens`
+     * @returns {SparseVectorQuery} returns `this` so that calls can be chained.
+     */
+    onlyScorePrunedTokens(onlyScorePrunedTokens) {
+        if (!this._queryOpts.pruning_config) {
+            this._queryOpts.pruning_config = {};
+        }
+        this._queryOpts.pruning_config.only_score_pruned_tokens = onlyScorePrunedTokens;
+        return this;
+    }
+}
+
+module.exports = SparseVectorQuery;
diff --git a/test/index.test.js b/test/index.test.js
@@ -172,6 +172,9 @@ test('queries are exported', t => {
 
     t.truthy(esb.SpanFieldMaskingQuery);
     t.truthy(esb.spanFieldMaskingQuery);
+
+    t.truthy(esb.sparseVectorQuery());
+    t.truthy(esb.SparseVectorQuery);
 });
 
 test('aggregations are exported', t => {
diff --git a/test/queries-test/sparse-vector-query.test.js b/test/queries-test/sparse-vector-query.test.js

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+'use strict';`
	`2`	`+`
	`3`	`+exports.SparseVectorQuery = require('./sparse-vector-query');`