elastic · mridula-s109 · Mar 6, 2025 · Mar 6, 2025 · Mar 12, 2025 · Mar 13, 2025
diff --git a/docs/changelog/124182.yaml b/docs/changelog/124182.yaml
@@ -0,0 +1,5 @@
+pr: 124182
+summary: Add `min_score` support to linear retriever
+area: Search
+type: enhancement
+issues: []
diff --git a/docs/reference/elasticsearch/rest-apis/retrievers.md b/docs/reference/elasticsearch/rest-apis/retrievers.md
@@ -269,11 +269,11 @@ Each entry specifies the following parameters:
 
 * `weight`:: (Optional, float)
 
-    The weight that each score of this retriever’s top docs will be multiplied with. Must be greater or equal to 0. Defaults to 1.0.
+    The weight by which each score of this retriever's top docs is multiplied. Must be greater than or equal to 0. Defaults to 1.0.
 
 * `normalizer`:: (Optional, String)
 
-    Specifies how we will normalize the retriever’s scores, before applying the specified `weight`. Available values are: `minmax`, and `none`. Defaults to `none`.
+    Specifies how the retriever's scores are normalized before the specified `weight` is applied. Available values are `minmax` and `none`. Defaults to `none`.
 
     * `none`
     * `minmax` : A `MinMaxScoreNormalizer` that normalizes scores based on the following formula
@@ -288,14 +288,78 @@ See also [this hybrid search example](docs-content://solutions/search/retrievers
 `rank_window_size`
 :   (Optional, integer)
 
-    This value determines the size of the individual result sets per query. A higher value will improve result relevance at the cost of performance. The final ranked result set is pruned down to the search request’s [size](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#search-size-param). `rank_window_size` must be greater than or equal to `size` and greater than or equal to `1`. Defaults to the `size` parameter.
+    This value determines the size of the individual result sets per query. A higher value will improve result relevance at the cost of performance. The final ranked result set is pruned down to the search request's [size](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#search-size-param). `rank_window_size` must be greater than or equal to `size` and greater than or equal to `1`. Defaults to the `size` parameter.
+
+
+`min_score`
+:   (Optional, float)
+
+    Minimum score threshold for documents to be included in the final result set. Documents with scores below this threshold will be filtered out. Must be greater than or equal to 0. Defaults to 0.
 
 
 `filter`
 :   (Optional, [query object or list of query objects](/reference/query-languages/querydsl.md))
 
-    Applies the specified [boolean query filter](/reference/query-languages/query-dsl-bool-query.md) to all of the specified sub-retrievers, according to each retriever’s specifications.
+    Applies the specified [boolean query filter](/reference/query-languages/query-dsl-bool-query.md) to all of the specified sub-retrievers, according to each retriever's specifications.
+
+
+### Example: Hybrid search with min_score [linear-retriever-example]
+
+This example demonstrates how to use the Linear retriever to combine a standard retriever with a kNN retriever, applying weights, normalization, and a minimum score threshold:
+
+```console
+GET /restaurants/_search
+{
+  "retriever": {
+    "linear": { <1>
+      "retrievers": [ <2>
+        {
+          "retriever": { <3>
+            "standard": {
+              "query": {
+                "multi_match": {
+                  "query": "Italian cuisine",
+                  "fields": [
+                    "description",
+                    "cuisine"
+                  ]
+                }
+              }
+            }
+          },
+          "weight": 2.0, <4>
+          "normalizer": "minmax" <5>
+        },
+        {
+          "retriever": { <6>
+            "knn": {
+              "field": "vector",
+              "query_vector": [10, 22, 77],
+              "k": 10,
+              "num_candidates": 10
+            }
+          },
+          "weight": 1.0, <7>
+          "normalizer": "minmax" <8>
+        }
+      ],
+      "rank_window_size": 50, <9>
+      "min_score": 1.5 <10>
+    }
+  }
+}
+```
 
+1. Defines a retriever tree with a Linear retriever.
+2. The sub-retrievers array.
+3. The first sub-retriever is a `standard` retriever.
+4. The weight applied to the scores from the standard retriever (2.0).
+5. The normalization method applied to the standard retriever's scores.
+6. The second sub-retriever is a `knn` retriever.
+7. The weight applied to the scores from the kNN retriever (1.0).
+8. The normalization method applied to the kNN retriever's scores.
+9. The rank window size for the Linear retriever.
+10. The minimum score threshold - documents with a combined score below 1.5 will be filtered out from the final result set.
 
 
 ## RRF Retriever [rrf-retriever]
@@ -320,13 +384,13 @@ An [RRF](/reference/elasticsearch/rest-apis/reciprocal-rank-fusion.md) retriever
 `rank_window_size`
 :   (Optional, integer)
 
-    This value determines the size of the individual result sets per query. A higher value will improve result relevance at the cost of performance. The final ranked result set is pruned down to the search request’s [size](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#search-size-param). `rank_window_size` must be greater than or equal to `size` and greater than or equal to `1`. Defaults to the `size` parameter.
+    This value determines the size of the individual result sets per query. A higher value will improve result relevance at the cost of performance. The final ranked result set is pruned down to the search request's [size](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#search-size-param). `rank_window_size` must be greater than or equal to `size` and greater than or equal to `1`. Defaults to the `size` parameter.
 
 
 `filter`
 :   (Optional, [query object or list of query objects](/reference/query-languages/querydsl.md))
 
-    Applies the specified [boolean query filter](/reference/query-languages/query-dsl-bool-query.md) to all of the specified sub-retrievers, according to each retriever’s specifications.
+    Applies the specified [boolean query filter](/reference/query-languages/query-dsl-bool-query.md) to all of the specified sub-retrievers, according to each retriever's specifications.
 
 
 
@@ -435,12 +499,12 @@ For compound retrievers like `rrf`, the `window_size` parameter defines the tota
 
 When using the `rescorer`, an error is returned if the following conditions are not met:
 
-* The minimum configured rescore’s `window_size` is:
+* The minimum configured rescore's `window_size` is:
 
     * Greater than or equal to the `size` of the parent retriever for nested `rescorer` setups.
     * Greater than or equal to the `size` of the search request when used as the primary retriever in the tree.
 
-* And the maximum rescore’s `window_size` is:
+* And the maximum rescore's `window_size` is:
 
     * Smaller than or equal to the `size` or `rank_window_size` of the child retriever.
 
@@ -564,7 +628,7 @@ To use `text_similarity_reranker` you must first set up an inference endpoint fo
 
 You have the following options:
 
-* Use the the built-in [Elastic Rerank](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put) cross-encoder model via the inference API’s {{es}} service.
+* Use the built-in [Elastic Rerank](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put) cross-encoder model via the inference API's {{es}} service.
 * Use the [Cohere Rerank inference endpoint](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put) with the `rerank` task type.
 * Use the [Google Vertex AI inference endpoint](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put) with the `rerank` task type.
 * Upload a model to {{es}} with [Eland](eland://reference/machine-learning.md#ml-nlp-pytorch) using the `text_similarity` NLP task type.

diff --git a/server/src/main/java/org/elasticsearch/index/query/RankDocsQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/RankDocsQueryBuilder.java
@@ -29,14 +29,36 @@ public class RankDocsQueryBuilder extends AbstractQueryBuilder<RankDocsQueryBuil
 
     public static final String NAME = "rank_docs_query";
 
+    /**
+     * Default minimum score threshold for documents to be included in results.
+     * Float.MIN_VALUE is the smallest positive float (about 1.4e-45), not a negative number,
+     * so by default virtually all positive scores are above this threshold.
+     *
+     * This threshold is separate from the special handling of scores that are exactly 0:
+     * - The minScore parameter determines which documents are included in results based on their score
+     * - Documents with a score of exactly 0 will always be assigned Float.MIN_VALUE internally
+     *   to differentiate them from filtered matches, regardless of the minScore value
+     *
+     * Setting minScore above this default filters out documents with scores below the new threshold.
+     * Note that 0.0f is lower than Float.MIN_VALUE, so passing 0.0f does not raise the threshold.
+     */
+    public static final float DEFAULT_MIN_SCORE = Float.MIN_VALUE;
+
     private final RankDoc[] rankDocs;
     private final QueryBuilder[] queryBuilders;
     private final boolean onlyRankDocs;
+    private final float minScore;
+    private boolean countFilteredHits = false;
 
     public RankDocsQueryBuilder(RankDoc[] rankDocs, QueryBuilder[] queryBuilders, boolean onlyRankDocs) {
+        this(rankDocs, queryBuilders, onlyRankDocs, DEFAULT_MIN_SCORE);
+    }
+
+    public RankDocsQueryBuilder(RankDoc[] rankDocs, QueryBuilder[] queryBuilders, boolean onlyRankDocs, float minScore) {
         this.rankDocs = rankDocs;
         this.queryBuilders = queryBuilders;
         this.onlyRankDocs = onlyRankDocs;
+        this.minScore = minScore;
     }
 
     public RankDocsQueryBuilder(StreamInput in) throws IOException {
@@ -45,9 +67,17 @@ public RankDocsQueryBuilder(StreamInput in) throws IOException {
         if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_16_0)) {
             this.queryBuilders = in.readOptionalArray(c -> c.readNamedWriteable(QueryBuilder.class), QueryBuilder[]::new);
             this.onlyRankDocs = in.readBoolean();
+            this.minScore = in.readFloat();
+            if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_17_0)) {
+                this.countFilteredHits = in.readBoolean();
+            } else {
+                this.countFilteredHits = false;
+            }
         } else {
             this.queryBuilders = null;
             this.onlyRankDocs = false;
+            this.minScore = DEFAULT_MIN_SCORE;
+            this.countFilteredHits = false;
         }
     }
 
@@ -70,7 +100,7 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws
                 changed |= newQueryBuilders[i] != queryBuilders[i];
             }
             if (changed) {
-                RankDocsQueryBuilder clone = new RankDocsQueryBuilder(rankDocs, newQueryBuilders, onlyRankDocs);
+                RankDocsQueryBuilder clone = new RankDocsQueryBuilder(rankDocs, newQueryBuilders, onlyRankDocs, minScore);
                 clone.queryName(queryName());
                 return clone;
             }
@@ -88,6 +118,10 @@ protected void doWriteTo(StreamOutput out) throws IOException {
         if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_16_0)) {
             out.writeOptionalArray(StreamOutput::writeNamedWriteable, queryBuilders);
             out.writeBoolean(onlyRankDocs);
+            out.writeFloat(minScore);
+            if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_17_0)) {
+                out.writeBoolean(countFilteredHits);
+            }
         }
     }
 
@@ -115,7 +149,12 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException {
             queries = new Query[0];
             queryNames = Strings.EMPTY_ARRAY;
         }
-        return new RankDocsQuery(reader, shardRankDocs, queries, queryNames, onlyRankDocs);
+
+        RankDocsQuery query = new RankDocsQuery(reader, shardRankDocs, queries, queryNames, onlyRankDocs, minScore);
+        if (countFilteredHits) {
+            query.setCountFilteredHits(true);
+        }
+        return query;
     }
 
     @Override
@@ -135,16 +174,31 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep
     protected boolean doEquals(RankDocsQueryBuilder other) {
         return Arrays.equals(rankDocs, other.rankDocs)
             && Arrays.equals(queryBuilders, other.queryBuilders)
-            && onlyRankDocs == other.onlyRankDocs;
+            && onlyRankDocs == other.onlyRankDocs
+            && minScore == other.minScore
+            && countFilteredHits == other.countFilteredHits;
     }
 
     @Override
     protected int doHashCode() {
-        return Objects.hash(Arrays.hashCode(rankDocs), Arrays.hashCode(queryBuilders), onlyRankDocs);
+        return Objects.hash(Arrays.hashCode(rankDocs), Arrays.hashCode(queryBuilders), onlyRankDocs, minScore, countFilteredHits);
     }
 
     @Override
     public TransportVersion getMinimalSupportedVersion() {
         return TransportVersions.V_8_16_0;
     }
+
+    /**
+     * Sets whether this query should count only documents that pass the min_score filter.
+     * When true, the total hits count will reflect the number of documents meeting the minimum score threshold.
+     * When false (default), the total hits count will include all matching documents regardless of score.
+     *
+     * @param countFilteredHits true to count only documents passing min_score, false to count all matches
+     * @return this builder
+     */
+    public RankDocsQueryBuilder setCountFilteredHits(boolean countFilteredHits) {
+        this.countFilteredHits = countFilteredHits;
+        return this;
+    }
 }
diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java
@@ -198,6 +198,7 @@ public void onFailure(Exception e) {
             results::get
         );
         rankDocsRetrieverBuilder.retrieverName(retrieverName());
+        rankDocsRetrieverBuilder.minScore = minScore;
         return rankDocsRetrieverBuilder;
     }