elastic · mridula-s109 · Apr 2, 2025 · Apr 2, 2025 · Apr 2, 2025 · Apr 2, 2025
diff --git a/docs/changelog/126238.yaml b/docs/changelog/126238.yaml
@@ -0,0 +1,5 @@
+pr: 126238
+summary: Add `min_score` support to the linear retriever
+area: Search
+type: enhancement
+issues: []
diff --git a/docs/reference/elasticsearch/rest-apis/retrievers.md b/docs/reference/elasticsearch/rest-apis/retrievers.md
@@ -293,12 +293,69 @@ See also [this hybrid search example](docs-content://solutions/search/retrievers
 
     This value determines the size of the individual result sets per query. A higher value will improve result relevance at the cost of performance. The final ranked result set is pruned down to the search request’s [size](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#search-size-param). `rank_window_size` must be greater than or equal to `size` and greater than or equal to `1`. Defaults to the `size` parameter.
 
+`min_score`
+:   (Optional, float)
+
+    Minimum score threshold for documents to be included in the final result set. Documents with scores below this threshold are filtered out. Must be greater than or equal to `0` if explicitly set. If not set, defaults to the minimum float value, meaning no documents are filtered based on score.
 
 `filter`
 :   (Optional, [query object or list of query objects](/reference/query-languages/querydsl.md))
 
     Applies the specified [boolean query filter](/reference/query-languages/query-dsl/query-dsl-bool-query.md) to all of the specified sub-retrievers, according to each retriever’s specifications.
 
+```console
+GET /restaurants/_search
+{
+  "retriever": {
+    "linear": { <1>
+      "retrievers": [ <2>
+        {
+          "retriever": { <3>
+            "standard": {
+              "query": {
+                "multi_match": {
+                  "query": "Italian cuisine",
+                  "fields": [
+                    "description",
+                    "cuisine"
+                  ]
+                }
+              }
+            }
+          },
+          "weight": 2.0, <4>
+          "normalizer": "minmax" <5>
+        },
+        {
+          "retriever": { <6>
+            "knn": {
+              "field": "vector",
+              "query_vector": [10, 22, 77],
+              "k": 10,
+              "num_candidates": 10
+            }
+          },
+          "weight": 1.0, <7>
+          "normalizer": "minmax" <8>
+        }
+      ],
+      "rank_window_size": 50, <9>
+      "min_score": 1.5 <10>
+    }
+  }
+}
+```
+
+1. Defines a retriever tree using the `linear` retriever type.
+2. The array of retrievers to be combined.
+3. A `standard` retriever used for traditional full-text search.
+4. Weight applied to the score from the `standard` retriever.
+5. Normalization method (`minmax`) applied to the `standard` retriever score.
+6. A `knn` retriever used for vector-based similarity search.
+7. Weight applied to the score from the `knn` retriever.
+8. Normalization method (`minmax`) applied to the `knn` retriever score.
+9. The number of top documents considered for scoring in the linear combination.
+10. Minimum score threshold for the final result set — documents below this combined score will be excluded.
 
 
 ## RRF Retriever [rrf-retriever]

diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java
@@ -236,6 +236,7 @@ static TransportVersion def(int id) {
     public static final TransportVersion PINNED_RETRIEVER = def(9_068_0_00);
     public static final TransportVersion ML_INFERENCE_SAGEMAKER = def(9_069_0_00);
     public static final TransportVersion WRITE_LOAD_INCLUDES_BUFFER_WRITES = def(9_070_00_0);
+    public static final TransportVersion RANK_DOCS_QUERY_MIN_SCORE = def(9_071_0_00);
 
     /*
      * STOP! READ THIS FIRST! No, really,

diff --git a/server/src/main/java/org/elasticsearch/index/query/RankDocsQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/RankDocsQueryBuilder.java
@@ -28,15 +28,22 @@
 public class RankDocsQueryBuilder extends AbstractQueryBuilder<RankDocsQueryBuilder> {
 
     public static final String NAME = "rank_docs_query";
+    public static final float DEFAULT_MIN_SCORE = Float.MIN_VALUE;
 
     private final RankDoc[] rankDocs;
     private final QueryBuilder[] queryBuilders;
     private final boolean onlyRankDocs;
+    private final float minScore;
 
     public RankDocsQueryBuilder(RankDoc[] rankDocs, QueryBuilder[] queryBuilders, boolean onlyRankDocs) {
+        this(rankDocs, queryBuilders, onlyRankDocs, DEFAULT_MIN_SCORE);
+    }
+
+    public RankDocsQueryBuilder(RankDoc[] rankDocs, QueryBuilder[] queryBuilders, boolean onlyRankDocs, float minScore) {
         this.rankDocs = rankDocs;
         this.queryBuilders = queryBuilders;
         this.onlyRankDocs = onlyRankDocs;
+        this.minScore = minScore;
     }
 
     public RankDocsQueryBuilder(StreamInput in) throws IOException {
@@ -45,9 +52,13 @@ public RankDocsQueryBuilder(StreamInput in) throws IOException {
         if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_16_0)) {
             this.queryBuilders = in.readOptionalArray(c -> c.readNamedWriteable(QueryBuilder.class), QueryBuilder[]::new);
             this.onlyRankDocs = in.readBoolean();
+            this.minScore = in.getTransportVersion().onOrAfter(TransportVersions.RANK_DOCS_QUERY_MIN_SCORE)
+                ? in.readFloat()
+                : DEFAULT_MIN_SCORE;
         } else {
             this.queryBuilders = null;
             this.onlyRankDocs = false;
+            this.minScore = DEFAULT_MIN_SCORE;
         }
     }
 
@@ -70,7 +81,7 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws
                 changed |= newQueryBuilders[i] != queryBuilders[i];
             }
             if (changed) {
-                RankDocsQueryBuilder clone = new RankDocsQueryBuilder(rankDocs, newQueryBuilders, onlyRankDocs);
+                RankDocsQueryBuilder clone = new RankDocsQueryBuilder(rankDocs, newQueryBuilders, onlyRankDocs, minScore);
                 clone.queryName(queryName());
                 return clone;
             }
@@ -88,6 +99,9 @@ protected void doWriteTo(StreamOutput out) throws IOException {
         if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_16_0)) {
             out.writeOptionalArray(StreamOutput::writeNamedWriteable, queryBuilders);
             out.writeBoolean(onlyRankDocs);
+            if (out.getTransportVersion().onOrAfter(TransportVersions.RANK_DOCS_QUERY_MIN_SCORE)) {
+                out.writeFloat(minScore);
+            }
         }
     }
 
@@ -115,7 +129,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException {
             queries = new Query[0];
             queryNames = Strings.EMPTY_ARRAY;
         }
-        return new RankDocsQuery(reader, shardRankDocs, queries, queryNames, onlyRankDocs);
+        return new RankDocsQuery(reader, shardRankDocs, queries, queryNames, onlyRankDocs, minScore);
     }
 
     @Override
@@ -135,12 +149,13 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep
     protected boolean doEquals(RankDocsQueryBuilder other) {
         return Arrays.equals(rankDocs, other.rankDocs)
             && Arrays.equals(queryBuilders, other.queryBuilders)
-            && onlyRankDocs == other.onlyRankDocs;
+            && onlyRankDocs == other.onlyRankDocs
+            && minScore == other.minScore;
     }
 
     @Override
     protected int doHashCode() {
-        return Objects.hash(Arrays.hashCode(rankDocs), Arrays.hashCode(queryBuilders), onlyRankDocs);
+        return Objects.hash(Arrays.hashCode(rankDocs), Arrays.hashCode(queryBuilders), onlyRankDocs, minScore);
     }
 
     @Override

diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java
@@ -119,6 +119,8 @@ public final RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOExceptio
             if (entry.retriever.isCompound() && false == preFilterQueryBuilders.isEmpty()) {
                 entry.retriever.getPreFilterQueryBuilders().addAll(preFilterQueryBuilders);
             }
+            // Propagate the minScore down to the child retriever
+            entry.retriever.minScore(this.minScore);
             RetrieverBuilder newRetriever = entry.retriever.rewrite(ctx);
             if (newRetriever != entry.retriever) {
                 newRetrievers.add(new RetrieverSource(newRetriever, null));
@@ -198,6 +200,7 @@ public void onFailure(Exception e) {
             results::get
         );
         rankDocsRetrieverBuilder.retrieverName(retrieverName());
+        rankDocsRetrieverBuilder.minScore(this.minScore);
         return rankDocsRetrieverBuilder;
     }
 

diff --git a/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java
@@ -105,6 +105,8 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder
         // if we have aggregations we need to compute them based on all doc matches, not just the top hits
         // similarly, for profile and explain we re-run all parent queries to get all needed information
         RankDoc[] rankDocResults = rankDocs.get();
+        float effectiveMinScore = this.minScore() != null ? this.minScore() : RankDocsQueryBuilder.DEFAULT_MIN_SCORE;
+
         if (hasAggregations(searchSourceBuilder)
             || isExplainRequest(searchSourceBuilder)
             || isProfileRequest(searchSourceBuilder)
@@ -122,18 +124,29 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder
                     false
                 );
             }
+            // Set top-level minScore only when not in onlyRankDocs mode
+            if (effectiveMinScore != RankDocsQueryBuilder.DEFAULT_MIN_SCORE) {
+                searchSourceBuilder.minScore(effectiveMinScore);
+            }
         } else {
-            rankQuery = new RankDocsQueryBuilder(rankDocResults, null, false);
+            // Pass minScore down to RankDocsQueryBuilder and set onlyRankDocs = true to ensure pre-computed scores are used.
+            // Filter the results upfront if minScore is set
+            RankDoc[] finalRankDocs;
+            if (effectiveMinScore != RankDocsQueryBuilder.DEFAULT_MIN_SCORE) {
+                finalRankDocs = Arrays.stream(rankDocResults).filter(doc -> doc.score >= effectiveMinScore).toArray(RankDoc[]::new);
+            } else {
+                finalRankDocs = rankDocResults;
+            }
+            // Now pass the potentially filtered array and the original minScore
+            rankQuery = new RankDocsQueryBuilder(finalRankDocs, null, true, effectiveMinScore);
+            // Do NOT set top-level minScore here, filtering is done above, and RankDocsQuery handles score propagation.
         }
         rankQuery.queryName(retrieverName());
         // ignore prefilters of this level, they were already propagated to children
         searchSourceBuilder.query(rankQuery);
         if (searchSourceBuilder.size() < 0) {
             searchSourceBuilder.size(rankWindowSize);
         }
-        if (sourceHasMinScore()) {
-            searchSourceBuilder.minScore(this.minScore() == null ? Float.MIN_VALUE : this.minScore());
-        }
         if (searchSourceBuilder.size() + searchSourceBuilder.from() > rankDocResults.length) {
             searchSourceBuilder.size(Math.max(0, rankDocResults.length - searchSourceBuilder.from()));
         }

diff --git a/server/src/main/java/org/elasticsearch/search/retriever/rankdoc/RankDocsQuery.java b/server/src/main/java/org/elasticsearch/search/retriever/rankdoc/RankDocsQuery.java
@@ -24,6 +24,7 @@
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.ScorerSupplier;
 import org.apache.lucene.search.Weight;
+import org.elasticsearch.common.lucene.search.function.MinScoreScorer;
 import org.elasticsearch.search.rank.RankDoc;
 
 import java.io.IOException;
@@ -32,6 +33,7 @@
 import java.util.Objects;
 
 import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+import static org.elasticsearch.index.query.RankDocsQueryBuilder.DEFAULT_MIN_SCORE;
 
 /**
  * A {@code RankDocsQuery} returns the top k documents in the order specified by the global doc IDs.
@@ -169,7 +171,7 @@ public float score() {
                             // so here we want to differentiate between this and all the tailQuery matches
                             // that would also produce a 0 score due to filtering, by setting the score to `Float.MIN_VALUE` instead for
                             // RankDoc matches.
-                            return Math.max(docs[upTo].score, Float.MIN_VALUE);
+                            return Math.max(docs[upTo].score, DEFAULT_MIN_SCORE);
                         }
 
                         @Override
@@ -234,6 +236,7 @@ public int hashCode() {
     // RankDocs provided. This query does not contribute to scoring, as it is set as filter when creating the weight
     private final Query tailQuery;
     private final boolean onlyRankDocs;
+    private final float minScore;
 
     /**
      * Creates a {@code RankDocsQuery} based on the provided docs.
@@ -242,8 +245,16 @@ public int hashCode() {
      * @param sources      The original queries that were used to compute the top documents
      * @param queryNames   The names (if present) of the original retrievers
      * @param onlyRankDocs Whether the query should only match the provided rank docs
+     * @param minScore     The minimum score threshold for documents to be included in total hits
      */
-    public RankDocsQuery(IndexReader reader, RankDoc[] rankDocs, Query[] sources, String[] queryNames, boolean onlyRankDocs) {
+    public RankDocsQuery(
+        IndexReader reader,
+        RankDoc[] rankDocs,
+        Query[] sources,
+        String[] queryNames,
+        boolean onlyRankDocs,
+        float minScore
+    ) {
         assert sources.length == queryNames.length;
         // clone to avoid side-effect after sorting
         this.docs = rankDocs.clone();
@@ -260,13 +271,15 @@ public RankDocsQuery(IndexReader reader, RankDoc[] rankDocs, Query[] sources, St
             this.tailQuery = null;
         }
         this.onlyRankDocs = onlyRankDocs;
+        this.minScore = minScore;
     }
 
-    private RankDocsQuery(RankDoc[] docs, Query topQuery, Query tailQuery, boolean onlyRankDocs) {
+    private RankDocsQuery(RankDoc[] docs, Query topQuery, Query tailQuery, boolean onlyRankDocs, float minScore) {
         this.docs = docs;
         this.topQuery = topQuery;
         this.tailQuery = tailQuery;
         this.onlyRankDocs = onlyRankDocs;
+        this.minScore = minScore;
     }
 
     private static int binarySearch(RankDoc[] docs, int fromIndex, int toIndex, int key) {
@@ -299,7 +312,11 @@ public RankDoc[] rankDocs() {
     @Override
     public Query rewrite(IndexSearcher searcher) throws IOException {
         if (tailQuery == null) {
-            return topQuery;
+            var topRewrite = topQuery.rewrite(searcher);
+            if (topRewrite != topQuery) {
+                return new RankDocsQuery(this.docs, topRewrite, null, this.onlyRankDocs, this.minScore);
+            }
+            return this;
         }
         boolean hasChanged = false;
         var topRewrite = topQuery.rewrite(searcher);
@@ -310,22 +327,33 @@ public Query rewrite(IndexSearcher searcher) throws IOException {
         if (tailRewrite != tailQuery) {
             hasChanged = true;
         }
-        return hasChanged ? new RankDocsQuery(docs, topRewrite, tailRewrite, onlyRankDocs) : this;
+        return hasChanged ? new RankDocsQuery(this.docs, topRewrite, tailRewrite, this.onlyRankDocs, this.minScore) : this;
     }
 
     @Override
     public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
-        if (tailQuery == null) {
-            throw new IllegalArgumentException("[tailQuery] should not be null; maybe missing a rewrite?");
+        Query combinedQuery;
+        if (onlyRankDocs) {
+            combinedQuery = topQuery;
+        } else {
+            if (tailQuery == null) {
+                combinedQuery = topQuery;
+            } else {
+                var combined = new BooleanQuery.Builder().add(topQuery, BooleanClause.Occur.SHOULD)
+                    .add(tailQuery, BooleanClause.Occur.FILTER)
+                    .build();
+                combinedQuery = combined;
+            }
         }
-        var combined = new BooleanQuery.Builder().add(topQuery, onlyRankDocs ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD)
-            .add(tailQuery, BooleanClause.Occur.FILTER)
-            .build();
+
         var topWeight = topQuery.createWeight(searcher, scoreMode, boost);
-        var combinedWeight = searcher.rewrite(combined).createWeight(searcher, scoreMode, boost);
+        var combinedWeight = searcher.rewrite(combinedQuery).createWeight(searcher, scoreMode, boost);
         return new Weight(this) {
             @Override
             public int count(LeafReaderContext context) throws IOException {
+                if (onlyRankDocs) {
+                    return topWeight.count(context);
+                }
                 return combinedWeight.count(context);
             }
 
@@ -346,7 +374,23 @@ public Matches matches(LeafReaderContext context, int doc) throws IOException {
 
             @Override
             public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
-                return combinedWeight.scorerSupplier(context);
+                ScorerSupplier baseSupplier = onlyRankDocs ? topWeight.scorerSupplier(context) : combinedWeight.scorerSupplier(context);
+
+                if (minScore != DEFAULT_MIN_SCORE && baseSupplier != null) {
+                    return new ScorerSupplier() {
+                        @Override
+                        public Scorer get(long leadCost) throws IOException {
+                            Scorer scorer = baseSupplier.get(leadCost);
+                            return scorer == null ? null : new MinScoreScorer(scorer, minScore);
+                        }
+
+                        @Override
+                        public long cost() {
+                            return baseSupplier.cost();
+                        }
+                    };
+                }
+                return baseSupplier;
             }
         };
     }