Skip to content

Commit 7df388d

Browse files
authored
Make terminate_after early termination friendly (elastic#97540)
There are situations in which the terminate_after functionality causes the collection to keep on going although there is nothing to collect, with the only goal of incrementing the counter of collected docs and eventually early terminating which sets the `terminated_early` flag in the search response to true. When docs collection early terminates, we should rather honor the corresponding `CollectionTerminatedException` that is thrown, and adjust expectations around the fact that `terminate_after` affects actual collection of documents, meaning that it can't be honored if the threshold has not been reached by the team the collection early terminates for other reasons. This commit adjust the QueryPhaseCollector behavior to do that, which allows for some additional simplifications. Closes elastic#97269
1 parent bd7c0f5 commit 7df388d

File tree

6 files changed

+138
-181
lines changed

6 files changed

+138
-181
lines changed

docs/changelog/97540.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 97540
2+
summary: Make `terminate_after` early termination friendly
3+
area: Search
4+
type: bug
5+
issues:
6+
- 97269

docs/reference/search/search-your-data/search-your-data.asciidoc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,11 @@ The response will not contain any hits as the `size` was set to `0`. The
478478
matching documents, or greater than `0` meaning that there were at least
479479
as many documents matching the query when it was early terminated.
480480
Also if the query was terminated early, the `terminated_early` flag will
481-
be set to `true` in the response.
481+
be set to `true` in the response. Some queries are able to retrieve the hits
482+
count directly from the index statistics, which is much faster as it does
483+
not require executing the query. In those situations, no documents are
484+
collected, the returned `total.hits` will be higher than `terminate_after`,
485+
and `terminated_early` will be set to `false`.
482486

483487
[source,console-result]
484488
--------------------------------------------------
@@ -503,8 +507,8 @@ be set to `true` in the response.
503507
}
504508
--------------------------------------------------
505509
// TESTRESPONSE[s/"took": 3/"took": $body.took/]
510+
// TESTRESPONSE[s/"terminated_early": true/"terminated_early": $body.terminated_early/]
506511
// TESTRESPONSE[s/"value": 1/"value": $body.hits.total.value/]
507-
// TESTRESPONSE[s/"relation": "eq"/"relation": $body.hits.total.relation/]
508512

509513
The `took` time in the response contains the milliseconds that this request
510514
took for processing, beginning quickly after the node received the query, up

server/src/main/java/org/elasticsearch/search/query/QueryPhaseCollector.java

Lines changed: 17 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -144,19 +144,9 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept
144144
Bits postFilterBits = getPostFilterBits(context);
145145

146146
if (aggsCollector == null) {
147-
LeafCollector tdlc = null;
148-
try {
149-
tdlc = topDocsCollector.getLeafCollector(context);
150-
} catch (@SuppressWarnings("unused") CollectionTerminatedException e) {
151-
// TODO we keep on collecting although we have nothing to collect (there is no top docs nor aggs leaf collector).
152-
// The reason is only to set the early terminated flag to the QueryResult like some tests expect. This needs fixing.
153-
if (terminateAfter == 0) {
154-
throw e;
155-
}
156-
}
157-
final LeafCollector topDocsLeafCollector = tdlc;
147+
final LeafCollector topDocsLeafCollector = topDocsCollector.getLeafCollector(context);
158148
if (postFilterBits == null && terminateAfter == 0 && minScore == null) {
159-
// no need to wrap if we just need to collect unfiltered docs through leaf collector
149+
// no need to wrap if we just need to collect unfiltered docs through leaf collector.
160150
// aggs collector was not originally provided so the overall score mode is that of the top docs collector
161151
return topDocsLeafCollector;
162152
}
@@ -177,19 +167,14 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept
177167
} catch (@SuppressWarnings("unused") CollectionTerminatedException e) {
178168
// aggs collector does not need this segment, but the top docs collector may.
179169
if (topDocsLeafCollector == null) {
180-
// TODO we keep on collecting although we have nothing to collect (there is no top docs nor aggs leaf collector).
181-
// The reason is only to set the early terminated flag to the QueryResult. We should fix this.
182-
if (terminateAfter == 0) {
183-
throw e;
184-
}
170+
throw e;
185171
}
186172
}
187173
final LeafCollector aggsLeafCollector = alf;
188174

189-
if (topDocsLeafCollector == null && terminateAfter == 0 && minScore == null) {
190-
// top docs collector early terminated, we can avoid wrapping as long as we don't need to apply terminate_after and min_score.
191-
// post_filter does not matter because it's not applied to aggs collection anyways. terminate_after matters only until we
192-
// address the different TODOs around needless collection to honour terminate_after after early termination.
175+
if (topDocsLeafCollector == null && minScore == null) {
176+
// top docs collector early terminated, we can avoid wrapping as long as we don't need to apply min_score.
177+
// post_filter and terminate_after do not matter because they not applied to aggs collection anyways.
193178
// aggs don't support skipping low scoring hits, so we can rely on setMinCompetitiveScore being a no-op already.
194179
return aggsLeafCollector;
195180
}
@@ -223,60 +208,32 @@ public DocIdSetIterator competitiveIterator() throws IOException {
223208

224209
private class TopDocsLeafCollector implements LeafCollector {
225210
private final Bits postFilterBits;
226-
private LeafCollector topDocsLeafCollector;
211+
private final LeafCollector topDocsLeafCollector;
227212
private Scorable scorer;
228213

229214
TopDocsLeafCollector(Bits postFilterBits, LeafCollector topDocsLeafCollector) {
215+
assert topDocsLeafCollector != null;
216+
assert postFilterBits != null || terminateAfter > 0 || minScore != null;
230217
this.postFilterBits = postFilterBits;
231218
this.topDocsLeafCollector = topDocsLeafCollector;
232219
}
233220

234221
@Override
235222
public void setScorer(Scorable scorer) throws IOException {
236-
if (cacheScores) {
237-
scorer = ScoreCachingWrappingScorer.wrap(scorer);
238-
}
239-
if (terminateAfter > 0) {
240-
scorer = new FilterScorable(scorer) {
241-
@Override
242-
public void setMinCompetitiveScore(float minScore) {
243-
// Ignore calls to setMinCompetitiveScore when terminate_after is used, otherwise early termination
244-
// of total hits tracking makes it impossible to terminate after.
245-
// TODO the reason is only to set the early terminated flag to the QueryResult. We should fix this.
246-
}
247-
};
248-
}
249-
if (topDocsLeafCollector != null) {
250-
topDocsLeafCollector.setScorer(scorer);
251-
}
223+
topDocsLeafCollector.setScorer(scorer);
252224
this.scorer = scorer;
253225
}
254226

255227
@Override
256228
public DocIdSetIterator competitiveIterator() throws IOException {
257-
if (topDocsLeafCollector != null) {
258-
return topDocsLeafCollector.competitiveIterator();
259-
}
260-
return null;
229+
return topDocsLeafCollector.competitiveIterator();
261230
}
262231

263232
@Override
264233
public void collect(int doc) throws IOException {
265234
if (shouldCollectTopDocs(doc, scorer, postFilterBits)) {
266235
numCollected++;
267-
if (topDocsLeafCollector != null) {
268-
try {
269-
topDocsLeafCollector.collect(doc);
270-
} catch (@SuppressWarnings("unused") CollectionTerminatedException e) {
271-
topDocsLeafCollector = null;
272-
// TODO we keep on collecting although we have nothing to collect (there is no top docs nor aggs leaf
273-
// collector).
274-
// The reason is only to set the early terminated flag to the QueryResult. We should fix this.
275-
if (terminateAfter == 0) {
276-
throw e;
277-
}
278-
}
279-
}
236+
topDocsLeafCollector.collect(doc);
280237
}
281238
}
282239
}
@@ -288,6 +245,7 @@ private class CompositeLeafCollector implements LeafCollector {
288245
private Scorable scorer;
289246

290247
CompositeLeafCollector(Bits postFilterBits, LeafCollector topDocsLeafCollector, LeafCollector aggsLeafCollector) {
248+
assert topDocsLeafCollector != null || aggsLeafCollector != null;
291249
this.postFilterBits = postFilterBits;
292250
this.topDocsLeafCollector = topDocsLeafCollector;
293251
this.aggsLeafCollector = aggsLeafCollector;
@@ -318,6 +276,8 @@ public void setMinCompetitiveScore(float minScore) {
318276
@Override
319277
public void collect(int doc) throws IOException {
320278
if (shouldCollectTopDocs(doc, scorer, postFilterBits)) {
279+
// we keep on counting and checking the terminate_after threshold so that we can terminate aggs collection
280+
// even if top docs collection early terminated
321281
numCollected++;
322282
if (topDocsLeafCollector != null) {
323283
try {
@@ -326,12 +286,7 @@ public void collect(int doc) throws IOException {
326286
topDocsLeafCollector = null;
327287
// top docs collector does not need this segment, but the aggs collector may.
328288
if (aggsLeafCollector == null) {
329-
// TODO we keep on collecting although we have nothing to collect (there is no top docs nor aggs leaf
330-
// collector).
331-
// The reason is only to set the early terminated flag to the QueryResult. We should fix this.
332-
if (terminateAfter == 0) {
333-
throw e;
334-
}
289+
throw e;
335290
}
336291
}
337292
}
@@ -345,12 +300,7 @@ public void collect(int doc) throws IOException {
345300
aggsLeafCollector = null;
346301
// aggs collector does not need this segment, but the top docs collector may.
347302
if (topDocsLeafCollector == null) {
348-
// TODO we keep on collecting although we have nothing to collect (there is no top docs nor aggs leaf
349-
// collector).
350-
// The reason is only to set the early terminated flag to the QueryResult. We should fix this.
351-
if (terminateAfter == 0) {
352-
throw e;
353-
}
303+
throw e;
354304
}
355305
}
356306
}

server/src/test/java/org/elasticsearch/search/query/NonCountingTermQuery.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,11 @@ class NonCountingTermQuery extends TermQuery {
2929
super(term);
3030
}
3131

32+
@Override
3233
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
3334
Weight w = super.createWeight(searcher, scoreMode, boost);
3435
return new FilterWeight(w) {
35-
public int count(LeafReaderContext context) throws IOException {
36+
public int count(LeafReaderContext context) {
3637
return -1;
3738
}
3839
};

server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java

Lines changed: 54 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -948,8 +948,22 @@ public int docID() {
948948

949949
public void testSetMinCompetitiveScoreIsEnabledTopDocsOnly() throws IOException {
950950
// without aggs no need to disable set min competitive score
951+
Weight filterWeight = null;
952+
int terminateAfter = 0;
953+
Float minScore = null;
954+
if (randomBoolean()) {
955+
if (randomBoolean()) {
956+
filterWeight = new MatchAllDocsQuery().createWeight(searcher, ScoreMode.TOP_DOCS, 1f);
957+
}
958+
if (randomBoolean()) {
959+
terminateAfter = randomIntBetween(1, Integer.MAX_VALUE);
960+
}
961+
if (randomBoolean()) {
962+
minScore = 0f;
963+
}
964+
}
951965
TopScoresCollector topDocs = new TopScoresCollector();
952-
Collector queryPhaseCollector = new QueryPhaseCollector(topDocs, null, 0, null, null);
966+
Collector queryPhaseCollector = new QueryPhaseCollector(topDocs, filterWeight, terminateAfter, null, minScore);
953967
LeafReaderContext leafReaderContext = searcher.getLeafContexts().get(0);
954968
LeafCollector leafCollector = queryPhaseCollector.getLeafCollector(leafReaderContext);
955969
MinCompetitiveScoreScorable scorer = new MinCompetitiveScoreScorable();
@@ -959,9 +973,24 @@ public void testSetMinCompetitiveScoreIsEnabledTopDocsOnly() throws IOException
959973
}
960974

961975
public void testSetMinCompetitiveScoreIsDisabledWithAggs() throws IOException {
976+
Weight filterWeight = null;
977+
int terminateAfter = 0;
978+
Float minScore = null;
979+
if (randomBoolean()) {
980+
if (randomBoolean()) {
981+
TermQuery termQuery = new TermQuery(new Term("field2", "value"));
982+
filterWeight = termQuery.createWeight(searcher, ScoreMode.TOP_DOCS, 1f);
983+
}
984+
if (randomBoolean()) {
985+
terminateAfter = randomIntBetween(1, Integer.MAX_VALUE);
986+
}
987+
if (randomBoolean()) {
988+
minScore = randomFloat();
989+
}
990+
}
962991
TopScoresCollector topDocs = new TopScoresCollector();
963992
Collector aggs = new MockCollector(randomBoolean() ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES);
964-
Collector queryPhaseCollector = new QueryPhaseCollector(topDocs, null, 0, aggs, null);
993+
Collector queryPhaseCollector = new QueryPhaseCollector(topDocs, filterWeight, terminateAfter, aggs, minScore);
965994
LeafReaderContext leafReaderContext = searcher.getLeafContexts().get(0);
966995
LeafCollector leafCollector = queryPhaseCollector.getLeafCollector(leafReaderContext);
967996
MinCompetitiveScoreScorable scorer = new MinCompetitiveScoreScorable();
@@ -1053,24 +1082,6 @@ public void testCompetitiveIteratorNoAggs() throws IOException {
10531082
assertTrue(mockCollector.competitiveIteratorCalled);
10541083
}
10551084

1056-
public void testCompetitiveIteratorNoAggsCollectionTerminated() throws IOException {
1057-
// use a post_filter so that we wrap the top docs leaf collector, as this test verifies that
1058-
// the wrapper calls competitiveIterator when appropriated
1059-
Weight postFilterWeight = searcher.createWeight(new MatchAllDocsQuery(), ScoreMode.COMPLETE_NO_SCORES, 1.0f);
1060-
MockCollector mockCollector = new MockCollector(randomFrom(ScoreMode.values()));
1061-
TerminateAfterCollector terminateAfterCollector = new TerminateAfterCollector(mockCollector, 1);
1062-
QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector(terminateAfterCollector, postFilterWeight, 0, null, null);
1063-
LeafReaderContext context = searcher.getLeafContexts().get(0);
1064-
LeafCollector leafCollector = queryPhaseCollector.getLeafCollector(context);
1065-
leafCollector.competitiveIterator();
1066-
assertTrue(mockCollector.competitiveIteratorCalled);
1067-
mockCollector.competitiveIteratorCalled = false;
1068-
leafCollector.collect(0);
1069-
expectThrows(CollectionTerminatedException.class, () -> leafCollector.collect(1));
1070-
leafCollector.competitiveIterator();
1071-
assertFalse(mockCollector.competitiveIteratorCalled);
1072-
}
1073-
10741085
public void testCompetitiveIteratorWithAggs() throws IOException {
10751086
MockCollector topDocs = new MockCollector(randomFrom(ScoreMode.values()));
10761087
MockCollector aggs = new MockCollector(randomScoreModeExceptTopScores());
@@ -1113,6 +1124,22 @@ public void testCompetitiveIteratorWithAggsCollectionTerminated() throws IOExcep
11131124
assertFalse(aggsMockCollector.competitiveIteratorCalled);
11141125
}
11151126

1127+
public void testLeafCollectorsAreNotPulledOnceTerminatedAfter() throws IOException {
1128+
{
1129+
MockCollector topDocsMockCollector = new MockCollector(randomFrom(ScoreMode.values()));
1130+
QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector(topDocsMockCollector, null, 1, null, null);
1131+
searcher.search(new NonCountingTermQuery(new Term("field1", "value")), queryPhaseCollector);
1132+
assertEquals(1, topDocsMockCollector.leafCollectorsPulled);
1133+
}
1134+
{
1135+
MockCollector topDocsMockCollector = new MockCollector(randomFrom(ScoreMode.values()));
1136+
MockCollector aggsMockCollector = new MockCollector(randomScoreModeExceptTopScores());
1137+
QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector(topDocsMockCollector, null, 1, aggsMockCollector, null);
1138+
searcher.search(new NonCountingTermQuery(new Term("field1", "value")), queryPhaseCollector);
1139+
assertEquals(1, topDocsMockCollector.leafCollectorsPulled);
1140+
}
1141+
}
1142+
11161143
private static ScoreMode randomScoreModeExceptTopScores() {
11171144
return randomFrom(Arrays.stream(ScoreMode.values()).filter(scoreMode -> scoreMode != ScoreMode.TOP_SCORES).toList());
11181145
}
@@ -1181,7 +1208,7 @@ public float score() throws IOException {
11811208

11821209
@Override
11831210
public int docID() {
1184-
throw new UnsupportedOperationException();
1211+
return 0;
11851212
}
11861213

11871214
@Override
@@ -1196,6 +1223,7 @@ private static class MockCollector extends SimpleCollector {
11961223
private boolean setScorerCalled = false;
11971224
private boolean setWeightCalled = false;
11981225
private boolean competitiveIteratorCalled = false;
1226+
private int leafCollectorsPulled = 0;
11991227

12001228
MockCollector(ScoreMode scoreMode) {
12011229
this(scoreMode, null);
@@ -1206,6 +1234,11 @@ private static class MockCollector extends SimpleCollector {
12061234
this.expectedScorable = expectedScorable;
12071235
}
12081236

1237+
@Override
1238+
protected void doSetNextReader(LeafReaderContext context) {
1239+
leafCollectorsPulled++;
1240+
}
1241+
12091242
@Override
12101243
public void setWeight(Weight weight) {
12111244
setWeightCalled = true;

0 commit comments

Comments
 (0)