Skip to content

Commit 663d82c

Browse files
authored
Shortcut total hit count when terminate_after is used (elastic#94889)
We have for long considered terminate_after a "filtered collector", same as min_score and post_filter, for which we disable the hit count optimization as we want to make sure that the total hit count reflects the documents that are in fact going to be collected. When we moved to leveraging Weight#count from Lucene (elastic#88396), we automatically moved to shortcutting the total hit count when using terminate_after and size is set to 0, meaning that in this scenario the total hits may be higher than the terminate after value that is set to the request, in case the total hit count is retrieved from the index statistics through Weight#count. This is acceptable, given that terminate_after is intended to stop additional work after collecting a certain number of docs, but no additional work is performed to collect a higher and accurante total hit count in this case, given it could be retrieved from the index statistics. This also raises that `EarlyTerminatingCollector` isn't necessarily to be considered as a collector that filters documents out. The current situation is inconsistent as we will shortcut the total hit count when size is 0 and terminate after is set, but we won't if size is greater than 0. This commit treats terminate_after the same way regardless of how size is set: the total hit count may be higher than the terminate after if it did not require actual counting to be computed.
1 parent f50725e commit 663d82c

File tree

3 files changed

+11
-7
lines changed

3 files changed

+11
-7
lines changed

docs/changelog/94889.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 94889
2+
summary: Shortcut total hit count when `terminate_after` is used
3+
area: Search
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/search/query/QueryPhase.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,6 @@ static void executeInternal(SearchContext searchContext) throws QueryPhaseExecut
132132
// add terminate_after before the filter collectors
133133
// it will only be applied on documents accepted by these filter collectors
134134
collectors.add(createEarlyTerminationCollectorContext(searchContext.terminateAfter()));
135-
// this collector can filter documents during the collection
136-
hasFilterCollector = true;
137135
}
138136
if (searchContext.parsedPostFilter() != null) {
139137
// add post filters before aggregations

server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -410,18 +410,19 @@ public void testTerminateAfterEarlyTermination() throws Exception {
410410
}
411411

412412
context.terminateAfter(1);
413+
long countDocs = countDocUpTo.applyAsInt(1);
413414
{
414415
context.setSize(1);
415416
QueryPhase.executeInternal(context);
416417
assertTrue(context.queryResult().terminatedEarly());
417-
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo(1L));
418+
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo(countDocs));
418419
assertThat(context.queryResult().topDocs().topDocs.totalHits.relation, equalTo(TotalHits.Relation.EQUAL_TO));
419420
assertThat(context.queryResult().topDocs().topDocs.scoreDocs.length, equalTo(1));
420421

421422
context.setSize(0);
422423
QueryPhase.executeInternal(context);
423424
assertTrue(context.queryResult().terminatedEarly());
424-
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo((long) countDocUpTo.applyAsInt(1)));
425+
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo(countDocs));
425426
assertThat(context.queryResult().topDocs().topDocs.totalHits.relation, equalTo(TotalHits.Relation.EQUAL_TO));
426427
assertThat(context.queryResult().topDocs().topDocs.scoreDocs.length, equalTo(0));
427428
}
@@ -430,7 +431,7 @@ public void testTerminateAfterEarlyTermination() throws Exception {
430431
context.setSize(1);
431432
QueryPhase.executeInternal(context);
432433
assertTrue(context.queryResult().terminatedEarly());
433-
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo(1L));
434+
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo(countDocs));
434435
assertThat(context.queryResult().topDocs().topDocs.totalHits.relation, equalTo(TotalHits.Relation.EQUAL_TO));
435436
assertThat(context.queryResult().topDocs().topDocs.scoreDocs.length, equalTo(1));
436437
}
@@ -440,7 +441,7 @@ public void testTerminateAfterEarlyTermination() throws Exception {
440441
context.registerAggsCollector(collector);
441442
QueryPhase.executeInternal(context);
442443
assertTrue(context.queryResult().terminatedEarly());
443-
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo(1L));
444+
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo(countDocs));
444445
assertThat(context.queryResult().topDocs().topDocs.totalHits.relation, equalTo(TotalHits.Relation.EQUAL_TO));
445446
assertThat(context.queryResult().topDocs().topDocs.scoreDocs.length, equalTo(1));
446447
// TotalHitCountCollector counts num docs in the first leaf
@@ -501,7 +502,7 @@ public void testTerminateAfterEarlyTermination() throws Exception {
501502
equalTo(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO)
502503
);
503504
} else {
504-
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo(7L));
505+
assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo((long) countDocUpTo.applyAsInt(7)));
505506
assertThat(context.queryResult().topDocs().topDocs.totalHits.relation, equalTo(TotalHits.Relation.EQUAL_TO));
506507
}
507508
assertThat(context.queryResult().topDocs().topDocs.scoreDocs.length, equalTo(7));

0 commit comments

Comments
 (0)