Skip to content

Commit f087ff8

Browse files
authored
Avoid wrapping leaf collector when possible in QueryPhaseCollector (elastic#97452)
The recently introduced QueryPhaseCollector introduces some overhead caused by wrapping of the leaf collector that could be avoided. When either a post_filter, or a min_score, or terminate_after are provided, we have no other choice than wrapping to filter collection, but if none of these are used, we can skip wrapping when one of the two leaf collectors early terminates.
1 parent 6164fa5 commit f087ff8

File tree

2 files changed

+88
-19
lines changed

2 files changed

+88
-19
lines changed

server/src/main/java/org/elasticsearch/search/query/QueryPhaseCollector.java

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.apache.lucene.search.CollectionTerminatedException;
1313
import org.apache.lucene.search.Collector;
1414
import org.apache.lucene.search.DocIdSetIterator;
15+
import org.apache.lucene.search.FilterLeafCollector;
1516
import org.apache.lucene.search.FilterScorable;
1617
import org.apache.lucene.search.LeafCollector;
1718
import org.apache.lucene.search.Scorable;
@@ -118,7 +119,7 @@ private boolean isDocWithinMinScore(Scorable scorer) throws IOException {
118119
return minScore == null || scorer.score() >= minScore;
119120
}
120121

121-
private boolean doesDocMatchPostFilter(int doc, Bits postFilterBits) {
122+
private static boolean doesDocMatchPostFilter(int doc, Bits postFilterBits) {
122123
return postFilterBits == null || postFilterBits.get(doc);
123124
}
124125

@@ -143,31 +144,36 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept
143144
Bits postFilterBits = getPostFilterBits(context);
144145

145146
if (aggsCollector == null) {
146-
LeafCollector topDocsLeafCollector;
147+
LeafCollector tdlc = null;
147148
try {
148-
topDocsLeafCollector = topDocsCollector.getLeafCollector(context);
149+
tdlc = topDocsCollector.getLeafCollector(context);
149150
} catch (@SuppressWarnings("unused") CollectionTerminatedException e) {
150151
// TODO we keep on collecting although we have nothing to collect (there is no top docs nor aggs leaf collector).
151152
// The reason is only to set the early terminated flag to the QueryResult like some tests expect. This needs fixing.
152153
if (terminateAfter == 0) {
153154
throw e;
154155
}
155-
topDocsLeafCollector = null;
156+
}
157+
final LeafCollector topDocsLeafCollector = tdlc;
158+
if (postFilterBits == null && terminateAfter == 0 && minScore == null) {
159+
// no need to wrap if we just need to collect unfiltered docs through leaf collector
160+
// aggs collector was not originally provided so the overall score mode is that of the top docs collector
161+
return topDocsLeafCollector;
156162
}
157163
return new TopDocsLeafCollector(postFilterBits, topDocsLeafCollector);
158164
}
159165

160-
LeafCollector topDocsLeafCollector;
166+
LeafCollector tdlc = null;
161167
try {
162-
topDocsLeafCollector = topDocsCollector.getLeafCollector(context);
168+
tdlc = topDocsCollector.getLeafCollector(context);
163169
} catch (@SuppressWarnings("unused") CollectionTerminatedException e) {
164170
// top docs collector does not need this segment, but the aggs collector does.
165-
topDocsLeafCollector = null;
166171
}
172+
final LeafCollector topDocsLeafCollector = tdlc;
167173

168-
LeafCollector aggsLeafCollector;
174+
LeafCollector alf = null;
169175
try {
170-
aggsLeafCollector = aggsCollector.getLeafCollector(context);
176+
alf = aggsCollector.getLeafCollector(context);
171177
} catch (@SuppressWarnings("unused") CollectionTerminatedException e) {
172178
// aggs collector does not need this segment, but the top docs collector may.
173179
if (topDocsLeafCollector == null) {
@@ -177,11 +183,41 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept
177183
throw e;
178184
}
179185
}
180-
aggsLeafCollector = null;
181186
}
182-
// say that the aggs collector early terminates while the top docs collector does not, we still want to wrap in the same way
187+
final LeafCollector aggsLeafCollector = alf;
188+
189+
if (topDocsLeafCollector == null && terminateAfter == 0 && minScore == null) {
190+
// top docs collector early terminated, we can avoid wrapping as long as we don't need to apply terminate_after and min_score.
191+
// post_filter does not matter because it's not applied to aggs collection anyways. terminate_after matters only until we
192+
// address the different TODOs around needless collection to honour terminate_after after early termination.
193+
// aggs don't support skipping low scoring hits, so we can rely on setMinCompetitiveScore being a no-op already.
194+
return aggsLeafCollector;
195+
}
196+
197+
// if that the aggs collector early terminates while the top docs collector does not, we still need to wrap the leaf collector
183198
// to enforce that setMinCompetitiveScore is a no-op. Otherwise we may allow the top docs collector to skip non competitive
184-
// hits despite the score mode of the Collector did not allow it.
199+
// hits despite the score mode of the Collector did not allow it (because aggs don't support TOP_SCORES).
200+
if (aggsLeafCollector == null && postFilterBits == null && terminateAfter == 0 && minScore == null) {
201+
// special case for early terminated aggs
202+
return new FilterLeafCollector(topDocsLeafCollector) {
203+
@Override
204+
public void setScorer(Scorable scorer) throws IOException {
205+
super.setScorer(new FilterScorable(scorer) {
206+
@Override
207+
public void setMinCompetitiveScore(float minScore) {
208+
// Ignore calls to setMinCompetitiveScore. The top docs collector may try to skip low
209+
// scoring hits, but the overall score_mode won't allow it because an aggs collector
210+
// was originally provided which never supports TOP_SCORES is not supported for aggs
211+
}
212+
});
213+
}
214+
215+
@Override
216+
public DocIdSetIterator competitiveIterator() throws IOException {
217+
return topDocsLeafCollector.competitiveIterator();
218+
}
219+
};
220+
}
185221
return new CompositeLeafCollector(postFilterBits, topDocsLeafCollector, aggsLeafCollector);
186222
}
187223

@@ -259,7 +295,7 @@ private class CompositeLeafCollector implements LeafCollector {
259295

260296
@Override
261297
public void setScorer(Scorable scorer) throws IOException {
262-
if (cacheScores) {
298+
if (cacheScores && topDocsLeafCollector != null && aggsLeafCollector != null) {
263299
scorer = ScoreCachingWrappingScorer.wrap(scorer);
264300
}
265301
scorer = new FilterScorable(scorer) {
@@ -300,9 +336,9 @@ public void collect(int doc) throws IOException {
300336
}
301337
}
302338
}
303-
// min_score is applied to aggs as well as top hits
304-
if (isDocWithinMinScore(scorer)) {
305-
if (aggsLeafCollector != null) {
339+
if (aggsLeafCollector != null) {
340+
// min_score is applied to aggs as well as top hits
341+
if (isDocWithinMinScore(scorer)) {
306342
try {
307343
aggsLeafCollector.collect(doc);
308344
} catch (@SuppressWarnings("unused") CollectionTerminatedException e) {

server/src/test/java/org/elasticsearch/search/query/QueryPhaseCollectorTests.java

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import org.apache.lucene.tests.search.DummyTotalHitCountCollector;
4141
import org.elasticsearch.core.IOUtils;
4242
import org.elasticsearch.test.ESTestCase;
43+
import org.hamcrest.CoreMatchers;
4344
import org.hamcrest.Matchers;
4445

4546
import java.io.IOException;
@@ -1012,21 +1013,53 @@ public void testCacheScoresIfNecessary() throws IOException {
10121013
QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector(c1, null, 0, c2, null);
10131014
queryPhaseCollector.getLeafCollector(ctx).setScorer(new MinCompetitiveScoreScorable());
10141015
}
1016+
{
1017+
// both collectors need scores => caching, but one early terminates
1018+
Collector c1 = new TerminateAfterCollector(new MockCollector(ScoreMode.COMPLETE, MinCompetitiveScoreScorable.class), 0);
1019+
Collector c2 = new MockCollector(ScoreMode.COMPLETE, MinCompetitiveScoreScorable.class);
1020+
QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector(c1, null, 0, c2, null);
1021+
queryPhaseCollector.getLeafCollector(ctx).setScorer(new MinCompetitiveScoreScorable());
1022+
queryPhaseCollector = new QueryPhaseCollector(c2, null, 0, c1, null);
1023+
queryPhaseCollector.getLeafCollector(ctx).setScorer(new MinCompetitiveScoreScorable());
1024+
}
10151025
}
10161026

1017-
public void testCompetitiveIteratorTopDocsOnly() throws IOException {
1027+
public void testNoWrappingIfUnnecessaryTopDocsOnly() throws IOException {
10181028
MockCollector mockCollector = new MockCollector(randomFrom(ScoreMode.values()));
10191029
QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector(mockCollector, null, 0, null, null);
10201030
LeafReaderContext context = searcher.getLeafContexts().get(0);
10211031
LeafCollector leafCollector = queryPhaseCollector.getLeafCollector(context);
1032+
assertThat(leafCollector, CoreMatchers.sameInstance(mockCollector));
1033+
}
1034+
1035+
public void testNoWrappingIfUnnecessaryTopDocsEarlyTerminated() throws IOException {
1036+
TerminateAfterCollector topDocsCollector = new TerminateAfterCollector(new MockCollector(randomFrom(ScoreMode.values())), 0);
1037+
MockCollector aggsCollector = new MockCollector(randomScoreModeExceptTopScores());
1038+
QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector(topDocsCollector, null, 0, aggsCollector, null);
1039+
LeafReaderContext context = searcher.getLeafContexts().get(0);
1040+
LeafCollector leafCollector = queryPhaseCollector.getLeafCollector(context);
1041+
assertThat(leafCollector, CoreMatchers.sameInstance(aggsCollector));
1042+
}
1043+
1044+
public void testCompetitiveIteratorNoAggs() throws IOException {
1045+
// use a post_filter so that we wrap the top docs leaf collector, as this test verifies that
1046+
// the wrapper calls competitiveIterator when appropriated
1047+
Weight postFilterWeight = searcher.createWeight(new MatchAllDocsQuery(), ScoreMode.COMPLETE_NO_SCORES, 1.0f);
1048+
MockCollector mockCollector = new MockCollector(randomFrom(ScoreMode.values()));
1049+
QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector(mockCollector, postFilterWeight, 0, null, null);
1050+
LeafReaderContext context = searcher.getLeafContexts().get(0);
1051+
LeafCollector leafCollector = queryPhaseCollector.getLeafCollector(context);
10221052
leafCollector.competitiveIterator();
10231053
assertTrue(mockCollector.competitiveIteratorCalled);
10241054
}
10251055

1026-
public void testCompetitiveIteratorTopDocsOnlyCollectionTerminated() throws IOException {
1056+
public void testCompetitiveIteratorNoAggsCollectionTerminated() throws IOException {
1057+
// use a post_filter so that we wrap the top docs leaf collector, as this test verifies that
1058+
// the wrapper calls competitiveIterator when appropriated
1059+
Weight postFilterWeight = searcher.createWeight(new MatchAllDocsQuery(), ScoreMode.COMPLETE_NO_SCORES, 1.0f);
10271060
MockCollector mockCollector = new MockCollector(randomFrom(ScoreMode.values()));
10281061
TerminateAfterCollector terminateAfterCollector = new TerminateAfterCollector(mockCollector, 1);
1029-
QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector(terminateAfterCollector, null, 0, null, null);
1062+
QueryPhaseCollector queryPhaseCollector = new QueryPhaseCollector(terminateAfterCollector, postFilterWeight, 0, null, null);
10301063
LeafReaderContext context = searcher.getLeafContexts().get(0);
10311064
LeafCollector leafCollector = queryPhaseCollector.getLeafCollector(context);
10321065
leafCollector.competitiveIterator();

0 commit comments

Comments
 (0)