Skip to content

Commit 74ec39c

Browse files
authored
ESQL: Fix count optimization with pushable union types (#127225) (#127459)
When pushing down STATS count(field::type) to Lucene for a union-typed field, use the correct field name in the Lucene query and not the synthetic attribute name $$field$converted_to$type.
1 parent 44340f7 commit 74ec39c

File tree

5 files changed

+88
-5
lines changed

5 files changed

+88
-5
lines changed

docs/changelog/127225.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 127225
2+
summary: Fix count optimization with pushable union types
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 127200

x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec

+67
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,73 @@ count:long | @timestamp:date
393393
0 | 2023-10-23T13:50:00.000Z
394394
;
395395

396+
multiIndexIpStatsNonPushableCount
397+
// Could be pushed to Lucene if we knew whether ip fields are single valued or not.
398+
// May still be pushed down on multi-node environments if a node has only the index where client_ip is keyword.
399+
required_capability: union_types
400+
required_capability: fix_count_pushdown_for_union_types
401+
402+
FROM sample_data, sample_data_str
403+
| STATS count=count(client_ip::ip)
404+
;
405+
406+
count:long
407+
14
408+
;
409+
410+
multiIndexIpStatsNonPushableCountEval
411+
// Could be pushed to Lucene if we knew whether ip fields are single valued or not.
412+
// May still be pushed down on multi-node environments if a node has only the index where client_ip is keyword.
413+
required_capability: union_types
414+
required_capability: fix_count_pushdown_for_union_types
415+
416+
FROM sample_data, sample_data_str
417+
| EVAL client_ip = client_ip::ip
418+
| STATS count=count(client_ip)
419+
;
420+
421+
count:long
422+
14
423+
;
424+
425+
multiIndexIpStatsNonPushableCountWithFilter
426+
// Currently not pushable: has 2 aggs and we don't consider multi-typed fields aggregatable.
427+
required_capability: union_types
428+
required_capability: fix_count_pushdown_for_union_types
429+
430+
FROM sample_data, sample_data_ts_long
431+
| STATS count_matching=count(@timestamp::long) WHERE @timestamp::long >= 1698069301543,
432+
total_count=count(@timestamp::long)
433+
;
434+
435+
count_matching:long | total_count:long
436+
2 | 14
437+
;
438+
439+
multiIndexIpStatsPushableCount
440+
required_capability: union_types
441+
required_capability: fix_count_pushdown_for_union_types
442+
443+
FROM sample_data, sample_data_ts_long
444+
| STATS count=count(@timestamp::long)
445+
;
446+
447+
count:long
448+
14
449+
;
450+
451+
multiIndexIpStatsPushableCountEval
452+
required_capability: union_types
453+
required_capability: fix_count_pushdown_for_union_types
454+
455+
FROM sample_data, sample_data_ts_long
456+
| EVAL @timestamp = @timestamp::long
457+
| STATS count=count(@timestamp)
458+
;
459+
460+
count:long
461+
14
462+
;
396463

397464
multiIndexIpStringStatsInline2
398465
required_capability: union_types

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

+6
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,12 @@ public enum Cap {
246246
*/
247247
UNION_TYPES_AGG_CAST,
248248

249+
/**
250+
* When pushing down {@code STATS count(field::type)} for a union type field, we wrongly used a synthetic attribute name in the
251+
* query instead of the actual field name. This led to 0 counts instead of the correct result.
252+
*/
253+
FIX_COUNT_PUSHDOWN_FOR_UNION_TYPES,
254+
249255
/**
250256
* Fix to GROK validation in case of multiple fields with same name and different types
251257
* https://github.com/elastic/elasticsearch/issues/110533

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,12 @@ public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLog
6464

6565
private LogicalPlan missingToNull(LogicalPlan plan, Predicate<FieldAttribute> shouldBeRetained) {
6666
if (plan instanceof EsRelation relation) {
67-
// Remove missing fields from the EsRelation because this is not where we will obtain them from; replace them by an Eval right
68-
// after, instead. This allows us to safely re-use the attribute ids of the corresponding FieldAttributes.
67+
// For any missing field, place an Eval right after the EsRelation to assign null values to that attribute (using the same name
68+
// id!), thus avoiding that InsertFieldExtrations inserts a field extraction later.
6969
// This means that an EsRelation[field1, field2, field3] where field1 and field 3 are missing will be replaced by
7070
// Project[field1, field2, field3] <- keeps the ordering intact
7171
// \_Eval[field1 = null, field3 = null]
72-
// \_EsRelation[field2]
72+
// \_EsRelation[field1, field2, field3]
7373
List<Attribute> relationOutput = relation.output();
7474
Map<DataType, Alias> nullLiterals = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size());
7575
List<NamedExpression> newProjections = new ArrayList<>(relationOutput.size());

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java

+6-2
Original file line numberDiff line numberDiff line change
@@ -94,15 +94,19 @@ private Tuple<List<Attribute>, List<EsStatsQueryExec.Stat>> pushableStats(
9494
// check if regular field
9595
else {
9696
if (target instanceof FieldAttribute fa) {
97-
var fName = fa.name();
97+
var fName = fa.fieldName();
9898
if (context.searchStats().isSingleValue(fName)) {
99-
fieldName = fa.name();
99+
fieldName = fName;
100100
query = QueryBuilders.existsQuery(fieldName);
101101
}
102102
}
103103
}
104104
if (fieldName != null) {
105105
if (count.hasFilter()) {
106+
// Note: currently, it seems like we never actually perform stats pushdown if we reach here.
107+
// That's because stats pushdown only works for 1 agg function (without BY); but in that case, filters
108+
// are extracted into a separate filter node upstream from the aggregation (and hopefully pushed into
109+
// the EsQueryExec separately).
106110
if (canPushToSource(count.filter()) == false) {
107111
return null; // can't push down
108112
}

0 commit comments

Comments
 (0)