Skip to content

[8.19] ESQL: Fix count optimization with pushable union types (#127225) #127460

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/127225.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 127225
summary: Fix count optimization with pushable union types
area: ES|QL
type: bug
issues:
- 127200
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,73 @@ count:long | @timestamp:date
0 | 2023-10-23T13:50:00.000Z
;

multiIndexIpStatsNonPushableCount
// Could be pushed to Lucene if we knew whether ip fields are single valued or not.
// May still be pushed down on multi-node environments if a node has only the index where client_ip is keyword.
required_capability: union_types
required_capability: fix_count_pushdown_for_union_types

FROM sample_data, sample_data_str
| STATS count=count(client_ip::ip)
;

count:long
14
;

multiIndexIpStatsNonPushableCountEval
// Could be pushed to Lucene if we knew whether ip fields are single valued or not.
// May still be pushed down on multi-node environments if a node has only the index where client_ip is keyword.
required_capability: union_types
required_capability: fix_count_pushdown_for_union_types

FROM sample_data, sample_data_str
| EVAL client_ip = client_ip::ip
| STATS count=count(client_ip)
;

count:long
14
;

multiIndexIpStatsNonPushableCountWithFilter
// Currently not pushable: has 2 aggs and we don't consider multi-typed fields aggregatable.
required_capability: union_types
required_capability: fix_count_pushdown_for_union_types

FROM sample_data, sample_data_ts_long
| STATS count_matching=count(@timestamp::long) WHERE @timestamp::long >= 1698069301543,
total_count=count(@timestamp::long)
;

count_matching:long | total_count:long
2 | 14
;

multiIndexIpStatsPushableCount
required_capability: union_types
required_capability: fix_count_pushdown_for_union_types

FROM sample_data, sample_data_ts_long
| STATS count=count(@timestamp::long)
;

count:long
14
;

multiIndexIpStatsPushableCountEval
required_capability: union_types
required_capability: fix_count_pushdown_for_union_types

FROM sample_data, sample_data_ts_long
| EVAL @timestamp = @timestamp::long
| STATS count=count(@timestamp)
;

count:long
14
;

multiIndexIpStringStatsInline2
required_capability: union_types
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ public enum Cap {
*/
UNION_TYPES_AGG_CAST,

/**
* When pushing down {@code STATS count(field::type)} for a union type field, we wrongly used a synthetic attribute name in the
* query instead of the actual field name. This led to 0 counts instead of the correct result.
*/
FIX_COUNT_PUSHDOWN_FOR_UNION_TYPES,

/**
* Fix to GROK validation in case of multiple fields with same name and different types
* https://github.com/elastic/elasticsearch/issues/110533
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,12 @@ public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLog

private LogicalPlan missingToNull(LogicalPlan plan, Predicate<FieldAttribute> shouldBeRetained) {
if (plan instanceof EsRelation relation) {
// Remove missing fields from the EsRelation because this is not where we will obtain them from; replace them by an Eval right
// after, instead. This allows us to safely re-use the attribute ids of the corresponding FieldAttributes.
// For any missing field, place an Eval right after the EsRelation to assign null values to that attribute (using the same name
// id!), thus avoiding that InsertFieldExtrations inserts a field extraction later.
// This means that an EsRelation[field1, field2, field3] where field1 and field 3 are missing will be replaced by
// Project[field1, field2, field3] <- keeps the ordering intact
// \_Eval[field1 = null, field3 = null]
// \_EsRelation[field2]
// \_EsRelation[field1, field2, field3]
List<Attribute> relationOutput = relation.output();
Map<DataType, Alias> nullLiterals = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size());
List<NamedExpression> newProjections = new ArrayList<>(relationOutput.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,19 @@ private Tuple<List<Attribute>, List<EsStatsQueryExec.Stat>> pushableStats(
// check if regular field
else {
if (target instanceof FieldAttribute fa) {
var fName = fa.name();
var fName = fa.fieldName();
if (context.searchStats().isSingleValue(fName)) {
fieldName = fa.name();
fieldName = fName;
query = QueryBuilders.existsQuery(fieldName);
}
}
}
if (fieldName != null) {
if (count.hasFilter()) {
// Note: currently, it seems like we never actually perform stats pushdown if we reach here.
// That's because stats pushdown only works for 1 agg function (without BY); but in that case, filters
// are extracted into a separate filter node upstream from the aggregation (and hopefully pushed into
// the EsQueryExec separately).
if (canPushToSource(count.filter()) == false) {
return null; // can't push down
}
Expand Down