Skip to content

Commit 9784e0e

Browse files
authored
ESQL: Split grouping functions based on their EVAL-ability (#126597)
This splits the grouping functions in two: those that can be evaluated independently through the EVAL operator (`BUCKET`) and those that don't (like those that that are evaluated through an agg operator, `CATEGORIZE`). Closes #124608
1 parent f658af6 commit 9784e0e

File tree

10 files changed

+168
-104
lines changed

10 files changed

+168
-104
lines changed

docs/reference/query-languages/esql/_snippets/functions/description/categorize.md

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec

+15
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,21 @@ c:long
584584
3
585585
;
586586

587+
reuse categorize arg expression in agg
588+
required_capability: categorize_v5
589+
590+
FROM sample_data
591+
| STATS m = MAX(LENGTH(CONCAT(message, "_end"))) BY c = CATEGORIZE(CONCAT(message, "_end"))
592+
| SORT m
593+
;
594+
595+
m:integer |c:keyword
596+
16 |.*?Disconnected_end.*?
597+
20 |.*?Connection.+?error_end.*?
598+
25 |.*?Connected.+?to.*?
599+
;
600+
601+
587602
categorize in aggs inside function
588603
required_capability: categorize_v5
589604

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Bucket.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,10 @@
5858
* from a number of desired buckets (as a hint) and a range (auto mode).
5959
* In the former case, two parameters will be provided, in the latter four.
6060
*/
61-
public class Bucket extends GroupingFunction implements PostOptimizationVerificationAware, TwoOptionalArguments {
61+
public class Bucket extends GroupingFunction.EvaluatableGroupingFunction
62+
implements
63+
PostOptimizationVerificationAware,
64+
TwoOptionalArguments {
6265
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Bucket", Bucket::new);
6366

6467
// TODO maybe we should just cover the whole of representable dates here - like ten years, 100 years, 1000 years, all the way up.

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java

+3-9
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1111
import org.elasticsearch.common.io.stream.StreamInput;
1212
import org.elasticsearch.common.io.stream.StreamOutput;
13-
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
1413
import org.elasticsearch.xpack.esql.core.expression.Expression;
1514
import org.elasticsearch.xpack.esql.core.expression.Nullability;
1615
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
@@ -37,7 +36,7 @@
3736
* For the implementation, see {@link org.elasticsearch.compute.aggregation.blockhash.CategorizeBlockHash}
3837
* </p>
3938
*/
40-
public class Categorize extends GroupingFunction {
39+
public class Categorize extends GroupingFunction.NonEvaluatableGroupingFunction {
4140
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
4241
Expression.class,
4342
"Categorize",
@@ -53,8 +52,8 @@ public class Categorize extends GroupingFunction {
5352
`CATEGORIZE` has the following limitations:
5453
5554
* can’t be used within other expressions
56-
* can’t be used with multiple groupings
57-
* can’t be used or referenced within aggregate functions""",
55+
* can’t be used more than once in the groupings
56+
* can’t be used or referenced within aggregate functions and it has to be the first grouping""",
5857
examples = {
5958
@Example(
6059
file = "docs",
@@ -101,11 +100,6 @@ public Nullability nullable() {
101100
return Nullability.TRUE;
102101
}
103102

104-
@Override
105-
public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
106-
throw new UnsupportedOperationException("CATEGORIZE is only evaluated during aggregations");
107-
}
108-
109103
@Override
110104
protected TypeResolution resolveType() {
111105
return isString(field(), sourceText(), DEFAULT);

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/GroupingFunction.java

+27-6
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,13 @@
2222

2323
import static org.elasticsearch.xpack.esql.common.Failure.fail;
2424

25-
public abstract class GroupingFunction extends Function implements EvaluatorMapper, PostAnalysisPlanVerificationAware {
25+
public abstract sealed class GroupingFunction extends Function implements PostAnalysisPlanVerificationAware permits
26+
GroupingFunction.NonEvaluatableGroupingFunction, GroupingFunction.EvaluatableGroupingFunction {
2627

2728
protected GroupingFunction(Source source, List<Expression> fields) {
2829
super(source, fields);
2930
}
3031

31-
@Override
32-
public Object fold(FoldContext ctx) {
33-
return EvaluatorMapper.super.fold(source(), ctx);
34-
}
35-
3632
@Override
3733
public BiConsumer<LogicalPlan, Failures> postAnalysisPlanVerification() {
3834
return (p, failures) -> {
@@ -45,4 +41,29 @@ public BiConsumer<LogicalPlan, Failures> postAnalysisPlanVerification() {
4541
};
4642
}
4743

44+
/**
45+
* This is a class of grouping functions that cannot be evaluated outside the context of an aggregation.
46+
* They will have their evaluation implemented part of an aggregation, which may keep state for their execution, making them "stateful"
47+
* grouping functions.
48+
*/
49+
public abstract static non-sealed class NonEvaluatableGroupingFunction extends GroupingFunction {
50+
protected NonEvaluatableGroupingFunction(Source source, List<Expression> fields) {
51+
super(source, fields);
52+
}
53+
}
54+
55+
/**
56+
* This is a class of grouping functions that can be evaluated independently within an EVAL operator, independent of the aggregation
57+
* they're used by.
58+
*/
59+
public abstract static non-sealed class EvaluatableGroupingFunction extends GroupingFunction implements EvaluatorMapper {
60+
protected EvaluatableGroupingFunction(Source source, List<Expression> fields) {
61+
super(source, fields);
62+
}
63+
64+
@Override
65+
public Object fold(FoldContext ctx) {
66+
return EvaluatorMapper.super.fold(source(), ctx);
67+
}
68+
}
4869
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java

+8-7
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import org.elasticsearch.xpack.esql.core.expression.Expression;
1616
import org.elasticsearch.xpack.esql.core.expression.Expressions;
1717
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
18-
import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize;
18+
import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction;
1919
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
2020
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
2121
import org.elasticsearch.xpack.esql.plan.logical.Project;
@@ -67,11 +67,11 @@ protected LogicalPlan rule(UnaryPlan plan) {
6767
for (Expression grouping : groupings) {
6868
if (grouping instanceof Attribute attribute) {
6969
groupingAttrs.add(attribute);
70-
} else if (grouping instanceof Alias as && as.child() instanceof Categorize) {
70+
} else if (grouping instanceof Alias as && as.child() instanceof GroupingFunction.NonEvaluatableGroupingFunction) {
7171
groupingAttrs.add(as);
7272
} else {
7373
// After applying ReplaceAggregateNestedExpressionWithEval,
74-
// groupings (except Categorize) can only contain attributes.
74+
// evaluatable groupings can only contain attributes.
7575
throw new EsqlIllegalArgumentException("Expected an Attribute, got {}", grouping);
7676
}
7777
}
@@ -145,7 +145,8 @@ private static List<Expression> combineUpperGroupingsAndLowerProjections(
145145
List<? extends NamedExpression> lowerProjections
146146
) {
147147
assert upperGroupings.size() <= 1
148-
|| upperGroupings.stream().anyMatch(group -> group.anyMatch(expr -> expr instanceof Categorize)) == false
148+
|| upperGroupings.stream()
149+
.anyMatch(group -> group.anyMatch(expr -> expr instanceof GroupingFunction.NonEvaluatableGroupingFunction)) == false
149150
: "CombineProjections only tested with a single CATEGORIZE with no additional groups";
150151
// Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..)
151152
AttributeMap.Builder<Attribute> aliasesBuilder = AttributeMap.builder();
@@ -159,10 +160,10 @@ private static List<Expression> combineUpperGroupingsAndLowerProjections(
159160
// Propagate any renames from the lower projection into the upper groupings.
160161
// This can lead to duplicates: e.g.
161162
// | EVAL x = y | STATS ... BY x, y
162-
// All substitutions happen before; groupings must be attributes at this point except for CATEGORIZE which will be an alias like
163-
// `c = CATEGORIZE(attribute)`.
163+
// All substitutions happen before; groupings must be attributes at this point except for non-evaluatable groupings which will be
164+
// an alias like `c = CATEGORIZE(attribute)`.
164165
// Therefore, it is correct to deduplicate based on simple equality (based on names) instead of name ids (Set vs. AttributeSet).
165-
// TODO: The deduplication based on simple equality will be insufficient in case of multiple CATEGORIZEs, e.g. for
166+
// TODO: The deduplication based on simple equality will be insufficient in case of multiple non-evaluatable groupings, e.g. for
166167
// `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead.
167168
LinkedHashSet<NamedExpression> resolvedGroupings = new LinkedHashSet<>();
168169
for (NamedExpression ne : upperGroupings) {

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNull.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import org.elasticsearch.xpack.esql.core.expression.Literal;
1414
import org.elasticsearch.xpack.esql.core.expression.Nullability;
1515
import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
16-
import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize;
16+
import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction;
1717
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In;
1818
import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext;
1919

@@ -43,9 +43,9 @@ public Expression rule(Expression e, LogicalOptimizerContext ctx) {
4343
return Literal.of(in, null);
4444
}
4545
} else if (e instanceof Alias == false && e.nullable() == Nullability.TRUE
46-
// Categorize function stays as a STATS grouping (It isn't moved to an early EVAL like other groupings),
46+
// Non-evaluatable functions stay as a STATS grouping (It isn't moved to an early EVAL like other groupings),
4747
// so folding it to null would currently break the plan, as we don't create an attribute/channel for that null value.
48-
&& e instanceof Categorize == false
48+
&& e instanceof GroupingFunction.NonEvaluatableGroupingFunction == false
4949
&& Expressions.anyMatch(e.children(), Expressions::isGuaranteedNull)) {
5050
return Literal.of(e, null);
5151
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java

+17-14
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import org.elasticsearch.xpack.esql.core.tree.Source;
1717
import org.elasticsearch.xpack.esql.core.util.Holder;
1818
import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
19-
import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize;
19+
import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction;
2020
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
2121
import org.elasticsearch.xpack.esql.plan.logical.Eval;
2222
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
@@ -50,20 +50,20 @@ public ReplaceAggregateAggExpressionWithEval() {
5050

5151
@Override
5252
protected LogicalPlan rule(Aggregate aggregate) {
53-
// build alias map
53+
// an alias map for evaluatable grouping functions
5454
AttributeMap.Builder<Expression> aliasesBuilder = AttributeMap.builder();
55-
aggregate.forEachExpressionUp(Alias.class, a -> aliasesBuilder.put(a.toAttribute(), a.child()));
56-
var aliases = aliasesBuilder.build();
57-
58-
// Build Categorize grouping functions map.
59-
// Functions like BUCKET() shouldn't reach this point,
60-
// as they are moved to an early EVAL by ReplaceAggregateNestedExpressionWithEval
61-
Map<Categorize, Attribute> groupingAttributes = new HashMap<>();
55+
// a function map for non-evaluatable grouping functions
56+
Map<GroupingFunction.NonEvaluatableGroupingFunction, Attribute> nonEvalGroupingAttributes = new HashMap<>(
57+
aggregate.groupings().size()
58+
);
6259
aggregate.forEachExpressionUp(Alias.class, a -> {
63-
if (a.child() instanceof Categorize groupingFunction) {
64-
groupingAttributes.put(groupingFunction, a.toAttribute());
60+
if (a.child() instanceof GroupingFunction.NonEvaluatableGroupingFunction groupingFunction) {
61+
nonEvalGroupingAttributes.put(groupingFunction, a.toAttribute());
62+
} else {
63+
aliasesBuilder.put(a.toAttribute(), a.child());
6564
}
6665
});
66+
var aliases = aliasesBuilder.build();
6767

6868
// break down each aggregate into AggregateFunction and/or grouping key
6969
// preserve the projection at the end
@@ -123,8 +123,11 @@ protected LogicalPlan rule(Aggregate aggregate) {
123123
return alias.toAttribute();
124124
});
125125

126-
// replace grouping functions with their references
127-
aggExpression = aggExpression.transformUp(Categorize.class, groupingAttributes::get);
126+
// replace non-evaluatable grouping functions with their references
127+
aggExpression = aggExpression.transformUp(
128+
GroupingFunction.NonEvaluatableGroupingFunction.class,
129+
nonEvalGroupingAttributes::get
130+
);
128131

129132
Alias alias = as.replaceChild(aggExpression);
130133
newEvals.add(alias);
@@ -152,7 +155,7 @@ protected LogicalPlan rule(Aggregate aggregate) {
152155
return plan;
153156
}
154157

155-
static String syntheticName(Expression expression, Expression af, int counter) {
158+
private static String syntheticName(Expression expression, Expression af, int counter) {
156159
return TemporaryNameUtils.temporaryName(expression, af, counter);
157160
}
158161
}

0 commit comments

Comments
 (0)