Skip to content

Commit 8c57db8

Browse files
ES|QL: fix join masking eval (#126614) (#127149)
1 parent 13ba48e commit 8c57db8

File tree

6 files changed

+212
-46
lines changed

6 files changed

+212
-46
lines changed

docs/changelog/126614.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 126614
2+
summary: Fix join masking eval
3+
area: ES|QL
4+
type: bug
5+
issues: []

x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/generative/GenerativeRestTest.java

+7-2
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,13 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
4444
// Awaiting fixes
4545
"estimated row size \\[0\\] wasn't set", // https://github.com/elastic/elasticsearch/issues/121739
4646
"unknown physical plan node \\[OrderExec\\]", // https://github.com/elastic/elasticsearch/issues/120817
47-
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741
48-
//
47+
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741,
48+
"Plan \\[ProjectExec\\[\\[<no-fields>.* optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/125866
49+
"only supports KEYWORD or TEXT values, found expression", // https://github.com/elastic/elasticsearch/issues/126017
50+
"token recognition error at: '``", // https://github.com/elastic/elasticsearch/issues/125870
51+
"Unknown column \\[.*\\]", // https://github.com/elastic/elasticsearch/issues/126026
52+
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781
53+
"No matches found for pattern", // https://github.com/elastic/elasticsearch/issues/126418
4954
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
5055
);
5156

x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec

+42
Original file line numberDiff line numberDiff line change
@@ -1502,3 +1502,45 @@ from *
15021502
salary_change.long:double|foo:long
15031503
5.0 |1698069301543123456
15041504
;
1505+
1506+
1507+
joinMaskingEval
1508+
required_capability: join_lookup_v12
1509+
required_capability: fix_join_masking_eval
1510+
from languag*
1511+
| eval type = null
1512+
| rename language_name as message
1513+
| lookup join message_types_lookup on message
1514+
| rename type as message
1515+
| lookup join message_types_lookup on message
1516+
| keep `language.name`
1517+
;
1518+
1519+
ignoreOrder:true
1520+
language.name:text
1521+
null
1522+
null
1523+
null
1524+
null
1525+
null
1526+
null
1527+
null
1528+
null
1529+
null
1530+
null
1531+
null
1532+
null
1533+
null
1534+
null
1535+
null
1536+
null
1537+
null
1538+
null
1539+
null
1540+
null
1541+
null
1542+
English
1543+
French
1544+
Spanish
1545+
German
1546+
;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,13 @@ public enum Cap {
711711
* the ownership of that block - but didn't account for the fact that the caller might close it, leading to double releases
712712
* in some union type queries. C.f. https://github.com/elastic/elasticsearch/issues/125850
713713
*/
714-
FIX_DOUBLY_RELEASED_NULL_BLOCKS_IN_VALUESOURCEREADER;
714+
FIX_DOUBLY_RELEASED_NULL_BLOCKS_IN_VALUESOURCEREADER,
715+
716+
/**
717+
* During resolution (pre-analysis) we have to consider that joins or enriches can override EVALuated values
718+
* https://github.com/elastic/elasticsearch/issues/126419
719+
*/
720+
FIX_JOIN_MASKING_EVAL;
715721

716722
private final boolean enabled;
717723

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java

+64-13
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,20 @@
5858
import org.elasticsearch.xpack.esql.parser.QueryParams;
5959
import org.elasticsearch.xpack.esql.plan.IndexPattern;
6060
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
61+
import org.elasticsearch.xpack.esql.plan.logical.Drop;
6162
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
63+
import org.elasticsearch.xpack.esql.plan.logical.Eval;
64+
import org.elasticsearch.xpack.esql.plan.logical.Filter;
65+
import org.elasticsearch.xpack.esql.plan.logical.InlineStats;
6266
import org.elasticsearch.xpack.esql.plan.logical.Keep;
67+
import org.elasticsearch.xpack.esql.plan.logical.Limit;
6368
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
69+
import org.elasticsearch.xpack.esql.plan.logical.MvExpand;
70+
import org.elasticsearch.xpack.esql.plan.logical.OrderBy;
6471
import org.elasticsearch.xpack.esql.plan.logical.Project;
6572
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
73+
import org.elasticsearch.xpack.esql.plan.logical.Rename;
74+
import org.elasticsearch.xpack.esql.plan.logical.TopN;
6675
import org.elasticsearch.xpack.esql.plan.logical.UnresolvedRelation;
6776
import org.elasticsearch.xpack.esql.plan.logical.join.InlineJoin;
6877
import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes;
@@ -573,6 +582,8 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
573582
AttributeSet keepJoinReferences = new AttributeSet();
574583
Set<String> wildcardJoinIndices = new java.util.HashSet<>();
575584

585+
boolean[] canRemoveAliases = new boolean[] { true };
586+
576587
parsed.forEachDown(p -> {// go over each plan top-down
577588
if (p instanceof RegexExtract re) { // for Grok and Dissect
578589
// remove other down-the-tree references to the extracted fields
@@ -618,20 +629,37 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
618629
}
619630
}
620631

621-
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree
622-
// for example "from test | eval x = salary | stats max = max(x) by gender"
623-
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
624-
AttributeSet planRefs = p.references();
625-
Set<String> fieldNames = planRefs.names();
626-
p.forEachExpressionDown(Alias.class, alias -> {
627-
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id = id"
628-
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
629-
if (fieldNames.contains(alias.name())) {
630-
return;
631-
}
632-
references.removeIf(attr -> matchByName(attr, alias.name(), keepCommandReferences.contains(attr)));
633-
});
632+
// If the current node in the tree is of type JOIN (lookup join, inlinestats) or ENRICH or other type of
633+
// command that we may add in the future which can override already defined Aliases with EVAL
634+
// (for example
635+
//
636+
// from test
637+
// | eval ip = 123
638+
// | enrich ips_policy ON hostname
639+
// | rename ip AS my_ip
640+
//
641+
// and ips_policy enriches the results with the same name ip field),
642+
// these aliases should be kept in the list of fields.
643+
if (canRemoveAliases[0] && couldOverrideAliases(p)) {
644+
canRemoveAliases[0] = false;
645+
}
646+
if (canRemoveAliases[0]) {
647+
// remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree
648+
// for example "from test | eval x = salary | stats max = max(x) by gender"
649+
// remove the UnresolvedAttribute "x", since that is an Alias defined in "eval"
650+
AttributeSet planRefs = p.references();
651+
Set<String> fieldNames = planRefs.names();
652+
p.forEachExpressionDown(Alias.class, alias -> {
653+
// do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id AS id"
654+
// or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)"
655+
if (fieldNames.contains(alias.name())) {
656+
return;
657+
}
658+
references.removeIf(attr -> matchByName(attr, alias.name(), keepCommandReferences.contains(attr)));
659+
});
660+
}
634661
});
662+
635663
// Add JOIN ON column references afterward to avoid Alias removal
636664
references.addAll(keepJoinReferences);
637665
// If any JOIN commands need wildcard field-caps calls, persist the index names
@@ -655,6 +683,29 @@ static PreAnalysisResult fieldNames(LogicalPlan parsed, Set<String> enrichPolicy
655683
}
656684
}
657685

686+
/**
687+
* Could a plan "accidentally" override aliases?
688+
* Examples are JOIN and ENRICH, that _could_ produce fields with the same
689+
* name of an existing alias, based on their index mapping.
690+
* Here we just have to consider commands where this information is not available before index resolution,
691+
* eg. EVAL, GROK, DISSECT can override an alias, but we know it in advance, ie. we don't need to resolve indices to know.
692+
*/
693+
private static boolean couldOverrideAliases(LogicalPlan p) {
694+
return (p instanceof Aggregate
695+
|| p instanceof Drop
696+
|| p instanceof Eval
697+
|| p instanceof Filter
698+
|| p instanceof InlineStats
699+
|| p instanceof Keep
700+
|| p instanceof Limit
701+
|| p instanceof MvExpand
702+
|| p instanceof OrderBy
703+
|| p instanceof Project
704+
|| p instanceof RegexExtract
705+
|| p instanceof Rename
706+
|| p instanceof TopN) == false;
707+
}
708+
658709
private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) {
659710
boolean isPattern = Regex.isSimpleMatchPattern(attr.name());
660711
if (skipIfPattern && isPattern) {

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java

+87-30
Original file line numberDiff line numberDiff line change
@@ -478,13 +478,16 @@ public void testDropAllColumns_WithStats() {
478478
}
479479

480480
public void testEnrichOn() {
481-
assertFieldNames("""
482-
from employees
483-
| sort emp_no
484-
| limit 1
485-
| eval x = to_string(languages)
486-
| enrich languages_policy on x
487-
| keep emp_no, language_name""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*"));
481+
assertFieldNames(
482+
"""
483+
from employees
484+
| sort emp_no
485+
| limit 1
486+
| eval x = to_string(languages)
487+
| enrich languages_policy on x
488+
| keep emp_no, language_name""",
489+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
490+
);
488491
}
489492

490493
public void testEnrichOn2() {
@@ -494,7 +497,7 @@ public void testEnrichOn2() {
494497
| enrich languages_policy on x
495498
| keep emp_no, language_name
496499
| sort emp_no
497-
| limit 1""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*"));
500+
| limit 1""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*"));
498501
}
499502

500503
public void testUselessEnrich() {
@@ -512,15 +515,15 @@ public void testSimpleSortLimit() {
512515
| enrich languages_policy on x
513516
| keep emp_no, language_name
514517
| sort emp_no
515-
| limit 1""", Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*"));
518+
| limit 1""", Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*"));
516519
}
517520

518521
public void testWith() {
519522
assertFieldNames(
520523
"""
521524
from employees | eval x = to_string(languages) | keep emp_no, x | sort emp_no | limit 1
522525
| enrich languages_policy on x with language_name""",
523-
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
526+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
524527
);
525528
}
526529

@@ -529,7 +532,7 @@ public void testWithAlias() {
529532
"""
530533
from employees | sort emp_no | limit 3 | eval x = to_string(languages) | keep emp_no, x
531534
| enrich languages_policy on x with lang = language_name""",
532-
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
535+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
533536
);
534537
}
535538

@@ -538,7 +541,7 @@ public void testWithAliasSort() {
538541
"""
539542
from employees | eval x = to_string(languages) | keep emp_no, x | sort emp_no | limit 3
540543
| enrich languages_policy on x with lang = language_name""",
541-
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
544+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
542545
);
543546
}
544547

@@ -547,7 +550,7 @@ public void testWithAliasAndPlain() {
547550
"""
548551
from employees | sort emp_no desc | limit 3 | eval x = to_string(languages) | keep emp_no, x
549552
| enrich languages_policy on x with lang = language_name, language_name""",
550-
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
553+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
551554
);
552555
}
553556

@@ -556,7 +559,7 @@ public void testWithTwoAliasesSameProp() {
556559
"""
557560
from employees | sort emp_no | limit 1 | eval x = to_string(languages) | keep emp_no, x
558561
| enrich languages_policy on x with lang = language_name, lang2 = language_name""",
559-
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
562+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
560563
);
561564
}
562565

@@ -565,7 +568,7 @@ public void testRedundantWith() {
565568
"""
566569
from employees | sort emp_no | limit 1 | eval x = to_string(languages) | keep emp_no, x
567570
| enrich languages_policy on x with language_name, language_name""",
568-
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*")
571+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "x.*")
569572
);
570573
}
571574

@@ -588,28 +591,34 @@ public void testConstantNullInput() {
588591
| eval x = to_string(languages)
589592
| keep emp_no, x
590593
| enrich languages_policy on x with language_name, language_name""",
591-
Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*")
594+
Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*")
592595
);
593596
}
594597

595598
public void testEnrichEval() {
596-
assertFieldNames("""
597-
from employees
598-
| eval x = to_string(languages)
599-
| enrich languages_policy on x with lang = language_name
600-
| eval language = concat(x, "-", lang)
601-
| keep emp_no, x, lang, language
602-
| sort emp_no desc | limit 3""", Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*"));
599+
assertFieldNames(
600+
"""
601+
from employees
602+
| eval x = to_string(languages)
603+
| enrich languages_policy on x with lang = language_name
604+
| eval language = concat(x, "-", lang)
605+
| keep emp_no, x, lang, language
606+
| sort emp_no desc | limit 3""",
607+
Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*", "lang", "lang.*")
608+
);
603609
}
604610

605611
public void testSimple() {
606-
assertFieldNames("""
607-
from employees
608-
| eval x = 1, y = to_string(languages)
609-
| enrich languages_policy on y
610-
| where x > 1
611-
| keep emp_no, language_name
612-
| limit 1""", Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*"));
612+
assertFieldNames(
613+
"""
614+
from employees
615+
| eval x = 1, y = to_string(languages)
616+
| enrich languages_policy on y
617+
| where x > 1
618+
| keep emp_no, language_name
619+
| limit 1""",
620+
Set.of("emp_no", "emp_no.*", "languages", "languages.*", "language_name", "language_name.*", "x", "y", "x.*", "y.*")
621+
);
613622
}
614623

615624
public void testEvalNullSort() {
@@ -1575,6 +1584,54 @@ public void testMultiLookupJoinSameIndexKeepAfter() {
15751584
);
15761585
}
15771586

1587+
public void testJoinMaskingKeep() {
1588+
assertFieldNames(
1589+
"""
1590+
from languag*
1591+
| eval type = null
1592+
| rename language_name as message
1593+
| lookup join message_types_lookup on message
1594+
| rename type as message
1595+
| lookup join message_types_lookup on message
1596+
| keep `language.name`""",
1597+
Set.of("language.name", "type", "language_name", "message", "language_name.*", "message.*", "type.*", "language.name.*")
1598+
);
1599+
}
1600+
1601+
public void testJoinMaskingKeep2() {
1602+
assertFieldNames("""
1603+
from languag*
1604+
| eval type = "foo"
1605+
| rename type as message
1606+
| lookup join message_types_lookup on message
1607+
| rename type as message
1608+
| lookup join message_types_lookup on message
1609+
| keep `language.name`""", Set.of("language.name", "type", "message", "message.*", "type.*", "language.name.*"));
1610+
}
1611+
1612+
public void testEnrichMaskingEvalOn() {
1613+
assertFieldNames("""
1614+
from employees
1615+
| eval language_name = null
1616+
| enrich languages_policy on languages
1617+
| rename language_name as languages
1618+
| eval languages = length(languages)
1619+
| enrich languages_policy on languages
1620+
| keep emp_no, language_name""", Set.of("emp_no", "language_name", "languages", "language_name.*", "languages.*", "emp_no.*"));
1621+
}
1622+
1623+
public void testEnrichAndJoinMaskingEvalWh() {
1624+
assertFieldNames("""
1625+
from employees
1626+
| eval language_name = null
1627+
| enrich languages_policy on languages
1628+
| rename language_name as languages
1629+
| eval languages = length(languages)
1630+
| enrich languages_policy on languages
1631+
| lookup join message_types_lookup on language_name
1632+
| keep emp_no, language_name""", Set.of("emp_no", "language_name", "languages", "language_name.*", "languages.*", "emp_no.*"));
1633+
}
1634+
15781635
private Set<String> fieldNames(String query, Set<String> enrichPolicyMatchFields) {
15791636
var preAnalysisResult = new EsqlSession.PreAnalysisResult(null);
15801637
return EsqlSession.fieldNames(parser.createStatement(query), enrichPolicyMatchFields, preAnalysisResult).fieldNames();

0 commit comments

Comments
 (0)