Skip to content

Commit 56e19e2

Browse files
authored
ESQL: Improve error message for ( and [ (#124177) (#127620)
Due to recent grammar changes made ( token to no longer be reported by its text rather by his internal token name. Due to the use of pushMode, the symbol is not treated as a literal rather as a symbol. To address this, the parser listener looks at the error message and changes the message before returning it to the user. Fix #124145 Relates #123085 #121948
1 parent 4c8ac4c commit 56e19e2

File tree

2 files changed

+60
-7
lines changed

2 files changed

+60
-7
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java

+41
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.antlr.v4.runtime.Recognizer;
1515
import org.antlr.v4.runtime.Token;
1616
import org.antlr.v4.runtime.TokenSource;
17+
import org.antlr.v4.runtime.VocabularyImpl;
1718
import org.antlr.v4.runtime.atn.PredictionMode;
1819
import org.elasticsearch.logging.LogManager;
1920
import org.elasticsearch.logging.Logger;
@@ -24,6 +25,7 @@
2425

2526
import java.util.BitSet;
2627
import java.util.EmptyStackException;
28+
import java.util.Map;
2729
import java.util.function.BiFunction;
2830
import java.util.function.Function;
2931
import java.util.regex.Matcher;
@@ -46,6 +48,45 @@ public class EsqlParser {
4648
*/
4749
public static final int MAX_LENGTH = 1_000_000;
4850

51+
private static void replaceSymbolWithLiteral(Map<String, String> symbolReplacements, String[] literalNames, String[] symbolicNames) {
52+
for (int i = 0, replacements = symbolReplacements.size(); i < symbolicNames.length && replacements > 0; i++) {
53+
String symName = symbolicNames[i];
54+
if (symName != null) {
55+
String replacement = symbolReplacements.get(symName);
56+
if (replacement != null && literalNames[i] == null) {
57+
// literals are single quoted
58+
literalNames[i] = "'" + replacement + "'";
59+
replacements--;
60+
}
61+
}
62+
}
63+
}
64+
65+
/**
66+
* Add the literal name to a number of tokens that due to ANTLR internals/ATN
67+
* have their symbolic name returns instead during error reporting.
68+
* When reporting token errors, ANTLR uses the Vocabulary class to get the displayName
69+
* (if set), otherwise falls back to the literal one and eventually uses the symbol name.
70+
* Since the Vocabulary is static and not pluggable, this code modifies the underlying
71+
* arrays by setting the literal string manually based on the token index.
72+
* This is needed since some symbols, especially around setting up the mode, end up losing
73+
* their literal representation.
74+
* NB: this code is highly dependent on the ANTLR internals and thus will likely break
75+
* during upgrades.
76+
* NB: Can't use this for replacing DEV_ since the Vocabular is static while DEV_ replacement occurs per runtime configuration
77+
*/
78+
static {
79+
Map<String, String> symbolReplacements = Map.of("LP", "(", "OPENING_BRACKET", "[");
80+
81+
// the vocabularies have the same content however are different instances
82+
// for extra reliability, perform the replacement for each map
83+
VocabularyImpl parserVocab = (VocabularyImpl) EsqlBaseParser.VOCABULARY;
84+
replaceSymbolWithLiteral(symbolReplacements, parserVocab.getLiteralNames(), parserVocab.getSymbolicNames());
85+
86+
VocabularyImpl lexerVocab = (VocabularyImpl) EsqlBaseLexer.VOCABULARY;
87+
replaceSymbolWithLiteral(symbolReplacements, lexerVocab.getLiteralNames(), lexerVocab.getSymbolicNames());
88+
}
89+
4990
private EsqlConfig config = new EsqlConfig();
5091

5192
public EsqlConfig config() {

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java

+19-7
Original file line numberDiff line numberDiff line change
@@ -793,11 +793,8 @@ public void testInvalidQuotingAsFromIndexPattern() {
793793
expectError("FROM \"foo\"bar\"", ": token recognition error at: '\"'");
794794
expectError("FROM \"foo\"\"bar\"", ": extraneous input '\"bar\"' expecting <EOF>");
795795

796-
expectError("FROM \"\"\"foo\"\"\"bar\"\"\"", ": mismatched input 'bar' expecting {<EOF>, '|', ',', OPENING_BRACKET, 'metadata'}");
797-
expectError(
798-
"FROM \"\"\"foo\"\"\"\"\"\"bar\"\"\"",
799-
": mismatched input '\"bar\"' expecting {<EOF>, '|', ',', OPENING_BRACKET, 'metadata'}"
800-
);
796+
expectError("FROM \"\"\"foo\"\"\"bar\"\"\"", ": mismatched input 'bar' expecting {<EOF>, '|', ',', '[', 'metadata'}");
797+
expectError("FROM \"\"\"foo\"\"\"\"\"\"bar\"\"\"", ": mismatched input '\"bar\"' expecting {<EOF>, '|', ',', '[', 'metadata'}");
801798
}
802799

803800
public void testInvalidQuotingAsMetricsIndexPattern() {
@@ -1087,7 +1084,7 @@ public void testMetadataFieldOnOtherSources() {
10871084
expectError("show info metadata _index", "line 1:11: token recognition error at: 'm'");
10881085
expectError(
10891086
"explain [from foo] metadata _index",
1090-
"line 1:20: mismatched input 'metadata' expecting {'|', ',', OPENING_BRACKET, ']', 'metadata'}"
1087+
"line 1:20: mismatched input 'metadata' expecting {'|', ',', '[', ']', 'metadata'}"
10911088
);
10921089
}
10931090

@@ -3044,7 +3041,7 @@ public void testNamedFunctionArgumentInInvalidPositions() {
30443041
String map = "{\"option1\":\"string\", \"option2\":1}";
30453042

30463043
Map<String, String> commands = Map.ofEntries(
3047-
Map.entry("from {}", "line 1:7: mismatched input '\"option1\"' expecting {<EOF>, '|', ',', OPENING_BRACKET, 'metadata'}"),
3044+
Map.entry("from {}", "line 1:7: mismatched input '\"option1\"' expecting {<EOF>, '|', ',', '[', 'metadata'}"),
30483045
Map.entry("row x = {}", "line 1:9: extraneous input '{' expecting {QUOTED_STRING, INTEGER_LITERAL"),
30493046
Map.entry("eval x = {}", "line 1:22: extraneous input '{' expecting {QUOTED_STRING, INTEGER_LITERAL"),
30503047
Map.entry("where x > {}", "line 1:23: no viable alternative at input 'x > {'"),
@@ -3951,4 +3948,19 @@ public void testUnclosedParenthesis() {
39513948
expectError(q, "Invalid query");
39523949
}
39533950
}
3951+
3952+
// [ and ( are used to trigger a double mode causing their symbol name (instead of text) to be used in error reporting
3953+
// this test checks that their are properly replaced in the error message
3954+
public void testPreserveParanthesis() {
3955+
// test for (
3956+
expectError("row a = 1 not in", "line 1:17: mismatched input '<EOF>' expecting '('");
3957+
expectError("row a = 1 | where a not in", "line 1:27: mismatched input '<EOF>' expecting '('");
3958+
expectError("row a = 1 | where a not in (1", "line 1:30: mismatched input '<EOF>' expecting {',', ')'}");
3959+
expectError("row a = 1 | where a not in [1", "line 1:28: missing '(' at '['");
3960+
expectError("row a = 1 | where a not in 123", "line 1:28: missing '(' at '123'");
3961+
// test for [
3962+
expectError("explain", "line 1:8: mismatched input '<EOF>' expecting '['");
3963+
expectError("explain ]", "line 1:9: token recognition error at: ']'");
3964+
expectError("explain [row x = 1", "line 1:19: missing ']' at '<EOF>'");
3965+
}
39543966
}

0 commit comments

Comments
 (0)