Skip to content

Commit 2fff041

Browse files
authored
ESQL: Push down StartsWith and EndsWith functions to Lucene (#123381)
Fixes #123067. Just like WildcardLike and RLike, some functions can be converted to Lucene queries. Here it's StartsWith and EndsWith, which are nearly identical to WildcardLike. This, like some other functions, needs a FoldContext. I'm using the static method for this here, but it's fixed in #123398, which I kept separate as it changes many files.
1 parent 6cb5f83 commit 2fff041

File tree

7 files changed

+387
-2
lines changed

7 files changed

+387
-2
lines changed

docs/changelog/123381.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 123381
2+
summary: Push down `StartsWith` and `EndsWith` functions to Lucene
3+
area: ES|QL
4+
type: enhancement
5+
issues:
6+
- 123067

x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,46 @@ false | null
954954
false | null
955955
;
956956

957+
startsWithLucenePushdown
958+
959+
from hosts
960+
| where starts_with(host, "bet") and starts_with(host_group, "Kuber")
961+
| keep host, host_group
962+
| sort host, host_group;
963+
964+
host:keyword | host_group:text
965+
beta | Kubernetes cluster
966+
beta | Kubernetes cluster
967+
beta | Kubernetes cluster
968+
;
969+
970+
startsWithLuceneDisabledPushdown
971+
972+
from hosts
973+
| where host == "unknown host" or (starts_with(host, "bet") and starts_with(host_group, "Kuber"))
974+
| keep host, host_group
975+
| sort host, host_group;
976+
977+
host:keyword | host_group:text
978+
beta | Kubernetes cluster
979+
beta | Kubernetes cluster
980+
beta | Kubernetes cluster
981+
;
982+
983+
startsWithLucenePushdownIgnoreMultivalues
984+
985+
from hosts
986+
| where starts_with(description, "epsilon")
987+
| keep description
988+
| sort description;
989+
990+
warning:Line 2:9: evaluation of [starts_with(description, \"epsilon\")] failed, treating result as null. Only first 20 failures recorded.
991+
warning:Line 2:9: java.lang.IllegalArgumentException: single-value function encountered multi-value
992+
993+
description:text
994+
epsilon gw instance
995+
;
996+
957997
substringOfText
958998

959999
from hosts | where host=="epsilon" | eval l1 = substring(host_group, 0, 5), l2 = substring(description, 0, 5) | keep l1, l2;
@@ -1199,6 +1239,138 @@ Bernatsky |false
11991239
;
12001240

12011241

1242+
endsWithLucenePushdown
1243+
1244+
from hosts
1245+
| where ends_with(host, "ta") and ends_with(host_group, "cluster")
1246+
| keep host, host_group
1247+
| sort host, host_group;
1248+
1249+
host:keyword | host_group:text
1250+
beta | Kubernetes cluster
1251+
beta | Kubernetes cluster
1252+
beta | Kubernetes cluster
1253+
;
1254+
1255+
endsWithLuceneDisabledPushdown
1256+
1257+
from hosts
1258+
| where host == "unknown host" or (ends_with(host, "ta") and ends_with(host_group, "cluster"))
1259+
| keep host, host_group
1260+
| sort host, host_group;
1261+
1262+
host:keyword | host_group:text
1263+
beta | Kubernetes cluster
1264+
beta | Kubernetes cluster
1265+
beta | Kubernetes cluster
1266+
;
1267+
1268+
endsWithLucenePushdownIgnoreMultivalues
1269+
1270+
from hosts
1271+
| where ends_with(description, "host")
1272+
| keep description
1273+
| sort description;
1274+
1275+
warning:Line 2:9: evaluation of [ends_with(description, \"host\")] failed, treating result as null. Only first 20 failures recorded.
1276+
warning:Line 2:9: java.lang.IllegalArgumentException: single-value function encountered multi-value
1277+
1278+
description:text
1279+
;
1280+
1281+
1282+
lucenePushdownMultipleWhere
1283+
1284+
from hosts
1285+
| where starts_with(host, "bet")
1286+
| keep host, host_group
1287+
| sort host, host_group
1288+
| where ends_with(host_group, "cluster");
1289+
1290+
host:keyword | host_group:text
1291+
beta | Kubernetes cluster
1292+
beta | Kubernetes cluster
1293+
beta | Kubernetes cluster
1294+
;
1295+
1296+
lucenePushdownMultipleIndices
1297+
1298+
from airports* metadata _index
1299+
| where starts_with(name::keyword, "Sahn") and ends_with(abbrev, "UH")
1300+
| keep abbrev, name, _index
1301+
| sort abbrev, name, _index;
1302+
1303+
abbrev:keyword | name:text | _index:keyword
1304+
LUH | Sahnewal | airports
1305+
LUH | Sahnewal | airports_mp
1306+
LUH | Sahnewal | airports_no_doc_values
1307+
LUH | Sahnewal | airports_not_indexed
1308+
LUH | Sahnewal | airports_not_indexed_nor_doc_values
1309+
LUH | Sahnewal | airports_web
1310+
;
1311+
1312+
lucenePushdownOr
1313+
1314+
from airports
1315+
| where starts_with(name::keyword, "Sahn") or ends_with(abbrev, "UH")
1316+
| keep abbrev, name
1317+
| sort abbrev, name;
1318+
1319+
abbrev:keyword | name:text
1320+
AUH | Abu Dhabi Int'l
1321+
LUH | Sahnewal
1322+
RUH | King Khalid Int'l
1323+
;
1324+
1325+
lucenePushdownMultipleOr
1326+
1327+
from airports
1328+
| where starts_with(name::keyword, "Sahn") or ends_with(abbrev, "UH") or starts_with(abbrev, "OOL")
1329+
| keep abbrev, name
1330+
| sort abbrev, name;
1331+
1332+
abbrev:keyword | name:text
1333+
AUH | Abu Dhabi Int'l
1334+
LUH | Sahnewal
1335+
OOL | Gold Coast
1336+
RUH | King Khalid Int'l
1337+
;
1338+
1339+
lucenePushdownMultipleAnd
1340+
1341+
from airports metadata _index
1342+
| where starts_with(name::keyword, "Sahn") and ends_with(abbrev, "UH")
1343+
| where ends_with(name::keyword, "al")
1344+
| keep abbrev, name, _index
1345+
| sort abbrev, name, _index;
1346+
1347+
abbrev:keyword | name:text | _index:keyword
1348+
LUH | Sahnewal | airports
1349+
;
1350+
1351+
lucenePushdownMixAndOr
1352+
1353+
from airports
1354+
| where starts_with(name::keyword, "Sahn") and (starts_with(name::keyword, "Abc") or ends_with(abbrev, "UH"))
1355+
| keep abbrev, name, scalerank
1356+
| sort abbrev, name;
1357+
1358+
abbrev:keyword | name:text | scalerank:integer
1359+
LUH | Sahnewal | 9
1360+
;
1361+
1362+
lucenePushdownMixOrAnd
1363+
1364+
from airports* metadata _index
1365+
| where starts_with(name::keyword, "Sahn") or (starts_with(abbrev, "G") and ends_with(name::keyword, "Falls Int'l"))
1366+
| where ends_with(_index, "airports")
1367+
| keep abbrev, name, scalerank, _index
1368+
| sort abbrev;
1369+
1370+
abbrev:keyword | name:text | scalerank:integer | _index:keyword
1371+
GTF | Great Falls Int'l | 8 | airports
1372+
LUH | Sahnewal | 9 | airports
1373+
;
12021374

12031375
toLowerRow#[skip:-8.12.99]
12041376
// tag::to_lower[]

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/EndsWith.java

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,20 @@
77

88
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
99

10+
import org.apache.lucene.queryparser.classic.QueryParser;
1011
import org.apache.lucene.util.BytesRef;
1112
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1213
import org.elasticsearch.common.io.stream.StreamInput;
1314
import org.elasticsearch.common.io.stream.StreamOutput;
15+
import org.elasticsearch.common.lucene.BytesRefs;
1416
import org.elasticsearch.compute.ann.Evaluator;
1517
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
18+
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
1619
import org.elasticsearch.xpack.esql.core.expression.Expression;
20+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
21+
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
22+
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
23+
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
1724
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
1825
import org.elasticsearch.xpack.esql.core.tree.Source;
1926
import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -22,6 +29,8 @@
2229
import org.elasticsearch.xpack.esql.expression.function.Param;
2330
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
2431
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
32+
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
33+
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
2534

2635
import java.io.IOException;
2736
import java.util.Arrays;
@@ -31,7 +40,7 @@
3140
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
3241
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
3342

34-
public class EndsWith extends EsqlScalarFunction {
43+
public class EndsWith extends EsqlScalarFunction implements TranslationAware.SingleValueTranslationAware {
3544
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "EndsWith", EndsWith::new);
3645

3746
private final Expression str;
@@ -129,6 +138,27 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
129138
return new EndsWithEvaluator.Factory(source(), toEvaluator.apply(str), toEvaluator.apply(suffix));
130139
}
131140

141+
@Override
142+
public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
143+
return pushdownPredicates.isPushableAttribute(str) && suffix.foldable();
144+
}
145+
146+
@Override
147+
public Query asQuery(TranslatorHandler handler) {
148+
LucenePushdownPredicates.checkIsPushableAttribute(str);
149+
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);
150+
151+
// TODO: Get the real FoldContext here
152+
var wildcardQuery = "*" + QueryParser.escape(BytesRefs.toString(suffix.fold(FoldContext.small())));
153+
154+
return new WildcardQuery(source(), fieldName, wildcardQuery);
155+
}
156+
157+
@Override
158+
public Expression singleValueField() {
159+
return str;
160+
}
161+
132162
Expression str() {
133163
return str;
134164
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/StartsWith.java

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,20 @@
77

88
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
99

10+
import org.apache.lucene.queryparser.classic.QueryParser;
1011
import org.apache.lucene.util.BytesRef;
1112
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1213
import org.elasticsearch.common.io.stream.StreamInput;
1314
import org.elasticsearch.common.io.stream.StreamOutput;
15+
import org.elasticsearch.common.lucene.BytesRefs;
1416
import org.elasticsearch.compute.ann.Evaluator;
1517
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
18+
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
1619
import org.elasticsearch.xpack.esql.core.expression.Expression;
20+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
21+
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
22+
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
23+
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
1724
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
1825
import org.elasticsearch.xpack.esql.core.tree.Source;
1926
import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -22,6 +29,8 @@
2229
import org.elasticsearch.xpack.esql.expression.function.Param;
2330
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
2431
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
32+
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
33+
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
2534

2635
import java.io.IOException;
2736
import java.util.Arrays;
@@ -31,7 +40,7 @@
3140
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
3241
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
3342

34-
public class StartsWith extends EsqlScalarFunction {
43+
public class StartsWith extends EsqlScalarFunction implements TranslationAware.SingleValueTranslationAware {
3544
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
3645
Expression.class,
3746
"StartsWith",
@@ -126,6 +135,27 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
126135
return new StartsWithEvaluator.Factory(source(), toEvaluator.apply(str), toEvaluator.apply(prefix));
127136
}
128137

138+
@Override
139+
public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
140+
return pushdownPredicates.isPushableAttribute(str) && prefix.foldable();
141+
}
142+
143+
@Override
144+
public Query asQuery(TranslatorHandler handler) {
145+
LucenePushdownPredicates.checkIsPushableAttribute(str);
146+
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);
147+
148+
// TODO: Get the real FoldContext here
149+
var wildcardQuery = QueryParser.escape(BytesRefs.toString(prefix.fold(FoldContext.small()))) + "*";
150+
151+
return new WildcardQuery(source(), fieldName, wildcardQuery);
152+
}
153+
154+
@Override
155+
public Expression singleValueField() {
156+
return str;
157+
}
158+
129159
Expression str() {
130160
return str;
131161
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/EndsWithTests.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,21 @@
1212

1313
import org.apache.lucene.util.BytesRef;
1414
import org.elasticsearch.xpack.esql.core.expression.Expression;
15+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
16+
import org.elasticsearch.xpack.esql.core.expression.Literal;
17+
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
1518
import org.elasticsearch.xpack.esql.core.tree.Source;
1619
import org.elasticsearch.xpack.esql.core.type.DataType;
20+
import org.elasticsearch.xpack.esql.core.type.EsField;
1721
import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase;
1822
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
23+
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
24+
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
1925
import org.hamcrest.Matcher;
2026

2127
import java.util.LinkedList;
2228
import java.util.List;
29+
import java.util.Map;
2330
import java.util.function.Supplier;
2431

2532
import static org.hamcrest.Matchers.equalTo;
@@ -98,4 +105,38 @@ private static TestCaseSupplier.TestCase testCase(
98105
protected Expression build(Source source, List<Expression> args) {
99106
return new EndsWith(source, args.get(0), args.get(1));
100107
}
108+
109+
public void testLuceneQuery_AllLiterals_NonTranslatable() {
110+
var function = new EndsWith(
111+
Source.EMPTY,
112+
new Literal(Source.EMPTY, "test", DataType.KEYWORD),
113+
new Literal(Source.EMPTY, "test", DataType.KEYWORD)
114+
);
115+
116+
assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(false));
117+
}
118+
119+
public void testLuceneQuery_NonFoldableSuffix_NonTranslatable() {
120+
var function = new EndsWith(
121+
Source.EMPTY,
122+
new FieldAttribute(Source.EMPTY, "field", new EsField("field", DataType.KEYWORD, Map.of(), true)),
123+
new FieldAttribute(Source.EMPTY, "field", new EsField("suffix", DataType.KEYWORD, Map.of(), true))
124+
);
125+
126+
assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(false));
127+
}
128+
129+
public void testLuceneQuery_NonFoldableSuffix_Translatable() {
130+
var function = new EndsWith(
131+
Source.EMPTY,
132+
new FieldAttribute(Source.EMPTY, "field", new EsField("suffix", DataType.KEYWORD, Map.of(), true)),
133+
new Literal(Source.EMPTY, "a*b?c\\", DataType.KEYWORD)
134+
);
135+
136+
assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(true));
137+
138+
var query = function.asQuery(TranslatorHandler.TRANSLATOR_HANDLER);
139+
140+
assertThat(query, equalTo(new WildcardQuery(Source.EMPTY, "field", "*a\\*b\\?c\\\\")));
141+
}
101142
}

0 commit comments

Comments
 (0)