Skip to content

Commit b9c61dc

Browse files
authored
ESQL: Push down StartsWith and EndsWith functions to Lucene (#123381) (#124583)
Fixes #123067. Just like WildcardLike and RLike, some functions can be converted to Lucene queries. Here it's those two, which are nearly identical to WildcardLike. This, like some other functions, needs a FoldContext. I'm using the static method for this here, but it's fixed in #123398, which I kept separate as it changes many files.
1 parent a4af548 commit b9c61dc

File tree

7 files changed

+387
-2
lines changed

7 files changed

+387
-2
lines changed

docs/changelog/123381.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 123381
2+
summary: Push down `StartsWith` and `EndsWith` functions to Lucene
3+
area: ES|QL
4+
type: enhancement
5+
issues:
6+
- 123067

x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec

+172
Original file line numberDiff line numberDiff line change
@@ -953,6 +953,46 @@ false | null
953953
false | null
954954
;
955955

956+
startsWithLucenePushdown
957+
958+
from hosts
959+
| where starts_with(host, "bet") and starts_with(host_group, "Kuber")
960+
| keep host, host_group
961+
| sort host, host_group;
962+
963+
host:keyword | host_group:text
964+
beta | Kubernetes cluster
965+
beta | Kubernetes cluster
966+
beta | Kubernetes cluster
967+
;
968+
969+
startsWithLuceneDisabledPushdown
970+
971+
from hosts
972+
| where host == "unknown host" or (starts_with(host, "bet") and starts_with(host_group, "Kuber"))
973+
| keep host, host_group
974+
| sort host, host_group;
975+
976+
host:keyword | host_group:text
977+
beta | Kubernetes cluster
978+
beta | Kubernetes cluster
979+
beta | Kubernetes cluster
980+
;
981+
982+
startsWithLucenePushdownIgnoreMultivalues
983+
984+
from hosts
985+
| where starts_with(description, "epsilon")
986+
| keep description
987+
| sort description;
988+
989+
warning:Line 2:9: evaluation of [starts_with(description, \"epsilon\")] failed, treating result as null. Only first 20 failures recorded.
990+
warning:Line 2:9: java.lang.IllegalArgumentException: single-value function encountered multi-value
991+
992+
description:text
993+
epsilon gw instance
994+
;
995+
956996
substringOfText
957997

958998
from hosts | where host=="epsilon" | eval l1 = substring(host_group, 0, 5), l2 = substring(description, 0, 5) | keep l1, l2;
@@ -1179,6 +1219,138 @@ Bernatsky |false
11791219
;
11801220

11811221

1222+
endsWithLucenePushdown
1223+
1224+
from hosts
1225+
| where ends_with(host, "ta") and ends_with(host_group, "cluster")
1226+
| keep host, host_group
1227+
| sort host, host_group;
1228+
1229+
host:keyword | host_group:text
1230+
beta | Kubernetes cluster
1231+
beta | Kubernetes cluster
1232+
beta | Kubernetes cluster
1233+
;
1234+
1235+
endsWithLuceneDisabledPushdown
1236+
1237+
from hosts
1238+
| where host == "unknown host" or (ends_with(host, "ta") and ends_with(host_group, "cluster"))
1239+
| keep host, host_group
1240+
| sort host, host_group;
1241+
1242+
host:keyword | host_group:text
1243+
beta | Kubernetes cluster
1244+
beta | Kubernetes cluster
1245+
beta | Kubernetes cluster
1246+
;
1247+
1248+
endsWithLucenePushdownIgnoreMultivalues
1249+
1250+
from hosts
1251+
| where ends_with(description, "host")
1252+
| keep description
1253+
| sort description;
1254+
1255+
warning:Line 2:9: evaluation of [ends_with(description, \"host\")] failed, treating result as null. Only first 20 failures recorded.
1256+
warning:Line 2:9: java.lang.IllegalArgumentException: single-value function encountered multi-value
1257+
1258+
description:text
1259+
;
1260+
1261+
1262+
lucenePushdownMultipleWhere
1263+
1264+
from hosts
1265+
| where starts_with(host, "bet")
1266+
| keep host, host_group
1267+
| sort host, host_group
1268+
| where ends_with(host_group, "cluster");
1269+
1270+
host:keyword | host_group:text
1271+
beta | Kubernetes cluster
1272+
beta | Kubernetes cluster
1273+
beta | Kubernetes cluster
1274+
;
1275+
1276+
lucenePushdownMultipleIndices
1277+
1278+
from airports* metadata _index
1279+
| where starts_with(name::keyword, "Sahn") and ends_with(abbrev, "UH")
1280+
| keep abbrev, name, _index
1281+
| sort abbrev, name, _index;
1282+
1283+
abbrev:keyword | name:text | _index:keyword
1284+
LUH | Sahnewal | airports
1285+
LUH | Sahnewal | airports_mp
1286+
LUH | Sahnewal | airports_no_doc_values
1287+
LUH | Sahnewal | airports_not_indexed
1288+
LUH | Sahnewal | airports_not_indexed_nor_doc_values
1289+
LUH | Sahnewal | airports_web
1290+
;
1291+
1292+
lucenePushdownOr
1293+
1294+
from airports
1295+
| where starts_with(name::keyword, "Sahn") or ends_with(abbrev, "UH")
1296+
| keep abbrev, name
1297+
| sort abbrev, name;
1298+
1299+
abbrev:keyword | name:text
1300+
AUH | Abu Dhabi Int'l
1301+
LUH | Sahnewal
1302+
RUH | King Khalid Int'l
1303+
;
1304+
1305+
lucenePushdownMultipleOr
1306+
1307+
from airports
1308+
| where starts_with(name::keyword, "Sahn") or ends_with(abbrev, "UH") or starts_with(abbrev, "OOL")
1309+
| keep abbrev, name
1310+
| sort abbrev, name;
1311+
1312+
abbrev:keyword | name:text
1313+
AUH | Abu Dhabi Int'l
1314+
LUH | Sahnewal
1315+
OOL | Gold Coast
1316+
RUH | King Khalid Int'l
1317+
;
1318+
1319+
lucenePushdownMultipleAnd
1320+
1321+
from airports metadata _index
1322+
| where starts_with(name::keyword, "Sahn") and ends_with(abbrev, "UH")
1323+
| where ends_with(name::keyword, "al")
1324+
| keep abbrev, name, _index
1325+
| sort abbrev, name, _index;
1326+
1327+
abbrev:keyword | name:text | _index:keyword
1328+
LUH | Sahnewal | airports
1329+
;
1330+
1331+
lucenePushdownMixAndOr
1332+
1333+
from airports
1334+
| where starts_with(name::keyword, "Sahn") and (starts_with(name::keyword, "Abc") or ends_with(abbrev, "UH"))
1335+
| keep abbrev, name, scalerank
1336+
| sort abbrev, name;
1337+
1338+
abbrev:keyword | name:text | scalerank:integer
1339+
LUH | Sahnewal | 9
1340+
;
1341+
1342+
lucenePushdownMixOrAnd
1343+
1344+
from airports* metadata _index
1345+
| where starts_with(name::keyword, "Sahn") or (starts_with(abbrev, "G") and ends_with(name::keyword, "Falls Int'l"))
1346+
| where ends_with(_index, "airports")
1347+
| keep abbrev, name, scalerank, _index
1348+
| sort abbrev;
1349+
1350+
abbrev:keyword | name:text | scalerank:integer | _index:keyword
1351+
GTF | Great Falls Int'l | 8 | airports
1352+
LUH | Sahnewal | 9 | airports
1353+
;
11821354

11831355
toLowerRow#[skip:-8.12.99]
11841356
// tag::to_lower[]

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/EndsWith.java

+31-1
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,20 @@
77

88
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
99

10+
import org.apache.lucene.queryparser.classic.QueryParser;
1011
import org.apache.lucene.util.BytesRef;
1112
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1213
import org.elasticsearch.common.io.stream.StreamInput;
1314
import org.elasticsearch.common.io.stream.StreamOutput;
15+
import org.elasticsearch.common.lucene.BytesRefs;
1416
import org.elasticsearch.compute.ann.Evaluator;
1517
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
18+
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
1619
import org.elasticsearch.xpack.esql.core.expression.Expression;
20+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
21+
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
22+
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
23+
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
1724
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
1825
import org.elasticsearch.xpack.esql.core.tree.Source;
1926
import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -22,6 +29,8 @@
2229
import org.elasticsearch.xpack.esql.expression.function.Param;
2330
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
2431
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
32+
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
33+
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
2534

2635
import java.io.IOException;
2736
import java.util.Arrays;
@@ -31,7 +40,7 @@
3140
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
3241
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
3342

34-
public class EndsWith extends EsqlScalarFunction {
43+
public class EndsWith extends EsqlScalarFunction implements TranslationAware.SingleValueTranslationAware {
3544
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "EndsWith", EndsWith::new);
3645

3746
private final Expression str;
@@ -129,6 +138,27 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
129138
return new EndsWithEvaluator.Factory(source(), toEvaluator.apply(str), toEvaluator.apply(suffix));
130139
}
131140

141+
@Override
142+
public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
143+
return pushdownPredicates.isPushableAttribute(str) && suffix.foldable();
144+
}
145+
146+
@Override
147+
public Query asQuery(TranslatorHandler handler) {
148+
LucenePushdownPredicates.checkIsPushableAttribute(str);
149+
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);
150+
151+
// TODO: Get the real FoldContext here
152+
var wildcardQuery = "*" + QueryParser.escape(BytesRefs.toString(suffix.fold(FoldContext.small())));
153+
154+
return new WildcardQuery(source(), fieldName, wildcardQuery);
155+
}
156+
157+
@Override
158+
public Expression singleValueField() {
159+
return str;
160+
}
161+
132162
Expression str() {
133163
return str;
134164
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/StartsWith.java

+31-1
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,20 @@
77

88
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
99

10+
import org.apache.lucene.queryparser.classic.QueryParser;
1011
import org.apache.lucene.util.BytesRef;
1112
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
1213
import org.elasticsearch.common.io.stream.StreamInput;
1314
import org.elasticsearch.common.io.stream.StreamOutput;
15+
import org.elasticsearch.common.lucene.BytesRefs;
1416
import org.elasticsearch.compute.ann.Evaluator;
1517
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
18+
import org.elasticsearch.xpack.esql.capabilities.TranslationAware;
1619
import org.elasticsearch.xpack.esql.core.expression.Expression;
20+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
21+
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
22+
import org.elasticsearch.xpack.esql.core.querydsl.query.Query;
23+
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
1724
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
1825
import org.elasticsearch.xpack.esql.core.tree.Source;
1926
import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -22,6 +29,8 @@
2229
import org.elasticsearch.xpack.esql.expression.function.Param;
2330
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
2431
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
32+
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
33+
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
2534

2635
import java.io.IOException;
2736
import java.util.Arrays;
@@ -31,7 +40,7 @@
3140
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
3241
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;
3342

34-
public class StartsWith extends EsqlScalarFunction {
43+
public class StartsWith extends EsqlScalarFunction implements TranslationAware.SingleValueTranslationAware {
3544
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
3645
Expression.class,
3746
"StartsWith",
@@ -126,6 +135,27 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
126135
return new StartsWithEvaluator.Factory(source(), toEvaluator.apply(str), toEvaluator.apply(prefix));
127136
}
128137

138+
@Override
139+
public boolean translatable(LucenePushdownPredicates pushdownPredicates) {
140+
return pushdownPredicates.isPushableAttribute(str) && prefix.foldable();
141+
}
142+
143+
@Override
144+
public Query asQuery(TranslatorHandler handler) {
145+
LucenePushdownPredicates.checkIsPushableAttribute(str);
146+
var fieldName = handler.nameOf(str instanceof FieldAttribute fa ? fa.exactAttribute() : str);
147+
148+
// TODO: Get the real FoldContext here
149+
var wildcardQuery = QueryParser.escape(BytesRefs.toString(prefix.fold(FoldContext.small()))) + "*";
150+
151+
return new WildcardQuery(source(), fieldName, wildcardQuery);
152+
}
153+
154+
@Override
155+
public Expression singleValueField() {
156+
return str;
157+
}
158+
129159
Expression str() {
130160
return str;
131161
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/EndsWithTests.java

+41
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,21 @@
1212

1313
import org.apache.lucene.util.BytesRef;
1414
import org.elasticsearch.xpack.esql.core.expression.Expression;
15+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
16+
import org.elasticsearch.xpack.esql.core.expression.Literal;
17+
import org.elasticsearch.xpack.esql.core.querydsl.query.WildcardQuery;
1518
import org.elasticsearch.xpack.esql.core.tree.Source;
1619
import org.elasticsearch.xpack.esql.core.type.DataType;
20+
import org.elasticsearch.xpack.esql.core.type.EsField;
1721
import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase;
1822
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
23+
import org.elasticsearch.xpack.esql.optimizer.rules.physical.local.LucenePushdownPredicates;
24+
import org.elasticsearch.xpack.esql.planner.TranslatorHandler;
1925
import org.hamcrest.Matcher;
2026

2127
import java.util.LinkedList;
2228
import java.util.List;
29+
import java.util.Map;
2330
import java.util.function.Supplier;
2431

2532
import static org.hamcrest.Matchers.equalTo;
@@ -98,4 +105,38 @@ private static TestCaseSupplier.TestCase testCase(
98105
protected Expression build(Source source, List<Expression> args) {
99106
return new EndsWith(source, args.get(0), args.get(1));
100107
}
108+
109+
public void testLuceneQuery_AllLiterals_NonTranslatable() {
110+
var function = new EndsWith(
111+
Source.EMPTY,
112+
new Literal(Source.EMPTY, "test", DataType.KEYWORD),
113+
new Literal(Source.EMPTY, "test", DataType.KEYWORD)
114+
);
115+
116+
assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(false));
117+
}
118+
119+
public void testLuceneQuery_NonFoldableSuffix_NonTranslatable() {
120+
var function = new EndsWith(
121+
Source.EMPTY,
122+
new FieldAttribute(Source.EMPTY, "field", new EsField("field", DataType.KEYWORD, Map.of(), true)),
123+
new FieldAttribute(Source.EMPTY, "field", new EsField("suffix", DataType.KEYWORD, Map.of(), true))
124+
);
125+
126+
assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(false));
127+
}
128+
129+
public void testLuceneQuery_NonFoldableSuffix_Translatable() {
130+
var function = new EndsWith(
131+
Source.EMPTY,
132+
new FieldAttribute(Source.EMPTY, "field", new EsField("suffix", DataType.KEYWORD, Map.of(), true)),
133+
new Literal(Source.EMPTY, "a*b?c\\", DataType.KEYWORD)
134+
);
135+
136+
assertThat(function.translatable(LucenePushdownPredicates.DEFAULT), equalTo(true));
137+
138+
var query = function.asQuery(TranslatorHandler.TRANSLATOR_HANDLER);
139+
140+
assertThat(query, equalTo(new WildcardQuery(Source.EMPTY, "field", "*a\\*b\\?c\\\\")));
141+
}
101142
}

0 commit comments

Comments
 (0)