Skip to content

Commit 55a6624

Browse files
authored
ESQL: TO_IP can handle leading zeros (#126532)
Modifies TO_IP so it can handle leading `0`s in ipv4s. Here's how it works now: ``` ROW ip = TO_IP("192.168.0.1") // OK! ROW ip = TO_IP("192.168.010.1") // Fails ``` This adds ``` ROW ip = TO_IP("192.168.010.1", {"leading_zeros": "octal"}) ROW ip = TO_IP("192.168.010.1", {"leading_zeros": "decimal"}) ``` We do this because there isn't a consensus on how to parse leading zeros in ipv4s. The standard unix tools like `ping` and `ftp` interpret leading zeros as octal. Java's built in ip parsing interprets them as decimal. Because folks are using this for security rules we need to support all the choices. Closes #125460
1 parent fa09255 commit 55a6624

39 files changed

+977
-206
lines changed

docs/changelog/126532.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 126532
2+
summary: TO_IP can handle leading zeros
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 125460

docs/reference/query-languages/esql/_snippets/functions/examples/to_ip.md

+23-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/to_ip.md

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/layout/to_ip.md

+3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/parameters/to_ip.md

+3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/_snippets/functions/types/to_ip.md

+5-5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/reference/query-languages/esql/images/functions/to_ip.svg

+1-1
Loading

docs/reference/query-languages/esql/kibana/definition/functions/to_ip.json

+3-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ public class CsvTestsDataLoader {
133133
private static final TestDataset ADDRESSES = new TestDataset("addresses");
134134
private static final TestDataset BOOKS = new TestDataset("books").withSetting("books-settings.json");
135135
private static final TestDataset SEMANTIC_TEXT = new TestDataset("semantic_text").withInferenceEndpoint(true);
136+
private static final TestDataset LOGS = new TestDataset("logs");
136137

137138
public static final Map<String, TestDataset> CSV_DATASET_MAP = Map.ofEntries(
138139
Map.entry(EMPLOYEES.indexName, EMPLOYEES),
@@ -182,7 +183,8 @@ public class CsvTestsDataLoader {
182183
Map.entry(DISTANCES.indexName, DISTANCES),
183184
Map.entry(ADDRESSES.indexName, ADDRESSES),
184185
Map.entry(BOOKS.indexName, BOOKS),
185-
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT)
186+
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT),
187+
Map.entry(LOGS.indexName, LOGS)
186188
);
187189

188190
private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
@timestamp:date ,system:keyword,message:keyword
2+
2023-10-23T13:55:01.543Z, ping,Pinging 192.168.86.046
3+
2023-10-23T13:55:01.544Z, cron,Running cats
4+
2023-10-23T13:55:01.545Z, java,Doing java stuff for 192.168.86.038
5+
2023-10-23T13:55:01.546Z, java,More java stuff

x-pack/plugin/esql/qa/testFixtures/src/main/resources/ip.csv-spec

+80
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,86 @@ str1:keyword |str2:keyword |ip1:ip |ip2:ip
271271
// end::to_ip-result[]
272272
;
273273

274+
convertFromStringLeadingZerosDecimal
275+
required_capability: to_ip_leading_zeros
276+
// tag::to_ip_leading_zeros_decimal[]
277+
ROW s = "1.1.010.1" | EVAL ip = TO_IP(s, {"leading_zeros":"decimal"})
278+
// end::to_ip_leading_zeros_decimal[]
279+
;
280+
281+
// tag::to_ip_leading_zeros_decimal-result[]
282+
s:keyword | ip:ip
283+
1.1.010.1 | 1.1.10.1
284+
// end::to_ip_leading_zeros_decimal-result[]
285+
;
286+
287+
convertFromStringLeadingZerosOctal
288+
required_capability: to_ip_leading_zeros
289+
// tag::to_ip_leading_zeros_octal[]
290+
ROW s = "1.1.010.1" | EVAL ip = TO_IP(s, {"leading_zeros":"octal"})
291+
// end::to_ip_leading_zeros_octal[]
292+
;
293+
294+
// tag::to_ip_leading_zeros_octal-result[]
295+
s:keyword | ip:ip
296+
1.1.010.1 | 1.1.8.1
297+
// end::to_ip_leading_zeros_octal-result[]
298+
;
299+
300+
convertFromStringFancy
301+
required_capability: to_ip_leading_zeros
302+
FROM logs
303+
| KEEP @timestamp, system, message
304+
| EVAL client = CASE(
305+
system == "ping",
306+
TO_IP(REPLACE(message, "Pinging ", ""), {"leading_zeros": "octal"}),
307+
system == "java" AND STARTS_WITH(message, "Doing java stuff for "),
308+
TO_IP(REPLACE(message, "Doing java stuff for ", ""), {"leading_zeros": "decimal"}))
309+
| SORT @timestamp
310+
| LIMIT 4
311+
;
312+
313+
@timestamp:date |system:keyword|message:keyword |client:ip
314+
2023-10-23T13:55:01.543Z| ping|Pinging 192.168.86.046 |192.168.86.38
315+
2023-10-23T13:55:01.544Z| cron|Running cats |null
316+
2023-10-23T13:55:01.545Z| java|Doing java stuff for 192.168.86.038|192.168.86.38
317+
2023-10-23T13:55:01.546Z| java|More java stuff |null
318+
;
319+
320+
toIpInAgg
321+
ROW s = "1.1.1.1" | STATS COUNT(*) BY ip = TO_IP(s)
322+
;
323+
324+
COUNT(*):long | ip:ip
325+
1 | 1.1.1.1
326+
;
327+
328+
toIpInSort
329+
ROW s = "1.1.1.1" | SORT TO_IP(s)
330+
;
331+
332+
s:keyword
333+
1.1.1.1
334+
;
335+
336+
toIpInAggOctal
337+
required_capability: to_ip_leading_zeros
338+
ROW s = "1.1.010.1" | STATS COUNT(*) BY ip = TO_IP(s, {"leading_zeros":"octal"})
339+
;
340+
341+
COUNT(*):long | ip:ip
342+
1 | 1.1.8.1
343+
;
344+
345+
toIpInSortOctal
346+
required_capability: to_ip_leading_zeros
347+
ROW s = "1.1.010.1" | SORT TO_IP(s, {"leading_zeros":"octal"})
348+
;
349+
350+
s:keyword
351+
1.1.010.1
352+
;
353+
274354
cdirMatchOrsIPs
275355
required_capability: combine_disjunctive_cidrmatches
276356

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"properties" : {
3+
"@timestamp" : {
4+
"type" : "date"
5+
},
6+
"system" : {
7+
"type" : "keyword"
8+
},
9+
"message" : {
10+
"type" : "keyword"
11+
}
12+
}
13+
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

+5
Original file line numberDiff line numberDiff line change
@@ -982,6 +982,11 @@ public enum Cap {
982982
*/
983983
FORK_V2(Build.current().isSnapshot()),
984984

985+
/**
986+
* Support for the {@code leading_zeros} named parameter.
987+
*/
988+
TO_IP_LEADING_ZEROS,
989+
985990
/**
986991
* Does the usage information for ESQL contain a histogram of {@code took} values?
987992
*/

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java

+22-9
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest;
5959
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Least;
6060
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
61+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ConvertFunction;
6162
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FoldablesConvertFunction;
6263
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
6364
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
@@ -70,6 +71,7 @@
7071
import org.elasticsearch.xpack.esql.index.EsIndex;
7172
import org.elasticsearch.xpack.esql.index.IndexResolution;
7273
import org.elasticsearch.xpack.esql.inference.ResolvedInference;
74+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.SubstituteSurrogateExpressions;
7375
import org.elasticsearch.xpack.esql.parser.ParsingException;
7476
import org.elasticsearch.xpack.esql.plan.IndexPattern;
7577
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
@@ -1578,10 +1580,12 @@ private LogicalPlan doRule(LogicalPlan plan) {
15781580
int alreadyAddedUnionFieldAttributes = unionFieldAttributes.size();
15791581
// See if the eval function has an unresolved MultiTypeEsField field
15801582
// Replace the entire convert function with a new FieldAttribute (containing type conversion knowledge)
1581-
plan = plan.transformExpressionsOnly(
1582-
AbstractConvertFunction.class,
1583-
convert -> resolveConvertFunction(convert, unionFieldAttributes)
1584-
);
1583+
plan = plan.transformExpressionsOnly(e -> {
1584+
if (e instanceof ConvertFunction convert) {
1585+
return resolveConvertFunction(convert, unionFieldAttributes);
1586+
}
1587+
return e;
1588+
});
15851589
// If no union fields were generated, return the plan as is
15861590
if (unionFieldAttributes.size() == alreadyAddedUnionFieldAttributes) {
15871591
return plan;
@@ -1612,7 +1616,8 @@ private LogicalPlan doRule(LogicalPlan plan) {
16121616
return plan;
16131617
}
16141618

1615-
private Expression resolveConvertFunction(AbstractConvertFunction convert, List<FieldAttribute> unionFieldAttributes) {
1619+
private Expression resolveConvertFunction(ConvertFunction convert, List<FieldAttribute> unionFieldAttributes) {
1620+
Expression convertExpression = (Expression) convert;
16161621
if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) {
16171622
HashMap<TypeResolutionKey, Expression> typeResolutions = new HashMap<>();
16181623
Set<DataType> supportedTypes = convert.supportedTypes();
@@ -1639,9 +1644,11 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
16391644
return createIfDoesNotAlreadyExist(fa, resolvedField, unionFieldAttributes);
16401645
}
16411646
} else if (convert.field() instanceof AbstractConvertFunction subConvert) {
1642-
return convert.replaceChildren(Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes)));
1647+
return convertExpression.replaceChildren(
1648+
Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes))
1649+
);
16431650
}
1644-
return convert;
1651+
return convertExpression;
16451652
}
16461653

16471654
private Expression createIfDoesNotAlreadyExist(
@@ -1677,7 +1684,7 @@ private MultiTypeEsField resolvedMultiTypeEsField(FieldAttribute fa, HashMap<Typ
16771684
return MultiTypeEsField.resolveFrom(imf, typesToConversionExpressions);
16781685
}
16791686

1680-
private Expression typeSpecificConvert(AbstractConvertFunction convert, Source source, DataType type, InvalidMappedField mtf) {
1687+
private Expression typeSpecificConvert(ConvertFunction convert, Source source, DataType type, InvalidMappedField mtf) {
16811688
EsField field = new EsField(mtf.getName(), type, mtf.getProperties(), mtf.isAggregatable());
16821689
FieldAttribute originalFieldAttr = (FieldAttribute) convert.field();
16831690
FieldAttribute resolvedAttr = new FieldAttribute(
@@ -1689,7 +1696,13 @@ private Expression typeSpecificConvert(AbstractConvertFunction convert, Source s
16891696
originalFieldAttr.id(),
16901697
true
16911698
);
1692-
return convert.replaceChildren(Collections.singletonList(resolvedAttr));
1699+
Expression e = ((Expression) convert).replaceChildren(Collections.singletonList(resolvedAttr));
1700+
/*
1701+
* Resolve surrogates immediately because these type specific conversions are serialized
1702+
* and SurrogateExpressions are expected to be resolved on the coordinating node. At least,
1703+
* TO_IP is expected to be resolved there.
1704+
*/
1705+
return SubstituteSurrogateExpressions.rule(e);
16931706
}
16941707
}
16951708

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java

+6-2
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,10 @@
2525
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
2626
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint;
2727
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape;
28-
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIP;
2928
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
29+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosDecimal;
30+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosOctal;
31+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosRejected;
3032
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
3133
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToRadians;
3234
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString;
@@ -192,7 +194,9 @@ public static List<NamedWriteableRegistry.Entry> unaryScalars() {
192194
entries.add(ToGeoShape.ENTRY);
193195
entries.add(ToCartesianShape.ENTRY);
194196
entries.add(ToGeoPoint.ENTRY);
195-
entries.add(ToIP.ENTRY);
197+
entries.add(ToIpLeadingZerosDecimal.ENTRY);
198+
entries.add(ToIpLeadingZerosOctal.ENTRY);
199+
entries.add(ToIpLeadingZerosRejected.ENTRY);
196200
entries.add(ToInteger.ENTRY);
197201
entries.add(ToLong.ENTRY);
198202
entries.add(ToRadians.ENTRY);

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java

+16-2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import org.elasticsearch.xpack.esql.core.tree.Source;
1717
import org.elasticsearch.xpack.esql.core.type.DataType;
1818
import org.elasticsearch.xpack.esql.core.util.Check;
19+
import org.elasticsearch.xpack.esql.expression.SurrogateExpression;
1920
import org.elasticsearch.xpack.esql.expression.function.aggregate.Avg;
2021
import org.elasticsearch.xpack.esql.expression.function.aggregate.AvgOverTime;
2122
import org.elasticsearch.xpack.esql.expression.function.aggregate.Count;
@@ -56,8 +57,11 @@
5657
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
5758
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint;
5859
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape;
59-
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIP;
6060
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
61+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIp;
62+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosDecimal;
63+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosOctal;
64+
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosRejected;
6165
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
6266
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToRadians;
6367
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString;
@@ -230,6 +234,7 @@ public class EsqlFunctionRegistry {
230234
public EsqlFunctionRegistry() {
231235
register(functions());
232236
buildDataTypesForStringLiteralConversion(functions());
237+
nameSurrogates();
233238
}
234239

235240
EsqlFunctionRegistry(FunctionDefinition... functions) {
@@ -391,7 +396,7 @@ private static FunctionDefinition[][] functions() {
391396
def(ToDouble.class, ToDouble::new, "to_double", "to_dbl"),
392397
def(ToGeoPoint.class, ToGeoPoint::new, "to_geopoint"),
393398
def(ToGeoShape.class, ToGeoShape::new, "to_geoshape"),
394-
def(ToIP.class, ToIP::new, "to_ip"),
399+
def(ToIp.class, ToIp::new, "to_ip"),
395400
def(ToInteger.class, ToInteger::new, "to_integer", "to_int"),
396401
def(ToLong.class, ToLong::new, "to_long"),
397402
def(ToRadians.class, ToRadians::new, "to_radians"),
@@ -795,6 +800,15 @@ protected void buildDataTypesForStringLiteralConversion(FunctionDefinition[]...
795800
}
796801
}
797802

803+
/**
804+
* Add {@link #names} entries for functions that are not registered, but we rewrite to using {@link SurrogateExpression}.
805+
*/
806+
private void nameSurrogates() {
807+
names.put(ToIpLeadingZerosRejected.class, "TO_IP");
808+
names.put(ToIpLeadingZerosDecimal.class, "TO_IP");
809+
names.put(ToIpLeadingZerosOctal.class, "TO_IP");
810+
}
811+
798812
protected interface FunctionBuilder {
799813
Function build(Source source, List<Expression> children, Configuration cfg);
800814
}

0 commit comments

Comments
 (0)