Skip to content

ESQL: TO_IP can handle leading zeros #126532

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Apr 11, 2025
Merged
6 changes: 6 additions & 0 deletions docs/changelog/126532.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 126532
summary: TO_IP can handle leading zeros
area: ES|QL
type: bug
issues:
- 125460

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ public class CsvTestsDataLoader {
private static final TestDataset ADDRESSES = new TestDataset("addresses");
private static final TestDataset BOOKS = new TestDataset("books").withSetting("books-settings.json");
private static final TestDataset SEMANTIC_TEXT = new TestDataset("semantic_text").withInferenceEndpoint(true);
private static final TestDataset LOGS = new TestDataset("logs");

public static final Map<String, TestDataset> CSV_DATASET_MAP = Map.ofEntries(
Map.entry(EMPLOYEES.indexName, EMPLOYEES),
Expand Down Expand Up @@ -182,7 +183,8 @@ public class CsvTestsDataLoader {
Map.entry(DISTANCES.indexName, DISTANCES),
Map.entry(ADDRESSES.indexName, ADDRESSES),
Map.entry(BOOKS.indexName, BOOKS),
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT)
Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT),
Map.entry(LOGS.indexName, LOGS)
);

private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@timestamp:date ,system:keyword,message:keyword
2023-10-23T13:55:01.543Z, ping,Pinging 192.168.86.046
2023-10-23T13:55:01.544Z, cron,Running cats
2023-10-23T13:55:01.545Z, java,Doing java stuff for 192.168.86.038
2023-10-23T13:55:01.546Z, java,More java stuff
80 changes: 80 additions & 0 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/ip.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,86 @@ str1:keyword |str2:keyword |ip1:ip |ip2:ip
// end::to_ip-result[]
;

convertFromStringLeadingZerosDecimal
required_capability: to_ip_leading_zeros
// tag::to_ip_leading_zeros_decimal[]
ROW s = "1.1.010.1" | EVAL ip = TO_IP(s, {"leading_zeros":"decimal"})
// end::to_ip_leading_zeros_decimal[]
;

// tag::to_ip_leading_zeros_decimal-result[]
s:keyword | ip:ip
1.1.010.1 | 1.1.10.1
// end::to_ip_leading_zeros_decimal-result[]
;

convertFromStringLeadingZerosOctal
required_capability: to_ip_leading_zeros
// tag::to_ip_leading_zeros_octal[]
ROW s = "1.1.010.1" | EVAL ip = TO_IP(s, {"leading_zeros":"octal"})
// end::to_ip_leading_zeros_octal[]
;

// tag::to_ip_leading_zeros_octal-result[]
s:keyword | ip:ip
1.1.010.1 | 1.1.8.1
// end::to_ip_leading_zeros_octal-result[]
;

convertFromStringFancy
required_capability: to_ip_leading_zeros
FROM logs
| KEEP @timestamp, system, message
| EVAL client = CASE(
system == "ping",
TO_IP(REPLACE(message, "Pinging ", ""), {"leading_zeros": "octal"}),
system == "java" AND STARTS_WITH(message, "Doing java stuff for "),
TO_IP(REPLACE(message, "Doing java stuff for ", ""), {"leading_zeros": "decimal"}))
| SORT @timestamp
| LIMIT 4
;

@timestamp:date |system:keyword|message:keyword |client:ip
2023-10-23T13:55:01.543Z| ping|Pinging 192.168.86.046 |192.168.86.38
2023-10-23T13:55:01.544Z| cron|Running cats |null
2023-10-23T13:55:01.545Z| java|Doing java stuff for 192.168.86.038|192.168.86.38
2023-10-23T13:55:01.546Z| java|More java stuff |null
;

toIpInAgg
ROW s = "1.1.1.1" | STATS COUNT(*) BY ip = TO_IP(s)
;

COUNT(*):long | ip:ip
1 | 1.1.1.1
;

toIpInSort
ROW s = "1.1.1.1" | SORT TO_IP(s)
;

s:keyword
1.1.1.1
;

toIpInAggOctal
required_capability: to_ip_leading_zeros
ROW s = "1.1.010.1" | STATS COUNT(*) BY ip = TO_IP(s, {"leading_zeros":"octal"})
;

COUNT(*):long | ip:ip
1 | 1.1.8.1
;

toIpInSortOctal
required_capability: to_ip_leading_zeros
ROW s = "1.1.010.1" | SORT TO_IP(s, {"leading_zeros":"octal"})
;

s:keyword
1.1.010.1
;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be great to have some tests where an IP address with leading 0s appears in predicates against real indices, if those are valid use cases. For example:

+ curl -u elastic:password -H 'Content-Type: application/json' '127.0.0.1:9200/_query?format=txt' -d '{
    "query": "from sample_data | where client.ip == to_ip(\"172.021.00.05\", {\"leading_zeros\":\"decimal\"})"
}   
'
       @timestamp       |   client.ip   |event.duration |    message    |       test_date        
------------------------+---------------+---------------+---------------+------------------------
2023-10-23T13:33:34.937Z|172.21.0.5     |1232382        |Disconnected   |2025-11-23T00:00:00.000Z

+ curl -u elastic:password -H 'Content-Type: application/json' '127.0.0.1:9200/_query?format=txt' -d '{
    "query": "from sample_data | where client.ip in ( to_ip(\"172.021.00.05\", {\"leading_zeros\":\"decimal\"}), to_ip(\"172.21.3.15\"))"
}
'
       @timestamp       |   client.ip   |event.duration |       message       |       test_date        
------------------------+---------------+---------------+---------------------+------------------------
2023-10-23T13:33:34.937Z|172.21.0.5     |1232382        |Disconnected         |2025-11-23T00:00:00.000Z
2023-10-23T13:51:54.732Z|172.21.3.15    |725448         |Connection error     |2025-11-24T00:00:00.000Z
2023-10-23T13:52:55.015Z|172.21.3.15    |8268153        |Connection error     |2025-11-25T00:00:00.000Z
2023-10-23T13:53:55.832Z|172.21.3.15    |5033755        |Connection error     |2025-11-26T00:00:00.000Z
2023-10-23T13:55:01.543Z|172.21.3.15    |1756467        |Connected to 10.1.0.1|2025-11-27T00:00:00.000Z

Do we expect that IP addresses with leading 0s can appear in index fields? I gave it a try and got this error; it seems they cannot be loaded into indices as valid IPs.

    {
      "index" : {
        "_index" : "sample_data",
        "_id" : "KBZsG5YBxZF4dlPHvvyx",
        "status" : 400,
        "error" : {
          "type" : "document_parsing_exception",
          "reason" : "[1:57] failed to parse field [client.ip] of type [ip] in document with id 'KBZsG5YBxZF4dlPHvvyx'. Preview of field's value: '172.21.04.15'",
          "caused_by" : {
            "type" : "illegal_argument_exception",
            "reason" : "'172.21.04.15' is not an IP string literal."
          }
        }
      }
    }

However it can be loaded into keyword fields, I changed the schema to have client.ip as keyword.

       @timestamp       |   client.ip   |event.duration |       message       |       test_date        
------------------------+---------------+---------------+---------------------+------------------------
2023-10-23T12:15:03.360Z|172.21.2.162   |3450233        |Connected to 10.1.0.3|2025-11-21T00:00:00.000Z
2023-10-23T12:27:28.948Z|172.21.2.113   |2764889        |Connected to 10.1.0.2|2025-11-22T00:00:00.000Z
2023-10-23T13:33:34.937Z|172.21.0.5     |1232382        |Disconnected         |2025-11-23T00:00:00.000Z
2023-10-23T13:51:54.732Z|172.21.3.15    |725448         |Connection error     |2025-11-24T00:00:00.000Z
2023-10-23T13:52:55.015Z|172.21.3.15    |8268153        |Connection error     |2025-11-25T00:00:00.000Z
2023-10-23T13:53:55.832Z|172.21.3.15    |5033755        |Connection error     |2025-11-26T00:00:00.000Z
2023-10-23T13:55:01.543Z|172.21.3.15    |1756467        |Connected to 10.1.0.1|2025-11-27T00:00:00.000Z
2023-10-23T13:55:01.543Z|172.21.04.15   |1756467        |Connected to 10.1.0.1|2025-11-27T00:00:00.000Z

I tried some queries with the to_ip leading-zeros options, and it works on index fields too! I don't know whether this is a valid use case; the original GitHub issue only mentions leading 0s in string literals.

+ curl -u elastic:password -H 'Content-Type: application/json' '127.0.0.1:9200/_query?format=txt' -d '{
    "query": "from sample_data | where to_ip(client.ip) == \"172.21.0.5\""
}
'
       @timestamp       |   client.ip   |event.duration |    message    |       test_date        
------------------------+---------------+---------------+---------------+------------------------
2023-10-23T13:33:34.937Z|172.21.0.5     |1232382        |Disconnected   |2025-11-23T00:00:00.000Z
+ curl -u elastic:password -H 'Content-Type: application/json' '127.0.0.1:9200/_query?format=txt' -d '{
    "query": "from sample_data | where to_ip(client.ip) == to_ip(\"172.021.00.05\", {\"leading_zeros\":\"decimal\"})"
}
'
       @timestamp       |   client.ip   |event.duration |    message    |       test_date        
------------------------+---------------+---------------+---------------+------------------------
2023-10-23T13:33:34.937Z|172.21.0.5     |1232382        |Disconnected   |2025-11-23T00:00:00.000Z
+ curl -u elastic:password -H 'Content-Type: application/json' '127.0.0.1:9200/_query?format=txt' -d '{
    "query": "from sample_data | where to_ip(client.ip, {\"leading_zeros\":\"decimal\"}) in ( to_ip(\"172.021.00.05\", {\"leading_zeros\":\"decimal\"}), \"172.21.4.15\" )"
}
'
       @timestamp       |   client.ip   |event.duration |       message       |       test_date        
------------------------+---------------+---------------+---------------------+------------------------
2023-10-23T13:33:34.937Z|172.21.0.5     |1232382        |Disconnected         |2025-11-23T00:00:00.000Z
2023-10-23T13:55:01.543Z|172.21.04.15   |1756467        |Connected to 10.1.0.1|2025-11-27T00:00:00.000Z

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we expect that the ip addresses with leading 0s can appear in index fields? I gave it a try and got this error, it seems like they cannot be loaded into indices as valid ips.

You can't index such a field, no. But with this change, ESQL can parse them!

However it can be loaded into keyword fields

Just like that, yeah.

I tried some queries with the to_ip leading-zeros options, and it works on index fields too! I don't know whether this is a valid use case; the original GitHub issue only mentions leading 0s in string literals.

I think it's valid. Let me add a test case for it too.

cdirMatchOrsIPs
required_capability: combine_disjunctive_cidrmatches

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"properties" : {
"@timestamp" : {
"type" : "date"
},
"system" : {
"type" : "keyword"
},
"message" : {
"type" : "keyword"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,11 @@ public enum Cap {
*/
FORK_V2(Build.current().isSnapshot()),

/**
* Support for the {@code leading_zeros} named parameter.
*/
TO_IP_LEADING_ZEROS,

/**
* Does the usage information for ESQL contain a histogram of {@code took} values?
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest;
import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Least;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ConvertFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FoldablesConvertFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
Expand All @@ -70,6 +71,7 @@
import org.elasticsearch.xpack.esql.index.EsIndex;
import org.elasticsearch.xpack.esql.index.IndexResolution;
import org.elasticsearch.xpack.esql.inference.ResolvedInference;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.SubstituteSurrogateExpressions;
import org.elasticsearch.xpack.esql.parser.ParsingException;
import org.elasticsearch.xpack.esql.plan.IndexPattern;
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
Expand Down Expand Up @@ -1578,10 +1580,12 @@ private LogicalPlan doRule(LogicalPlan plan) {
int alreadyAddedUnionFieldAttributes = unionFieldAttributes.size();
// See if the eval function has an unresolved MultiTypeEsField field
// Replace the entire convert function with a new FieldAttribute (containing type conversion knowledge)
plan = plan.transformExpressionsOnly(
AbstractConvertFunction.class,
convert -> resolveConvertFunction(convert, unionFieldAttributes)
);
plan = plan.transformExpressionsOnly(e -> {
if (e instanceof ConvertFunction convert) {
return resolveConvertFunction(convert, unionFieldAttributes);
}
return e;
});
// If no union fields were generated, return the plan as is
if (unionFieldAttributes.size() == alreadyAddedUnionFieldAttributes) {
return plan;
Expand Down Expand Up @@ -1612,7 +1616,8 @@ private LogicalPlan doRule(LogicalPlan plan) {
return plan;
}

private Expression resolveConvertFunction(AbstractConvertFunction convert, List<FieldAttribute> unionFieldAttributes) {
private Expression resolveConvertFunction(ConvertFunction convert, List<FieldAttribute> unionFieldAttributes) {
Expression convertExpression = (Expression) convert;
if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) {
HashMap<TypeResolutionKey, Expression> typeResolutions = new HashMap<>();
Set<DataType> supportedTypes = convert.supportedTypes();
Expand All @@ -1639,9 +1644,11 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
return createIfDoesNotAlreadyExist(fa, resolvedField, unionFieldAttributes);
}
} else if (convert.field() instanceof AbstractConvertFunction subConvert) {
return convert.replaceChildren(Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes)));
return convertExpression.replaceChildren(
Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes))
);
}
return convert;
return convertExpression;
}

private Expression createIfDoesNotAlreadyExist(
Expand Down Expand Up @@ -1677,7 +1684,7 @@ private MultiTypeEsField resolvedMultiTypeEsField(FieldAttribute fa, HashMap<Typ
return MultiTypeEsField.resolveFrom(imf, typesToConversionExpressions);
}

private Expression typeSpecificConvert(AbstractConvertFunction convert, Source source, DataType type, InvalidMappedField mtf) {
private Expression typeSpecificConvert(ConvertFunction convert, Source source, DataType type, InvalidMappedField mtf) {
EsField field = new EsField(mtf.getName(), type, mtf.getProperties(), mtf.isAggregatable());
FieldAttribute originalFieldAttr = (FieldAttribute) convert.field();
FieldAttribute resolvedAttr = new FieldAttribute(
Expand All @@ -1689,7 +1696,13 @@ private Expression typeSpecificConvert(AbstractConvertFunction convert, Source s
originalFieldAttr.id(),
true
);
return convert.replaceChildren(Collections.singletonList(resolvedAttr));
Expression e = ((Expression) convert).replaceChildren(Collections.singletonList(resolvedAttr));
/*
* Resolve surrogates immediately because these type specific conversions are serialized
* and SurrogateExpressions are expected to be resolved on the coordinating node. At least,
* TO_IP is expected to be resolved there.
*/
return SubstituteSurrogateExpressions.rule(e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIP;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosDecimal;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosOctal;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosRejected;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToRadians;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString;
Expand Down Expand Up @@ -192,7 +194,9 @@ public static List<NamedWriteableRegistry.Entry> unaryScalars() {
entries.add(ToGeoShape.ENTRY);
entries.add(ToCartesianShape.ENTRY);
entries.add(ToGeoPoint.ENTRY);
entries.add(ToIP.ENTRY);
entries.add(ToIpLeadingZerosDecimal.ENTRY);
entries.add(ToIpLeadingZerosOctal.ENTRY);
entries.add(ToIpLeadingZerosRejected.ENTRY);
entries.add(ToInteger.ENTRY);
entries.add(ToLong.ENTRY);
entries.add(ToRadians.ENTRY);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.util.Check;
import org.elasticsearch.xpack.esql.expression.SurrogateExpression;
import org.elasticsearch.xpack.esql.expression.function.aggregate.Avg;
import org.elasticsearch.xpack.esql.expression.function.aggregate.AvgOverTime;
import org.elasticsearch.xpack.esql.expression.function.aggregate.Count;
Expand Down Expand Up @@ -56,8 +57,11 @@
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIP;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIp;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosDecimal;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosOctal;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIpLeadingZerosRejected;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToRadians;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString;
Expand Down Expand Up @@ -230,6 +234,7 @@ public class EsqlFunctionRegistry {
public EsqlFunctionRegistry() {
register(functions());
buildDataTypesForStringLiteralConversion(functions());
nameSurrogates();
}

EsqlFunctionRegistry(FunctionDefinition... functions) {
Expand Down Expand Up @@ -391,7 +396,7 @@ private static FunctionDefinition[][] functions() {
def(ToDouble.class, ToDouble::new, "to_double", "to_dbl"),
def(ToGeoPoint.class, ToGeoPoint::new, "to_geopoint"),
def(ToGeoShape.class, ToGeoShape::new, "to_geoshape"),
def(ToIP.class, ToIP::new, "to_ip"),
def(ToIp.class, ToIp::new, "to_ip"),
def(ToInteger.class, ToInteger::new, "to_integer", "to_int"),
def(ToLong.class, ToLong::new, "to_long"),
def(ToRadians.class, ToRadians::new, "to_radians"),
Expand Down Expand Up @@ -795,6 +800,15 @@ protected void buildDataTypesForStringLiteralConversion(FunctionDefinition[]...
}
}

/**
 * Registers display names for surrogate-only conversion functions. These classes are
 * never added to the registry directly — expressions are rewritten to them via
 * {@link SurrogateExpression} — but they still need an entry in {@link #names} so
 * they render as {@code TO_IP} in messages.
 */
private void nameSurrogates() {
    final String toIpName = "TO_IP";
    names.put(ToIpLeadingZerosOctal.class, toIpName);
    names.put(ToIpLeadingZerosDecimal.class, toIpName);
    names.put(ToIpLeadingZerosRejected.class, toIpName);
}

/**
 * Factory for constructing a {@link Function} instance from a source location, its
 * child expressions, and the active {@code Configuration}.
 */
protected interface FunctionBuilder {
// Builds the function; implementations are presumably supplied per registered
// function definition — confirm against the def(...) call sites above.
Function build(Source source, List<Expression> children, Configuration cfg);
}
Expand Down
Loading
Loading