Skip to content

Commit a9f31bd

Browse files
authored
Support synthetic source for date fields when ignore_malformed is used (#109410)
1 parent 7490f5f commit a9f31bd

File tree

6 files changed

+137
-122
lines changed

6 files changed

+137
-122
lines changed

docs/changelog/109410.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 109410
2+
summary: Support synthetic source for date fields when `ignore_malformed` is used
3+
area: Mapping
4+
type: enhancement
5+
issues: []

docs/reference/mapping/types/date.asciidoc

+1-2
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,7 @@ of official GA features.
242242

243243
`date` fields support <<synthetic-source,synthetic `_source`>> in their
244244
default configuration. Synthetic `_source` cannot be used together with
245-
<<copy-to,`copy_to`>>, <<ignore-malformed,`ignore_malformed`>> set to true
246-
or with <<doc-values,`doc_values`>> disabled.
245+
<<copy-to,`copy_to`>> or with <<doc-values,`doc_values`>> disabled.
247246

248247
Synthetic source always sorts `date` fields. For example:
249248
[source,console,id=synthetic-source-date-example]

server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java

+17-6
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,16 @@ public DateFieldMapper build(MapperBuilderContext context) {
364364
&& ignoreMalformed.isConfigured() == false) {
365365
ignoreMalformed.setValue(false);
366366
}
367-
return new DateFieldMapper(name(), ft, multiFieldsBuilder.build(this, context), copyTo, nullTimestamp, resolution, this);
367+
return new DateFieldMapper(
368+
name(),
369+
ft,
370+
multiFieldsBuilder.build(this, context),
371+
copyTo,
372+
nullTimestamp,
373+
resolution,
374+
context.isSourceSynthetic(),
375+
this
376+
);
368377
}
369378
}
370379

@@ -850,6 +859,7 @@ public DocValueFormat docValueFormat(@Nullable String format, ZoneId timeZone) {
850859
private final Long nullValue;
851860
private final String nullValueAsString;
852861
private final Resolution resolution;
862+
private final boolean isSourceSynthetic;
853863

854864
private final boolean ignoreMalformedByDefault;
855865
private final IndexVersion indexCreatedVersion;
@@ -865,6 +875,7 @@ private DateFieldMapper(
865875
CopyTo copyTo,
866876
Long nullValue,
867877
Resolution resolution,
878+
boolean isSourceSynthetic,
868879
Builder builder
869880
) {
870881
super(simpleName, mappedFieldType, multiFields, copyTo, builder.script.get() != null, builder.onScriptError.get());
@@ -877,6 +888,7 @@ private DateFieldMapper(
877888
this.nullValueAsString = builder.nullValue.getValue();
878889
this.nullValue = nullValue;
879890
this.resolution = resolution;
891+
this.isSourceSynthetic = isSourceSynthetic;
880892
this.ignoreMalformedByDefault = builder.ignoreMalformed.getDefaultValue();
881893
this.indexCreatedVersion = builder.indexCreatedVersion;
882894
this.script = builder.script.get();
@@ -915,6 +927,10 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
915927
} catch (IllegalArgumentException | ElasticsearchParseException | DateTimeException | ArithmeticException e) {
916928
if (ignoreMalformed) {
917929
context.addIgnoredField(mappedFieldType.name());
930+
if (isSourceSynthetic) {
931+
// Save a copy of the field so synthetic source can load it
932+
context.doc().add(IgnoreMalformedStoredValues.storedField(name(), context.parser()));
933+
}
918934
return;
919935
} else {
920936
throw e;
@@ -976,11 +992,6 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() {
976992
"field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it doesn't have doc values"
977993
);
978994
}
979-
if (ignoreMalformed) {
980-
throw new IllegalArgumentException(
981-
"field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it ignores malformed dates"
982-
);
983-
}
984995
if (copyTo.copyToFields().isEmpty() != true) {
985996
throw new IllegalArgumentException(
986997
"field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"

server/src/test/java/org/elasticsearch/index/mapper/DateFieldMapperTests.java

+50-26
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import org.elasticsearch.common.time.DateUtils;
1616
import org.elasticsearch.core.CheckedConsumer;
1717
import org.elasticsearch.core.Strings;
18-
import org.elasticsearch.core.Tuple;
1918
import org.elasticsearch.index.IndexVersion;
2019
import org.elasticsearch.index.IndexVersions;
2120
import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType;
@@ -35,6 +34,7 @@
3534
import java.util.Comparator;
3635
import java.util.List;
3736
import java.util.function.Function;
37+
import java.util.stream.Stream;
3838

3939
import static org.elasticsearch.index.mapper.DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER;
4040
import static org.hamcrest.Matchers.containsString;
@@ -152,7 +152,13 @@ protected List<ExampleMalformedValue> exampleMalformedValues() {
152152
return List.of(
153153
exampleMalformedValue("2016-03-99").mapping(mappingWithFormat("strict_date_optional_time||epoch_millis"))
154154
.errorMatches("failed to parse date field [2016-03-99] with format [strict_date_optional_time||epoch_millis]"),
155-
exampleMalformedValue("-522000000").mapping(mappingWithFormat("date_optional_time")).errorMatches("long overflow")
155+
exampleMalformedValue("-522000000").mapping(mappingWithFormat("date_optional_time")).errorMatches("long overflow"),
156+
exampleMalformedValue("2020").mapping(mappingWithFormat("strict_date"))
157+
.errorMatches("failed to parse date field [2020] with format [strict_date]"),
158+
exampleMalformedValue("hello world").mapping(mappingWithFormat("strict_date_optional_time"))
159+
.errorMatches("failed to parse date field [hello world]"),
160+
exampleMalformedValue("true").mapping(mappingWithFormat("strict_date_optional_time"))
161+
.errorMatches("failed to parse date field [true]")
156162
);
157163
}
158164

@@ -561,7 +567,6 @@ public void testScriptAndPrecludedParameters() {
561567

562568
@Override
563569
protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
564-
assumeFalse("synthetic _source for date and date_millis doesn't support ignore_malformed", ignoreMalformed);
565570
return new SyntheticSourceSupport() {
566571
private final DateFieldMapper.Resolution resolution = randomFrom(DateFieldMapper.Resolution.values());
567572
private final Object nullValue = usually()
@@ -577,36 +582,62 @@ protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed)
577582
@Override
578583
public SyntheticSourceExample example(int maxValues) {
579584
if (randomBoolean()) {
580-
Tuple<Object, String> v = generateValue();
585+
Value v = generateValue();
586+
if (v.malformedOutput != null) {
587+
return new SyntheticSourceExample(v.input, v.malformedOutput, null, this::mapping);
588+
}
589+
581590
return new SyntheticSourceExample(
582-
v.v1(),
583-
v.v2(),
584-
resolution.convert(Instant.from(formatter.parse(v.v2()))),
591+
v.input,
592+
v.output,
593+
resolution.convert(Instant.from(formatter.parse(v.output))),
585594
this::mapping
586595
);
587596
}
588-
List<Tuple<Object, String>> values = randomList(1, maxValues, this::generateValue);
589-
List<Object> in = values.stream().map(Tuple::v1).toList();
590-
List<String> outList = values.stream()
597+
598+
List<Value> values = randomList(1, maxValues, this::generateValue);
599+
List<Object> in = values.stream().map(Value::input).toList();
600+
601+
List<String> outputFromDocValues = values.stream()
602+
.filter(v -> v.malformedOutput == null)
591603
.sorted(
592-
Comparator.comparing(v -> Instant.from(formatter.parse(v.v1() == null ? nullValue.toString() : v.v1().toString())))
604+
Comparator.comparing(
605+
v -> Instant.from(formatter.parse(v.input == null ? nullValue.toString() : v.input.toString()))
606+
)
593607
)
594-
.map(Tuple::v2)
608+
.map(Value::output)
595609
.toList();
610+
611+
Stream<Object> malformedOutput = values.stream().filter(v -> v.malformedOutput != null).map(Value::malformedOutput);
612+
613+
// Malformed values are always last in the implementation.
614+
List<Object> outList = Stream.concat(outputFromDocValues.stream(), malformedOutput).toList();
596615
Object out = outList.size() == 1 ? outList.get(0) : outList;
597616

598-
List<Long> outBlockList = outList.stream().map(v -> resolution.convert(Instant.from(formatter.parse(v)))).toList();
617+
List<Long> outBlockList = outputFromDocValues.stream()
618+
.map(v -> resolution.convert(Instant.from(formatter.parse(v))))
619+
.toList();
599620
Object outBlock = outBlockList.size() == 1 ? outBlockList.get(0) : outBlockList;
600621
return new SyntheticSourceExample(in, out, outBlock, this::mapping);
601622
}
602623

603-
private Tuple<Object, String> generateValue() {
624+
private record Value(Object input, String output, Object malformedOutput) {}
625+
626+
private Value generateValue() {
604627
if (nullValue != null && randomBoolean()) {
605-
return Tuple.tuple(null, outValue(nullValue));
628+
return new Value(null, outValue(nullValue), null);
629+
}
630+
// Different malformed values are tested in #exampleMalformedValues().
631+
// Here we only verify behavior of arrays that contain malformed
632+
// values since there are modifications specific to synthetic source.
633+
if (ignoreMalformed && randomBoolean()) {
634+
var malformedInput = randomAlphaOfLengthBetween(1, 10);
635+
return new Value(malformedInput, null, malformedInput);
606636
}
637+
607638
Object in = randomValue();
608639
String out = outValue(in);
609-
return Tuple.tuple(in, out);
640+
return new Value(in, out, null);
610641
}
611642

612643
private Object randomValue() {
@@ -637,6 +668,9 @@ private void mapping(XContentBuilder b) throws IOException {
637668
if (nullValue != null) {
638669
b.field("null_value", nullValue);
639670
}
671+
if (ignoreMalformed) {
672+
b.field("ignore_malformed", true);
673+
}
640674
}
641675

642676
@Override
@@ -653,16 +687,6 @@ public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
653687
b -> b.field("type", fieldType).field("doc_values", false)
654688
)
655689
);
656-
examples.add(
657-
new SyntheticSourceInvalidExample(
658-
equalTo(
659-
"field [field] of type ["
660-
+ fieldType
661-
+ "] doesn't support synthetic source because it ignores malformed dates"
662-
),
663-
b -> b.field("type", fieldType).field("ignore_malformed", true)
664-
)
665-
);
666690
}
667691
return examples;
668692
}

test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java

+9-1
Original file line numberDiff line numberDiff line change
@@ -1129,8 +1129,16 @@ public final void testSyntheticSource() throws IOException {
11291129

11301130
public void testSyntheticSourceIgnoreMalformedExamples() throws IOException {
11311131
assumeTrue("type doesn't support ignore_malformed", supportsIgnoreMalformed());
1132-
CheckedConsumer<XContentBuilder, IOException> mapping = syntheticSourceSupport(true).example(1).mapping();
1132+
// We need to call this in order to hit the assumption inside so that
1133+
// it tells us when a field supports ignore_malformed but doesn't support it together with synthetic source.
1134+
// E.g. `assumeFalse(ignoreMalformed)`
1135+
syntheticSourceSupport(true);
1136+
11331137
for (ExampleMalformedValue v : exampleMalformedValues()) {
1138+
CheckedConsumer<XContentBuilder, IOException> mapping = b -> {
1139+
v.mapping.accept(b);
1140+
b.field("ignore_malformed", true);
1141+
};
11341142
assertSyntheticSource(new SyntheticSourceExample(v.value, v.value, v.value, mapping));
11351143
}
11361144
}

0 commit comments

Comments
 (0)