Skip to content

Commit a4f6c0c

Browse files
Enable sort optimization on float and half_float (#126342) (#126542)
Previously we enabled sort optimization on long, double and date types, but left other types for a follow-up. This enables sort optimization on float and half_float types. Optimizations on INT, BYTE and SHORT are left for a follow-up because they need more work: we currently use SortField.Type.LONG for all integer types, which does not allow the optimization to be used.
1 parent f802b0f commit a4f6c0c

File tree

10 files changed

+480
-161
lines changed

10 files changed

+480
-161
lines changed

docs/changelog/126342.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 126342
2+
summary: Enable sort optimization on float and `half_float`
3+
area: Search
4+
type: enhancement
5+
issues: []

server/src/internalClusterTest/java/org/elasticsearch/search/sort/FieldSortIT.java

+230-91
Large diffs are not rendered by default.

server/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java

+17-15
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
2020
import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
2121
import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
22+
import org.elasticsearch.index.fielddata.fieldcomparator.HalfFloatValuesComparatorSource;
2223
import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
2324
import org.elasticsearch.search.DocValueFormat;
2425
import org.elasticsearch.search.MultiValueMode;
@@ -46,7 +47,7 @@ public enum NumericType {
4647
LONG(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
4748
DATE(false, SortField.Type.LONG, CoreValuesSourceType.DATE),
4849
DATE_NANOSECONDS(false, SortField.Type.LONG, CoreValuesSourceType.DATE),
49-
HALF_FLOAT(true, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
50+
HALF_FLOAT(true, SortField.Type.FLOAT, CoreValuesSourceType.NUMERIC),
5051
FLOAT(true, SortField.Type.FLOAT, CoreValuesSourceType.NUMERIC),
5152
DOUBLE(true, SortField.Type.DOUBLE, CoreValuesSourceType.NUMERIC);
5253

@@ -95,11 +96,13 @@ public final SortField sortField(
9596
* 3. We aren't using max or min to resolve the duplicates.
9697
* 4. We have to cast the results to another type.
9798
*/
98-
if (sortRequiresCustomComparator()
99-
|| nested != null
99+
boolean requiresCustomComparator = nested != null
100100
|| (sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN)
101-
|| targetNumericType != getNumericType()) {
102-
return new SortField(getFieldName(), source, reverse);
101+
|| targetNumericType != getNumericType();
102+
if (sortRequiresCustomComparator() || requiresCustomComparator) {
103+
SortField sortField = new SortField(getFieldName(), source, reverse);
104+
sortField.setOptimizeSortWithPoints(requiresCustomComparator == false && isIndexed());
105+
return sortField;
103106
}
104107

105108
SortedNumericSelector.Type selectorType = sortMode == MultiValueMode.MAX
@@ -108,20 +111,18 @@ public final SortField sortField(
108111
SortField sortField = new SortedNumericSortField(getFieldName(), getNumericType().sortFieldType, reverse, selectorType);
109112
sortField.setMissingValue(source.missingObject(missingValue, reverse));
110113

111-
// TODO: Now that numeric sort uses indexed points to skip over non-competitive documents,
112-
// Lucene 9 requires that the same data/type is stored in points and doc values.
113-
// We break this assumption in ES by using the wider numeric sort type for every field,
114-
// (e.g. shorts use longs and floats use doubles). So for now we forbid the usage of
115-
// points in numeric sort on field types that use a different sort type.
116-
// We could expose these optimizations for all numeric types but that would require
117-
// to rewrite the logic to handle types when merging results coming from different
118-
// indices.
114+
// TODO: enable sort optimization for BYTE, SHORT and INT types
115+
// They can use custom comparator logic, similarly to HalfFloatValuesComparatorSource.
116+
// The problem comes from the fact that we use SortField.Type.LONG for all these types.
117+
// Investigate how to resolve this.
119118
switch (getNumericType()) {
120119
case DATE_NANOSECONDS:
121120
case DATE:
122121
case LONG:
123122
case DOUBLE:
124-
// longs, doubles and dates use the same type for doc-values and points.
123+
case FLOAT:
124+
// longs, doubles and dates use the same type for doc-values and points
125+
// floats use longs for doc-values, but Lucene's FloatComparator::getValueForDoc converts the long value to a float
125126
sortField.setOptimizeSortWithPoints(isIndexed());
126127
break;
127128

@@ -199,7 +200,8 @@ private XFieldComparatorSource comparatorSource(
199200
Nested nested
200201
) {
201202
return switch (targetNumericType) {
202-
case HALF_FLOAT, FLOAT -> new FloatValuesComparatorSource(this, missingValue, sortMode, nested);
203+
case FLOAT -> new FloatValuesComparatorSource(this, missingValue, sortMode, nested);
204+
case HALF_FLOAT -> new HalfFloatValuesComparatorSource(this, missingValue, sortMode, nested);
203205
case DOUBLE -> new DoubleValuesComparatorSource(this, missingValue, sortMode, nested);
204206
case DATE -> dateComparatorSource(missingValue, sortMode, nested);
205207
case DATE_NANOSECONDS -> dateNanosComparatorSource(missingValue, sortMode, nested);

server/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/FloatValuesComparatorSource.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
*/
3838
public class FloatValuesComparatorSource extends IndexFieldData.XFieldComparatorSource {
3939

40-
private final IndexNumericFieldData indexFieldData;
40+
final IndexNumericFieldData indexFieldData;
4141

4242
public FloatValuesComparatorSource(
4343
IndexNumericFieldData indexFieldData,
@@ -54,7 +54,7 @@ public SortField.Type reducedType() {
5454
return SortField.Type.FLOAT;
5555
}
5656

57-
private NumericDoubleValues getNumericDocValues(LeafReaderContext context, double missingValue) throws IOException {
57+
NumericDoubleValues getNumericDocValues(LeafReaderContext context, double missingValue) throws IOException {
5858
final SortedNumericDoubleValues values = indexFieldData.load(context).getDoubleValues();
5959
if (nested == null) {
6060
return FieldData.replaceMissing(sortMode.select(values), missingValue);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.fielddata.fieldcomparator;
11+
12+
import org.apache.lucene.index.LeafReaderContext;
13+
import org.apache.lucene.sandbox.document.HalfFloatPoint;
14+
import org.apache.lucene.search.LeafFieldComparator;
15+
import org.apache.lucene.search.Pruning;
16+
import org.apache.lucene.search.comparators.NumericComparator;
17+
import org.apache.lucene.util.BitUtil;
18+
19+
import java.io.IOException;
20+
21+
/**
22+
* Comparator for half_float values.
23+
* This comparator provides skipping functionality – an iterator that can skip over non-competitive documents.
24+
*/
25+
public class HalfFloatComparator extends NumericComparator<Float> {
26+
private final float[] values;
27+
protected float topValue;
28+
protected float bottom;
29+
30+
public HalfFloatComparator(int numHits, String field, Float missingValue, boolean reverse, Pruning pruning) {
31+
super(field, missingValue != null ? missingValue : 0.0f, reverse, pruning, HalfFloatPoint.BYTES);
32+
values = new float[numHits];
33+
}
34+
35+
@Override
36+
public int compare(int slot1, int slot2) {
37+
return Float.compare(values[slot1], values[slot2]);
38+
}
39+
40+
@Override
41+
public void setTopValue(Float value) {
42+
super.setTopValue(value);
43+
topValue = value;
44+
}
45+
46+
@Override
47+
public Float value(int slot) {
48+
return Float.valueOf(values[slot]);
49+
}
50+
51+
@Override
52+
protected long missingValueAsComparableLong() {
53+
return HalfFloatPoint.halfFloatToSortableShort(missingValue);
54+
}
55+
56+
@Override
57+
protected long sortableBytesToLong(byte[] bytes) {
58+
// Copied from HalfFloatPoint::sortableBytesToShort
59+
short x = (short) BitUtil.VH_BE_SHORT.get(bytes, 0);
60+
// Re-flip the sign bit to restore the original value:
61+
return (short) (x ^ 0x8000);
62+
}
63+
64+
@Override
65+
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
66+
return new HalfFloatLeafComparator(context);
67+
}
68+
69+
/** Leaf comparator for {@link HalfFloatComparator} that provides skipping functionality */
70+
public class HalfFloatLeafComparator extends NumericLeafComparator {
71+
72+
public HalfFloatLeafComparator(LeafReaderContext context) throws IOException {
73+
super(context);
74+
}
75+
76+
private float getValueForDoc(int doc) throws IOException {
77+
if (docValues.advanceExact(doc)) {
78+
return Float.intBitsToFloat((int) docValues.longValue());
79+
} else {
80+
return missingValue;
81+
}
82+
}
83+
84+
@Override
85+
public void setBottom(int slot) throws IOException {
86+
bottom = values[slot];
87+
super.setBottom(slot);
88+
}
89+
90+
@Override
91+
public int compareBottom(int doc) throws IOException {
92+
return Float.compare(bottom, getValueForDoc(doc));
93+
}
94+
95+
@Override
96+
public int compareTop(int doc) throws IOException {
97+
return Float.compare(topValue, getValueForDoc(doc));
98+
}
99+
100+
@Override
101+
public void copy(int slot, int doc) throws IOException {
102+
values[slot] = getValueForDoc(doc);
103+
super.copy(slot, doc);
104+
}
105+
106+
@Override
107+
protected long bottomAsComparableLong() {
108+
return HalfFloatPoint.halfFloatToSortableShort(bottom);
109+
}
110+
111+
@Override
112+
protected long topAsComparableLong() {
113+
return HalfFloatPoint.halfFloatToSortableShort(topValue);
114+
}
115+
}
116+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
package org.elasticsearch.index.fielddata.fieldcomparator;
10+
11+
import org.apache.lucene.index.LeafReaderContext;
12+
import org.apache.lucene.index.NumericDocValues;
13+
import org.apache.lucene.search.FieldComparator;
14+
import org.apache.lucene.search.LeafFieldComparator;
15+
import org.apache.lucene.search.Pruning;
16+
import org.elasticsearch.core.Nullable;
17+
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
18+
import org.elasticsearch.search.MultiValueMode;
19+
20+
import java.io.IOException;
21+
22+
/**
23+
* Comparator source for half_float values.
24+
*/
25+
public class HalfFloatValuesComparatorSource extends FloatValuesComparatorSource {
26+
public HalfFloatValuesComparatorSource(
27+
IndexNumericFieldData indexFieldData,
28+
@Nullable Object missingValue,
29+
MultiValueMode sortMode,
30+
Nested nested
31+
) {
32+
super(indexFieldData, missingValue, sortMode, nested);
33+
}
34+
35+
@Override
36+
public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning enableSkipping, boolean reversed) {
37+
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());
38+
39+
final float fMissingValue = (Float) missingObject(missingValue, reversed);
40+
// NOTE: it's important to pass null as a missing value in the constructor so that
41+
// the comparator doesn't check docsWithField since we replace missing values in select()
42+
return new HalfFloatComparator(numHits, fieldname, null, reversed, enableSkipping) {
43+
@Override
44+
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
45+
return new HalfFloatLeafComparator(context) {
46+
@Override
47+
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field) throws IOException {
48+
return HalfFloatValuesComparatorSource.this.getNumericDocValues(context, fMissingValue).getRawFloatValues();
49+
}
50+
};
51+
}
52+
};
53+
}
54+
}

server/src/test/java/org/elasticsearch/common/lucene/LuceneTests.java

+9-2
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
6161
import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
6262
import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
63+
import org.elasticsearch.index.fielddata.fieldcomparator.HalfFloatValuesComparatorSource;
6364
import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
6465
import org.elasticsearch.search.MultiValueMode;
6566
import org.elasticsearch.search.sort.ShardDocSortField;
@@ -627,7 +628,7 @@ private static Tuple<SortField, SortField> randomSortFieldCustomComparatorSource
627628
IndexFieldData.XFieldComparatorSource comparatorSource;
628629
boolean reverse = randomBoolean();
629630
Object missingValue = null;
630-
switch (randomIntBetween(0, 3)) {
631+
switch (randomIntBetween(0, 4)) {
631632
case 0 -> comparatorSource = new LongValuesComparatorSource(
632633
null,
633634
randomBoolean() ? randomLong() : null,
@@ -647,7 +648,13 @@ private static Tuple<SortField, SortField> randomSortFieldCustomComparatorSource
647648
randomFrom(MultiValueMode.values()),
648649
null
649650
);
650-
case 3 -> {
651+
case 3 -> comparatorSource = new HalfFloatValuesComparatorSource(
652+
null,
653+
randomBoolean() ? randomFloat() : null,
654+
randomFrom(MultiValueMode.values()),
655+
null
656+
);
657+
case 4 -> {
651658
comparatorSource = new BytesRefFieldComparatorSource(
652659
null,
653660
randomBoolean() ? "_first" : "_last",

server/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataTestCase.java

+10
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,16 @@ public <IFD extends IndexFieldData<?>> IFD getForField(String type, String field
108108
IndexVersion.current(),
109109
null
110110
).docValues(docValues).build(context).fieldType();
111+
} else if (type.equals("half_float")) {
112+
fieldType = new NumberFieldMapper.Builder(
113+
fieldName,
114+
NumberFieldMapper.NumberType.HALF_FLOAT,
115+
ScriptCompiler.NONE,
116+
false,
117+
true,
118+
IndexVersion.current(),
119+
null
120+
).docValues(docValues).build(context).fieldType();
111121
} else if (type.equals("double")) {
112122
fieldType = new NumberFieldMapper.Builder(
113123
fieldName,

server/src/test/java/org/elasticsearch/index/search/nested/FloatNestedSortingTests.java

-51
Original file line numberDiff line numberDiff line change
@@ -10,29 +10,13 @@
1010

1111
import org.apache.lucene.document.SortedNumericDocValuesField;
1212
import org.apache.lucene.index.IndexableField;
13-
import org.apache.lucene.search.ConstantScoreQuery;
14-
import org.apache.lucene.search.FieldDoc;
15-
import org.apache.lucene.search.IndexSearcher;
16-
import org.apache.lucene.search.Query;
17-
import org.apache.lucene.search.Sort;
18-
import org.apache.lucene.search.SortField;
19-
import org.apache.lucene.search.TopDocs;
20-
import org.apache.lucene.search.join.QueryBitSetProducer;
21-
import org.apache.lucene.search.join.ScoreMode;
22-
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
2313
import org.apache.lucene.util.NumericUtils;
24-
import org.elasticsearch.common.lucene.search.Queries;
2514
import org.elasticsearch.index.fielddata.IndexFieldData;
26-
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource;
2715
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
2816
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
2917
import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
3018
import org.elasticsearch.search.MultiValueMode;
3119

32-
import java.io.IOException;
33-
34-
import static org.hamcrest.Matchers.equalTo;
35-
3620
public class FloatNestedSortingTests extends DoubleNestedSortingTests {
3721

3822
@Override
@@ -55,39 +39,4 @@ protected IndexFieldData.XFieldComparatorSource createFieldComparator(
5539
protected IndexableField createField(String name, int value) {
5640
return new SortedNumericDocValuesField(name, NumericUtils.floatToSortableInt(value));
5741
}
58-
59-
protected void assertAvgScoreMode(
60-
Query parentFilter,
61-
IndexSearcher searcher,
62-
IndexFieldData.XFieldComparatorSource innerFieldComparator
63-
) throws IOException {
64-
MultiValueMode sortMode = MultiValueMode.AVG;
65-
Query childFilter = Queries.not(parentFilter);
66-
XFieldComparatorSource nestedComparatorSource = createFieldComparator(
67-
"field2",
68-
sortMode,
69-
-127,
70-
createNested(searcher, parentFilter, childFilter)
71-
);
72-
Query query = new ToParentBlockJoinQuery(
73-
new ConstantScoreQuery(childFilter),
74-
new QueryBitSetProducer(parentFilter),
75-
ScoreMode.None
76-
);
77-
Sort sort = new Sort(new SortField("field2", nestedComparatorSource));
78-
TopDocs topDocs = searcher.search(query, 5, sort);
79-
assertThat(topDocs.totalHits.value(), equalTo(7L));
80-
assertThat(topDocs.scoreDocs.length, equalTo(5));
81-
assertThat(topDocs.scoreDocs[0].doc, equalTo(11));
82-
assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[0]).fields[0]).intValue(), equalTo(2));
83-
assertThat(topDocs.scoreDocs[1].doc, equalTo(7));
84-
assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[1]).fields[0]).intValue(), equalTo(2));
85-
assertThat(topDocs.scoreDocs[2].doc, equalTo(3));
86-
assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[2]).fields[0]).intValue(), equalTo(3));
87-
assertThat(topDocs.scoreDocs[3].doc, equalTo(15));
88-
assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[3]).fields[0]).intValue(), equalTo(3));
89-
assertThat(topDocs.scoreDocs[4].doc, equalTo(19));
90-
assertThat(((Number) ((FieldDoc) topDocs.scoreDocs[4]).fields[0]).intValue(), equalTo(3));
91-
}
92-
9342
}

0 commit comments

Comments
 (0)