Skip to content

Commit 3e07b7c

Browse files
authored
Refactor nested field handling in FieldFetcher (elastic#97683)
The current recursive nested field handling implementation in FieldFetcher can be O(n^2) in the number of nested mappings, whether or not a nested field has been requested. For indexes with a very large number of nested fields, this can mean it takes multiple seconds to build a FieldFetcher, making the fetch phase of queries extremely slow, even if no nested fields are actually asked for. This commit reworks the logic so that building nested fetchers is only O(n log n) in the number of nested mappers; additionally, we only pay this cost for nested fields that have been requested.
1 parent 5c386f1 commit 3e07b7c

File tree

8 files changed

+653
-207
lines changed

8 files changed

+653
-207
lines changed

docs/changelog/97683.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 97683
2+
summary: Refactor nested field handling in `FieldFetcher`
3+
area: Search
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/mapper/NestedLookup.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import org.apache.lucene.search.Query;
1212

13+
import java.util.ArrayList;
1314
import java.util.Collections;
1415
import java.util.Comparator;
1516
import java.util.HashMap;
@@ -42,6 +43,12 @@ public interface NestedLookup {
4243
*/
4344
String getNestedParent(String path);
4445

46+
/**
47+
* Given a nested object path, returns a list of paths of its
48+
* immediate children
49+
*/
50+
List<String> getImmediateChildMappers(String path);
51+
4552
/**
4653
* A NestedLookup for a mapping with no nested mappers
4754
*/
@@ -60,6 +67,11 @@ public Map<String, Query> getNestedParentFilters() {
6067
public String getNestedParent(String path) {
6168
return null;
6269
}
70+
71+
@Override
72+
public List<String> getImmediateChildMappers(String path) {
73+
return List.of();
74+
}
6375
};
6476

6577
/**
@@ -84,6 +96,7 @@ static NestedLookup build(List<NestedObjectMapper> mappers) {
8496
previous = mapper;
8597
}
8698
List<String> nestedPathNames = mappers.stream().map(NestedObjectMapper::name).toList();
99+
87100
return new NestedLookup() {
88101

89102
@Override
@@ -98,6 +111,9 @@ public Map<String, Query> getNestedParentFilters() {
98111

99112
@Override
100113
public String getNestedParent(String path) {
114+
if (path.contains(".") == false) {
115+
return null;
116+
}
101117
String parent = null;
102118
for (String parentPath : nestedPathNames) {
103119
if (path.startsWith(parentPath + ".")) {
@@ -108,6 +124,33 @@ public String getNestedParent(String path) {
108124
}
109125
return parent;
110126
}
127+
128+
@Override
129+
public List<String> getImmediateChildMappers(String path) {
130+
String prefix = "".equals(path) ? "" : path + ".";
131+
List<String> childMappers = new ArrayList<>();
132+
int parentPos = Collections.binarySearch(nestedPathNames, path);
133+
if (parentPos < -1 || parentPos >= nestedPathNames.size() - 1) {
134+
return List.of();
135+
}
136+
int i = parentPos + 1;
137+
String lastChild = nestedPathNames.get(i);
138+
if (lastChild.startsWith(prefix)) {
139+
childMappers.add(lastChild);
140+
}
141+
i++;
142+
while (i < nestedPathNames.size() && nestedPathNames.get(i).startsWith(prefix)) {
143+
if (nestedPathNames.get(i).startsWith(lastChild + ".")) {
144+
// child of child, skip
145+
i++;
146+
continue;
147+
}
148+
lastChild = nestedPathNames.get(i);
149+
childMappers.add(lastChild);
150+
i++;
151+
}
152+
return childMappers;
153+
}
111154
};
112155
}
113156
}
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.search;
10+
11+
import java.util.ArrayList;
12+
import java.util.HashMap;
13+
import java.util.Iterator;
14+
import java.util.List;
15+
import java.util.Map;
16+
import java.util.function.Function;
17+
18+
/**
 * Utility methods for dealing with nested mappers
 */
public final class NestedUtils {

    private NestedUtils() {}

    /**
     * Partition a set of input objects by the children of a specific nested scope
     *
     * The returned map will contain an entry for the scope itself and for all
     * children, even if some of them receive no inputs. An input is assigned to
     * a child when its path is strictly below that child (i.e. starts with the
     * child path followed by a dot); every other input is assigned to the scope.
     * Inputs keep their relative order within each partition.
     *
     * All children, and all input paths, must begin with the scope. Children are
     * expected to be the immediate nested children of the scope, so that no child
     * is located below another child. Neither children nor inputs need to be
     * sorted.
     *
     * @param scope the nested scope to base partitions on
     * @param children the immediate children of the nested scope
     * @param inputs a set of inputs to partition
     * @param pathFunction a function to retrieve a path for each input
     * @param <T> the type of the inputs
     * @return a map of nested paths to lists of inputs; if {@code children} is
     *         empty this is an immutable single-entry map holding {@code inputs}
     *         itself under {@code scope}
     */
    public static <T> Map<String, List<T>> partitionByChildren(
        String scope,
        List<String> children,
        List<T> inputs,
        Function<T, String> pathFunction
    ) {
        // No immediate nested children, so we can shortcut and just return all inputs
        // under the current scope
        if (children.isEmpty()) {
            return Map.of(scope, inputs);
        }

        // Set up the output map, with one entry for the current scope and one for each
        // of its children
        Map<String, List<T>> output = new HashMap<>();
        output.put(scope, new ArrayList<>());
        for (String child : children) {
            output.put(child, new ArrayList<>());
        }
        List<T> scopeInputs = output.get(scope);

        for (T input : inputs) {
            String inputName = pathFunction.apply(input);
            assert inputName.startsWith(scope);
            List<T> childInputs = findChildPartition(scope, output, inputName);
            if (childInputs == null) {
                scopeInputs.add(input);
            } else {
                childInputs.add(input);
            }
        }
        return output;
    }

    /**
     * Finds the partition of the child that contains the given input path, by
     * looking up each dotted prefix of the path that is longer than the scope.
     * Doing a direct lookup rather than a sorted merge means paths containing
     * characters that sort before '.' (such as '-') cannot be misfiled.
     *
     * @return the child's partition, or {@code null} if the path is not below any child
     */
    private static <T> List<T> findChildPartition(String scope, Map<String, List<T>> partitions, String inputName) {
        // Start past the scope so that the scope's own entry is never matched
        int dot = inputName.indexOf('.', scope.length() + 1);
        while (dot >= 0) {
            List<T> partition = partitions.get(inputName.substring(0, dot));
            if (partition != null) {
                return partition;
            }
            dot = inputName.indexOf('.', dot + 1);
        }
        return null;
    }

}

0 commit comments

Comments
 (0)