Skip to content

Commit 80667d0

Browse files
authored
Fix NPE in flat_bbq scorer when all vectors are missing (#129548)
It is possible to get all the way down to the knn format reader when there are no vectors in the index. This execution path is reachable when using nested queries (which bypass the higher-level checks in `KnnFloatVectorQuery#approximateSearch`). `bbq_flat` should check for the existence of vectors before attempting to create the scorer.
1 parent 8321077 commit 80667d0

File tree

7 files changed

+143
-2
lines changed

7 files changed

+143
-2
lines changed

docs/changelog/129548.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 129548
2+
summary: Fix NPE in `flat_bbq` scorer when all vectors are missing
3+
area: Vector Search
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ public RandomVectorScorer getRandomVectorScorer(
5959
float[] target
6060
) throws IOException {
6161
if (vectorValues instanceof BinarizedByteVectorValues binarizedVectors) {
62+
assert binarizedVectors.getQuantizer() != null
63+
: "BinarizedByteVectorValues must have a quantizer for ES816BinaryFlatVectorsScorer";
64+
assert binarizedVectors.size() > 0 : "BinarizedByteVectorValues must have at least one vector for ES816BinaryFlatVectorsScorer";
6265
BinaryQuantizer quantizer = binarizedVectors.getQuantizer();
6366
float[] centroid = binarizedVectors.getCentroid();
6467
// FIXME: precompute this once?

server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ static void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
160160
@Override
161161
public RandomVectorScorer getRandomVectorScorer(String field, float[] target) throws IOException {
162162
FieldEntry fi = fields.get(field);
163-
if (fi == null) {
163+
if (fi == null || fi.size() == 0) {
164164
return null;
165165
}
166166
return vectorScorer.getRandomVectorScorer(

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryFlatVectorsScorer.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ public RandomVectorScorer getRandomVectorScorer(
6666
float[] target
6767
) throws IOException {
6868
if (vectorValues instanceof BinarizedByteVectorValues binarizedVectors) {
69+
assert binarizedVectors.getQuantizer() != null
70+
: "BinarizedByteVectorValues must have a quantizer for ES818BinaryFlatVectorsScorer";
71+
assert binarizedVectors.size() > 0 : "BinarizedByteVectorValues must have at least one vector for ES818BinaryFlatVectorsScorer";
6972
OptimizedScalarQuantizer quantizer = binarizedVectors.getQuantizer();
7073
float[] centroid = binarizedVectors.getCentroid();
7174
// We make a copy as the quantization process mutates the input

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ static void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
160160
@Override
161161
public RandomVectorScorer getRandomVectorScorer(String field, float[] target) throws IOException {
162162
FieldEntry fi = fields.get(field);
163-
if (fi == null) {
163+
if (fi == null || fi.size() == 0) {
164164
return null;
165165
}
166166
return vectorScorer.getRandomVectorScorer(

server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.lucene.codecs.KnnVectorsReader;
2626
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
2727
import org.apache.lucene.document.Document;
28+
import org.apache.lucene.document.Field;
2829
import org.apache.lucene.document.KnnFloatVectorField;
2930
import org.apache.lucene.index.CodecReader;
3031
import org.apache.lucene.index.DirectoryReader;
@@ -34,12 +35,21 @@
3435
import org.apache.lucene.index.IndexWriterConfig;
3536
import org.apache.lucene.index.KnnVectorValues;
3637
import org.apache.lucene.index.LeafReader;
38+
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
39+
import org.apache.lucene.index.Term;
3740
import org.apache.lucene.index.VectorSimilarityFunction;
41+
import org.apache.lucene.search.FieldExistsQuery;
3842
import org.apache.lucene.search.IndexSearcher;
3943
import org.apache.lucene.search.KnnFloatVectorQuery;
44+
import org.apache.lucene.search.MatchAllDocsQuery;
4045
import org.apache.lucene.search.Query;
46+
import org.apache.lucene.search.TermQuery;
4147
import org.apache.lucene.search.TopDocs;
4248
import org.apache.lucene.search.TotalHits;
49+
import org.apache.lucene.search.join.BitSetProducer;
50+
import org.apache.lucene.search.join.CheckJoinIndex;
51+
import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
52+
import org.apache.lucene.search.join.QueryBitSetProducer;
4353
import org.apache.lucene.store.Directory;
4454
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
4555
import org.apache.lucene.tests.util.TestUtil;
@@ -48,6 +58,9 @@
4858
import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils;
4959

5060
import java.io.IOException;
61+
import java.util.ArrayList;
62+
import java.util.Arrays;
63+
import java.util.List;
5164
import java.util.Locale;
5265

5366
import static java.lang.String.format;
@@ -70,6 +83,58 @@ protected Codec getCodec() {
7083
return codec;
7184
}
7285

86+
static String encodeInts(int[] i) {
87+
return Arrays.toString(i);
88+
}
89+
90+
static BitSetProducer parentFilter(IndexReader r) throws IOException {
91+
// Create a filter that defines "parent" documents in the index
92+
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
93+
CheckJoinIndex.check(r, parentsFilter);
94+
return parentsFilter;
95+
}
96+
97+
Document makeParent(int[] children) {
98+
Document parent = new Document();
99+
parent.add(newStringField("docType", "_parent", Field.Store.NO));
100+
parent.add(newStringField("id", encodeInts(children), Field.Store.YES));
101+
return parent;
102+
}
103+
104+
public void testEmptyDiversifiedChildSearch() throws Exception {
105+
String fieldName = "field";
106+
int dims = random().nextInt(4, 65);
107+
float[] vector = randomVector(dims);
108+
VectorSimilarityFunction similarityFunction = VectorSimilarityFunction.EUCLIDEAN;
109+
try (Directory d = newDirectory()) {
110+
IndexWriterConfig iwc = newIndexWriterConfig().setCodec(codec);
111+
iwc.setMergePolicy(new SoftDeletesRetentionMergePolicy("soft_delete", MatchAllDocsQuery::new, iwc.getMergePolicy()));
112+
try (IndexWriter w = new IndexWriter(d, iwc)) {
113+
List<Document> toAdd = new ArrayList<>();
114+
for (int j = 1; j <= 5; j++) {
115+
Document doc = new Document();
116+
doc.add(new KnnFloatVectorField(fieldName, vector, similarityFunction));
117+
doc.add(newStringField("id", Integer.toString(j), Field.Store.YES));
118+
toAdd.add(doc);
119+
}
120+
toAdd.add(makeParent(new int[] { 1, 2, 3, 4, 5 }));
121+
w.addDocuments(toAdd); // five vector-bearing children followed by their parent, added as one block
122+
w.addDocuments(List.of(makeParent(new int[] { 6, 7, 8, 9, 10 }))); // a second parent added without any child documents
123+
w.deleteDocuments(new FieldExistsQuery(fieldName), new TermQuery(new Term("id", encodeInts(new int[] { 1, 2, 3, 4, 5 }))));
124+
w.flush();
125+
w.commit();
126+
w.forceMerge(1); // the deletes above matched every document holding a vector, plus the first parent
127+
try (IndexReader reader = DirectoryReader.open(w)) {
128+
IndexSearcher searcher = new IndexSearcher(reader);
129+
BitSetProducer parentFilter = parentFilter(searcher.getIndexReader());
130+
Query query = new DiversifyingChildrenFloatKnnVectorQuery(fieldName, vector, null, 1, parentFilter);
131+
assertEquals(0, searcher.search(query, 1).scoreDocs.length); // must return no hits instead of throwing
132+
}
133+
}
134+
135+
}
136+
}
137+
73138
public void testSearch() throws Exception {
74139
String fieldName = "field";
75140
int numVectors = random().nextInt(99, 500);

server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.lucene.codecs.KnnVectorsReader;
2626
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
2727
import org.apache.lucene.document.Document;
28+
import org.apache.lucene.document.Field;
2829
import org.apache.lucene.document.KnnFloatVectorField;
2930
import org.apache.lucene.index.CodecReader;
3031
import org.apache.lucene.index.DirectoryReader;
@@ -34,13 +35,22 @@
3435
import org.apache.lucene.index.IndexWriterConfig;
3536
import org.apache.lucene.index.KnnVectorValues;
3637
import org.apache.lucene.index.LeafReader;
38+
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
39+
import org.apache.lucene.index.Term;
3740
import org.apache.lucene.index.VectorSimilarityFunction;
3841
import org.apache.lucene.misc.store.DirectIODirectory;
42+
import org.apache.lucene.search.FieldExistsQuery;
3943
import org.apache.lucene.search.IndexSearcher;
4044
import org.apache.lucene.search.KnnFloatVectorQuery;
45+
import org.apache.lucene.search.MatchAllDocsQuery;
4146
import org.apache.lucene.search.Query;
47+
import org.apache.lucene.search.TermQuery;
4248
import org.apache.lucene.search.TopDocs;
4349
import org.apache.lucene.search.TotalHits;
50+
import org.apache.lucene.search.join.BitSetProducer;
51+
import org.apache.lucene.search.join.CheckJoinIndex;
52+
import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
53+
import org.apache.lucene.search.join.QueryBitSetProducer;
4454
import org.apache.lucene.store.Directory;
4555
import org.apache.lucene.store.FSDirectory;
4656
import org.apache.lucene.store.IOContext;
@@ -64,6 +74,9 @@
6474
import java.io.IOException;
6575
import java.nio.file.Files;
6676
import java.nio.file.Path;
77+
import java.util.ArrayList;
78+
import java.util.Arrays;
79+
import java.util.List;
6780
import java.util.Locale;
6881
import java.util.OptionalLong;
6982

@@ -87,6 +100,58 @@ protected Codec getCodec() {
87100
return codec;
88101
}
89102

103+
static String encodeInts(int[] i) {
104+
return Arrays.toString(i);
105+
}
106+
107+
static BitSetProducer parentFilter(IndexReader r) throws IOException {
108+
// Create a filter that defines "parent" documents in the index
109+
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
110+
CheckJoinIndex.check(r, parentsFilter);
111+
return parentsFilter;
112+
}
113+
114+
Document makeParent(int[] children) {
115+
Document parent = new Document();
116+
parent.add(newStringField("docType", "_parent", Field.Store.NO));
117+
parent.add(newStringField("id", encodeInts(children), Field.Store.YES));
118+
return parent;
119+
}
120+
121+
public void testEmptyDiversifiedChildSearch() throws Exception {
122+
String fieldName = "field";
123+
int dims = random().nextInt(4, 65);
124+
float[] vector = randomVector(dims);
125+
VectorSimilarityFunction similarityFunction = VectorSimilarityFunction.EUCLIDEAN;
126+
try (Directory d = newDirectory()) {
127+
IndexWriterConfig iwc = newIndexWriterConfig().setCodec(codec);
128+
iwc.setMergePolicy(new SoftDeletesRetentionMergePolicy("soft_delete", MatchAllDocsQuery::new, iwc.getMergePolicy()));
129+
try (IndexWriter w = new IndexWriter(d, iwc)) {
130+
List<Document> toAdd = new ArrayList<>();
131+
for (int j = 1; j <= 5; j++) {
132+
Document doc = new Document();
133+
doc.add(new KnnFloatVectorField(fieldName, vector, similarityFunction));
134+
doc.add(newStringField("id", Integer.toString(j), Field.Store.YES));
135+
toAdd.add(doc);
136+
}
137+
toAdd.add(makeParent(new int[] { 1, 2, 3, 4, 5 }));
138+
w.addDocuments(toAdd); // five vector-bearing children followed by their parent, added as one block
139+
w.addDocuments(List.of(makeParent(new int[] { 6, 7, 8, 9, 10 }))); // a second parent added without any child documents
140+
w.deleteDocuments(new FieldExistsQuery(fieldName), new TermQuery(new Term("id", encodeInts(new int[] { 1, 2, 3, 4, 5 }))));
141+
w.flush();
142+
w.commit();
143+
w.forceMerge(1); // the deletes above matched every document holding a vector, plus the first parent
144+
try (IndexReader reader = DirectoryReader.open(w)) {
145+
IndexSearcher searcher = new IndexSearcher(reader);
146+
BitSetProducer parentFilter = parentFilter(searcher.getIndexReader());
147+
Query query = new DiversifyingChildrenFloatKnnVectorQuery(fieldName, vector, null, 1, parentFilter);
148+
assertEquals(0, searcher.search(query, 1).scoreDocs.length); // must return no hits instead of throwing
149+
}
150+
}
151+
152+
}
153+
}
154+
90155
public void testSearch() throws Exception {
91156
String fieldName = "field";
92157
int numVectors = random().nextInt(99, 500);

0 commit comments

Comments
 (0)