Skip to content

Commit eadc07c

Browse files
committed
Fix 9.12.0 backcompat break (Lucene 9.12.0 cannot read 9.11.x indices written with quantized HNSW, Lucene99HnswScalarQuantizedVectorsFormat) (#13874)
* carefully regenerate the int8_hnsw bwc indices so that they do in fact use Lucene99ScalarQuantizedVectorsFormat ... when running TestInt8HnswBackwardsCompatibility it now fails (as expected) on 9.11.0 and 9.11.1 bwc indices, but not on 9.10.0
* rename int8 -> int7 bwc tests since we are actually testing 7 bit quantization
* actually fix the bwc bug: only allow compress=true when bits is 7 or 8 in HNSW scalar quantization
* tidy
* Revert "rename int8 -> int7 bwc tests since we are actually testing 7 bit quantization". This reverts commit eeb3f8a.
* Reapply "rename int8 -> int7 bwc tests since we are actually testing 7 bit quantization". This reverts commit 3487c42.
* #13880: add test to verify the int7 quantized indices are in fact using quantized vectors not float32
* bump 9.12.x version to 9.12.1 and add bwc indices for 9.12.0
* remove duplicate 9.12.0 Version constant
* revert changes to index.9.12.0-cfs.zip, index.9.12.0-nocfs.zip, sorted.9.12.0.zip
* remove unused bwc index

Closes #13867
Closes #13880
1 parent e6bb5e2 commit eadc07c

File tree

12 files changed

+48
-19
lines changed

12 files changed

+48
-19
lines changed

lucene/backward-codecs/src/test/org/apache/lucene/backward_index/BackwardsCompatibilityTestBase.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,8 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
106106
* This is a base constructor for parameterized BWC tests. The constructor arguments are provided
107107
* by {@link com.carrotsearch.randomizedtesting.RandomizedRunner} during test execution. A {@link
108108
* com.carrotsearch.randomizedtesting.annotations.ParametersFactory} specified in a subclass
109-
* provides a list lists of arguments for the tests and RandomizedRunner will execute the test for
110-
* each of the argument list.
109+
* provides a list of arguments for the tests and RandomizedRunner will execute the test for each
110+
* of the argument list.
111111
*
112112
* @param version the version this test should run for
113113
* @param indexPattern an index pattern in order to open an index of see {@link

lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestGenerateBwcIndices.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public class TestGenerateBwcIndices extends LuceneTestCase {
3939
// To generate backcompat indexes with the current default codec, run the following gradle
4040
// command:
4141
// gradlew test -Ptests.bwcdir=/path/to/store/indexes -Ptests.codec=default
42-
// -Ptests.useSecurityManager=false --tests TestGenerateBwcIndices
42+
// -Ptests.useSecurityManager=false --tests TestGenerateBwcIndices --max-workers=1
4343
//
4444
// Also add testmethod with one of the index creation methods below, for example:
4545
// -Ptestmethod=testCreateCFS
@@ -82,14 +82,14 @@ public void testCreateSortedIndex() throws IOException {
8282
sortedTest.createBWCIndex();
8383
}
8484

85-
public void testCreateInt8HNSWIndices() throws IOException {
86-
TestInt8HnswBackwardsCompatibility int8HnswBackwardsCompatibility =
87-
new TestInt8HnswBackwardsCompatibility(
85+
public void testCreateInt7HNSWIndices() throws IOException {
86+
TestInt7HnswBackwardsCompatibility int7HnswBackwardsCompatibility =
87+
new TestInt7HnswBackwardsCompatibility(
8888
Version.LATEST,
8989
createPattern(
90-
TestInt8HnswBackwardsCompatibility.INDEX_NAME,
91-
TestInt8HnswBackwardsCompatibility.SUFFIX));
92-
int8HnswBackwardsCompatibility.createBWCIndex();
90+
TestInt7HnswBackwardsCompatibility.INDEX_NAME,
91+
TestInt7HnswBackwardsCompatibility.SUFFIX));
92+
int7HnswBackwardsCompatibility.createBWCIndex();
9393
}
9494

9595
private boolean isInitialMajorVersionRelease() {
Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,41 +23,46 @@
2323
import org.apache.lucene.backward_codecs.lucene99.Lucene99Codec;
2424
import org.apache.lucene.codecs.Codec;
2525
import org.apache.lucene.codecs.KnnVectorsFormat;
26+
import org.apache.lucene.codecs.KnnVectorsReader;
2627
import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat;
2728
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
29+
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
30+
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
2831
import org.apache.lucene.document.Document;
2932
import org.apache.lucene.document.Field;
3033
import org.apache.lucene.document.FieldType;
3134
import org.apache.lucene.document.KnnFloatVectorField;
3235
import org.apache.lucene.document.StringField;
36+
import org.apache.lucene.index.CodecReader;
3337
import org.apache.lucene.index.DirectoryReader;
3438
import org.apache.lucene.index.IndexReader;
3539
import org.apache.lucene.index.IndexWriter;
3640
import org.apache.lucene.index.IndexWriterConfig;
41+
import org.apache.lucene.index.LeafReaderContext;
3742
import org.apache.lucene.index.NoMergePolicy;
3843
import org.apache.lucene.index.VectorSimilarityFunction;
3944
import org.apache.lucene.search.IndexSearcher;
4045
import org.apache.lucene.store.Directory;
4146
import org.apache.lucene.tests.analysis.MockAnalyzer;
4247
import org.apache.lucene.tests.util.TestUtil;
4348
import org.apache.lucene.util.Version;
49+
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
4450

45-
public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTestBase {
51+
public class TestInt7HnswBackwardsCompatibility extends BackwardsCompatibilityTestBase {
4652

47-
static final String INDEX_NAME = "int8_hnsw";
53+
static final String INDEX_NAME = "int7_hnsw";
4854
static final String SUFFIX = "";
49-
private static final Version FIRST_INT8_HNSW_VERSION = Version.LUCENE_9_10_0;
55+
private static final Version FIRST_INT7_HNSW_VERSION = Version.LUCENE_9_10_0;
5056
private static final String KNN_VECTOR_FIELD = "knn_field";
5157
private static final int DOC_COUNT = 30;
5258
private static final FieldType KNN_VECTOR_FIELD_TYPE =
5359
KnnFloatVectorField.createFieldType(3, VectorSimilarityFunction.COSINE);
5460
private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f};
5561

56-
public TestInt8HnswBackwardsCompatibility(Version version, String pattern) {
62+
public TestInt7HnswBackwardsCompatibility(Version version, String pattern) {
5763
super(version, pattern);
5864
}
5965

60-
/** Provides all sorted versions to the test-framework */
6166
@ParametersFactory(argumentFormatting = "Lucene-Version:%1$s; Pattern: %2$s")
6267
public static Iterable<Object[]> testVersionsFactory() throws IllegalAccessException {
6368
return allVersion(INDEX_NAME, SUFFIX);
@@ -76,15 +81,15 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
7681

7782
@Override
7883
protected boolean supportsVersion(Version version) {
79-
return version.onOrAfter(FIRST_INT8_HNSW_VERSION);
84+
return version.onOrAfter(FIRST_INT7_HNSW_VERSION);
8085
}
8186

8287
@Override
8388
void verifyUsesDefaultCodec(Directory dir, String name) throws IOException {
8489
// We don't use the default codec
8590
}
8691

87-
public void testInt8HnswIndexAndSearch() throws Exception {
92+
public void testInt7HnswIndexAndSearch() throws Exception {
8893
IndexWriterConfig indexWriterConfig =
8994
newIndexWriterConfig(new MockAnalyzer(random()))
9095
.setOpenMode(IndexWriterConfig.OpenMode.APPEND)
@@ -108,7 +113,6 @@ public void testInt8HnswIndexAndSearch() throws Exception {
108113
assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0");
109114
}
110115
}
111-
// This will confirm the docs are really sorted
112116
TestUtil.checkIndex(directory);
113117
}
114118

@@ -117,7 +121,7 @@ protected void createIndex(Directory dir) throws IOException {
117121
IndexWriterConfig conf =
118122
new IndexWriterConfig(new MockAnalyzer(random()))
119123
.setMaxBufferedDocs(10)
120-
.setCodec(TestUtil.getDefaultCodec())
124+
.setCodec(getCodec())
121125
.setMergePolicy(NoMergePolicy.INSTANCE);
122126
try (IndexWriter writer = new IndexWriter(dir, conf)) {
123127
for (int i = 0; i < DOC_COUNT; i++) {
@@ -147,4 +151,29 @@ public void testReadOldIndices() throws Exception {
147151
assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0");
148152
}
149153
}
154+
155+
// #13880: make sure the BWC index really contains quantized HNSW not float32
156+
public void testIndexIsReallyQuantized() throws Exception {
157+
try (DirectoryReader reader = DirectoryReader.open(directory)) {
158+
for (LeafReaderContext leafContext : reader.leaves()) {
159+
KnnVectorsReader knnVectorsReader = ((CodecReader) leafContext.reader()).getVectorReader();
160+
assertTrue(
161+
"expected PerFieldKnnVectorsFormat.FieldsReader but got: " + knnVectorsReader,
162+
knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader);
163+
164+
KnnVectorsReader forField =
165+
((PerFieldKnnVectorsFormat.FieldsReader) knnVectorsReader)
166+
.getFieldReader(KNN_VECTOR_FIELD);
167+
168+
assertTrue(forField instanceof Lucene99HnswVectorsReader);
169+
170+
QuantizedByteVectorValues quantized =
171+
((Lucene99HnswVectorsReader) forField).getQuantizedVectorValues(KNN_VECTOR_FIELD);
172+
173+
assertNotNull(
174+
"KnnVectorsReader should have quantized interface for field " + KNN_VECTOR_FIELD,
175+
quantized);
176+
}
177+
}
178+
}
150179
}

0 commit comments

Comments
 (0)