Get length calculations correct

elastic · thecoop · Mar 25, 2025 · Mar 12, 2025 · Mar 12, 2025 · Mar 13, 2025
commit 646f1b8fe92acfbe6fef8e7e153d11fdcb2f5b7c
diff --git a/...in21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java b/...in21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java
@@ -194,17 +194,23 @@ public static long ipByteBin128(byte[] q, byte[] d) {
         VectorShape.forBitSize(INT_SPECIES_256.vectorBitSize() / Integer.BYTES)
     );
 
+    private static int limit(int length, int sectionSize) {
+        return length - (length % sectionSize);
+    }
+
     static int ipByteBit512(byte[] q, byte[] d) {
         assert q.length == d.length * Byte.SIZE;
         int i = 0;
         int sum = 0;
 
-        if (q.length >= INT_SPECIES_512.length() * 4) {
+        int sectionLength = INT_SPECIES_512.length() * 4;
+        if (q.length >= sectionLength) {
             IntVector acc0 = IntVector.zero(INT_SPECIES_512);
             IntVector acc1 = IntVector.zero(INT_SPECIES_512);
             IntVector acc2 = IntVector.zero(INT_SPECIES_512);
             IntVector acc3 = IntVector.zero(INT_SPECIES_512);
-            for (; i < INT_SPECIES_512.loopBound(q.length); i += INT_SPECIES_512.length() * 4) {
+            int limit = limit(q.length, sectionLength);
+            for (; i < limit; i += sectionLength) {
                 var vals0 = ByteVector.fromArray(BYTE_SPECIES_FOR_INT_512, q, i).castShape(INT_SPECIES_512, 0);
                 var vals1 = ByteVector.fromArray(BYTE_SPECIES_FOR_INT_512, q, i + INT_SPECIES_512.length()).castShape(INT_SPECIES_512, 0);
                 var vals2 = ByteVector.fromArray(BYTE_SPECIES_FOR_INT_512, q, i + INT_SPECIES_512.length() * 2)
@@ -227,12 +233,14 @@ static int ipByteBit512(byte[] q, byte[] d) {
                 + acc3.reduceLanes(VectorOperators.ADD);
         }
 
-        if (q.length - i >= INT_SPECIES_256.length() * 4) {
+        sectionLength = INT_SPECIES_256.length() * 4;
+        if (q.length - i >= sectionLength) {
             IntVector acc0 = IntVector.zero(INT_SPECIES_256);
             IntVector acc1 = IntVector.zero(INT_SPECIES_256);
             IntVector acc2 = IntVector.zero(INT_SPECIES_256);
             IntVector acc3 = IntVector.zero(INT_SPECIES_256);
-            for (; i < INT_SPECIES_256.loopBound(q.length); i += INT_SPECIES_256.length() * 4) {
+            int limit = limit(q.length, sectionLength);
+            for (; i < limit; i += sectionLength) {
                 var vals0 = ByteVector.fromArray(BYTE_SPECIES_FOR_INT_256, q, i).castShape(INT_SPECIES_256, 0);
                 var vals1 = ByteVector.fromArray(BYTE_SPECIES_FOR_INT_256, q, i + INT_SPECIES_256.length()).castShape(INT_SPECIES_256, 0);
                 var vals2 = ByteVector.fromArray(BYTE_SPECIES_FOR_INT_256, q, i + INT_SPECIES_256.length() * 2)
@@ -257,7 +265,8 @@ static int ipByteBit512(byte[] q, byte[] d) {
 
         if (i < q.length) {
             // do the tail
-            sum += DefaultESVectorUtilSupport.ipByteBitImpl(q, d, i);
+            // default implementation indexes by data vector (one byte covers 8 query elements), not query vector, so scale the offset
+            sum += DefaultESVectorUtilSupport.ipByteBitImpl(q, d, i / 8);
         }
         return sum;
     }
@@ -267,12 +276,14 @@ static int ipByteBit256(byte[] q, byte[] d) {
         int i = 0;
         int sum = 0;
 
-        if (q.length >= INT_SPECIES_256.length() * 4) {
+        int sectionLength = INT_SPECIES_256.length() * 4;
+        if (q.length >= sectionLength) {
             IntVector acc0 = IntVector.zero(INT_SPECIES_256);
             IntVector acc1 = IntVector.zero(INT_SPECIES_256);
             IntVector acc2 = IntVector.zero(INT_SPECIES_256);
             IntVector acc3 = IntVector.zero(INT_SPECIES_256);
-            for (; i < INT_SPECIES_256.loopBound(q.length); i += INT_SPECIES_256.length() * 4) {
+            int limit = limit(q.length, sectionLength);
+            for (; i < limit; i += sectionLength) {
                 var vals0 = ByteVector.fromArray(BYTE_SPECIES_FOR_INT_256, q, i).castShape(INT_SPECIES_256, 0);
                 var vals1 = ByteVector.fromArray(BYTE_SPECIES_FOR_INT_256, q, i + INT_SPECIES_256.length()).castShape(INT_SPECIES_256, 0);
                 var vals2 = ByteVector.fromArray(BYTE_SPECIES_FOR_INT_256, q, i + INT_SPECIES_256.length() * 2)
@@ -297,7 +308,8 @@ static int ipByteBit256(byte[] q, byte[] d) {
 
         if (i < q.length) {
             // do the tail
-            sum += DefaultESVectorUtilSupport.ipByteBitImpl(q, d, i);
+            // default implementation indexes by data vector (one byte covers 8 query elements), not query vector, so scale the offset
+            sum += DefaultESVectorUtilSupport.ipByteBitImpl(q, d, i / 8);
         }
         return sum;
     }
@@ -310,12 +322,14 @@ static float ipFloatBit512(float[] q, byte[] d) {
         int i = 0;
         float sum = 0;
 
-        if (q.length >= FLOAT_SPECIES_512.length() * 4) {
+        int sectionLength = FLOAT_SPECIES_512.length() * 4;
+        if (q.length >= sectionLength) {
             FloatVector acc0 = FloatVector.zero(FLOAT_SPECIES_512);
             FloatVector acc1 = FloatVector.zero(FLOAT_SPECIES_512);
             FloatVector acc2 = FloatVector.zero(FLOAT_SPECIES_512);
             FloatVector acc3 = FloatVector.zero(FLOAT_SPECIES_512);
-            for (; i < FLOAT_SPECIES_512.loopBound(q.length); i += FLOAT_SPECIES_512.length() * 4) {
+            int limit = limit(q.length, sectionLength);
+            for (; i < limit; i += sectionLength) {
                 var floats0 = FloatVector.fromArray(FLOAT_SPECIES_512, q, i);
                 var floats1 = FloatVector.fromArray(FLOAT_SPECIES_512, q, i + FLOAT_SPECIES_512.length());
                 var floats2 = FloatVector.fromArray(FLOAT_SPECIES_512, q, i + FLOAT_SPECIES_512.length() * 2);
@@ -336,12 +350,14 @@ static float ipFloatBit512(float[] q, byte[] d) {
                 + acc3.reduceLanes(VectorOperators.ADD);
         }
 
-        if (q.length - i >= FLOAT_SPECIES_256.length() * 4) {
+        sectionLength = FLOAT_SPECIES_256.length() * 4;
+        if (q.length - i >= sectionLength) {
             FloatVector acc0 = FloatVector.zero(FLOAT_SPECIES_256);
             FloatVector acc1 = FloatVector.zero(FLOAT_SPECIES_256);
             FloatVector acc2 = FloatVector.zero(FLOAT_SPECIES_256);
             FloatVector acc3 = FloatVector.zero(FLOAT_SPECIES_256);
-            for (; i < FLOAT_SPECIES_256.loopBound(q.length); i += FLOAT_SPECIES_256.length() * 4) {
+            int limit = limit(q.length, sectionLength);
+            for (; i < limit; i += sectionLength) {
                 var floats0 = FloatVector.fromArray(FLOAT_SPECIES_256, q, i);
                 var floats1 = FloatVector.fromArray(FLOAT_SPECIES_256, q, i + FLOAT_SPECIES_256.length());
                 var floats2 = FloatVector.fromArray(FLOAT_SPECIES_256, q, i + FLOAT_SPECIES_256.length() * 2);
@@ -364,7 +380,8 @@ static float ipFloatBit512(float[] q, byte[] d) {
 
         if (i < q.length) {
             // do the tail
-            sum += DefaultESVectorUtilSupport.ipFloatBitImpl(q, d, i);
+            // default implementation indexes by data vector (one byte covers 8 query elements), not query vector, so scale the offset
+            sum += DefaultESVectorUtilSupport.ipFloatBitImpl(q, d, i / 8);
         }
 
         return sum;
@@ -375,12 +392,14 @@ static float ipFloatBit256(float[] q, byte[] d) {
         int i = 0;
         float sum = 0;
 
-        if (q.length >= FLOAT_SPECIES_256.length() * 4) {
+        int sectionLength = FLOAT_SPECIES_256.length() * 4;
+        if (q.length >= sectionLength) {
             FloatVector acc0 = FloatVector.zero(FLOAT_SPECIES_256);
             FloatVector acc1 = FloatVector.zero(FLOAT_SPECIES_256);
             FloatVector acc2 = FloatVector.zero(FLOAT_SPECIES_256);
             FloatVector acc3 = FloatVector.zero(FLOAT_SPECIES_256);
-            for (; i < FLOAT_SPECIES_256.loopBound(q.length); i += FLOAT_SPECIES_256.length() * 4) {
+            int limit = limit(q.length, sectionLength);
+            for (; i < limit; i += sectionLength) {
                 var floats0 = FloatVector.fromArray(FLOAT_SPECIES_256, q, i);
                 var floats1 = FloatVector.fromArray(FLOAT_SPECIES_256, q, i + FLOAT_SPECIES_256.length());
                 var floats2 = FloatVector.fromArray(FLOAT_SPECIES_256, q, i + FLOAT_SPECIES_256.length() * 2);
@@ -403,7 +422,8 @@ static float ipFloatBit256(float[] q, byte[] d) {
 
         if (i < q.length) {
             // do the tail
-            sum += DefaultESVectorUtilSupport.ipFloatBitImpl(q, d, i);
+            // default implementation indexes by data vector (one byte covers 8 query elements), not query vector, so scale the offset
+            sum += DefaultESVectorUtilSupport.ipFloatBitImpl(q, d, i / 8);
         }
 
         return sum;

diff --git a/libs/simdvec/src/test/java/org/elasticsearch/simdvec/ESVectorUtilTests.java b/libs/simdvec/src/test/java/org/elasticsearch/simdvec/ESVectorUtilTests.java
@@ -22,25 +22,41 @@ public class ESVectorUtilTests extends BaseVectorizationTests {
     static final ESVectorizationProvider defOrPanamaProvider = BaseVectorizationTests.maybePanamaProvider();
 
     public void testIpByteBit() {
-        byte[] q = new byte[16];
-        byte[] d = new byte[] { (byte) Integer.parseInt("01100010", 2), (byte) Integer.parseInt("10100111", 2) };
+        byte[] d = new byte[random().nextInt(128)];
+        byte[] q = new byte[d.length * 8];
+        random().nextBytes(d);
         random().nextBytes(q);
-        int expected = q[1] + q[2] + q[6] + q[8] + q[10] + q[13] + q[14] + q[15];
-        assertEquals(expected, ESVectorUtil.ipByteBit(q, d));
-        assertEquals(expected, defaultedProvider.getVectorUtilSupport().ipByteBit(q, d));
-        assertEquals(expected, defOrPanamaProvider.getVectorUtilSupport().ipByteBit(q, d));
+
+        int sum = 0;
+        for (int i = 0; i < q.length; i++) {
+            if (((d[i / 8] << (i % 8)) & 0x80) == 0x80) {
+                sum += q[i];
+            }
+        }
+
+        assertEquals(sum, ESVectorUtil.ipByteBit(q, d));
+        assertEquals(sum, defaultedProvider.getVectorUtilSupport().ipByteBit(q, d));
+        assertEquals(sum, defOrPanamaProvider.getVectorUtilSupport().ipByteBit(q, d));
     }
 
     public void testIpFloatBit() {
-        float[] q = new float[16];
-        byte[] d = new byte[] { (byte) Integer.parseInt("01100010", 2), (byte) Integer.parseInt("10100111", 2) };
+        byte[] d = new byte[random().nextInt(128)];
+        float[] q = new float[d.length * 8];
+        random().nextBytes(d);
+
+        float sum = 0;
         for (int i = 0; i < q.length; i++) {
             q[i] = random().nextFloat();
+            if (((d[i / 8] << (i % 8)) & 0x80) == 0x80) {
+                sum += q[i];
+            }
         }
-        float expected = q[1] + q[2] + q[6] + q[8] + q[10] + q[13] + q[14] + q[15];
-        assertEquals(expected, ESVectorUtil.ipFloatBit(q, d), 1e-6);
-        assertEquals(expected, defaultedProvider.getVectorUtilSupport().ipFloatBit(q, d), 1e-6);
-        assertEquals(expected, defOrPanamaProvider.getVectorUtilSupport().ipFloatBit(q, d), 1e-6);
+
+        double delta = 1e-5 * q.length;
+
+        assertEquals(sum, ESVectorUtil.ipFloatBit(q, d), delta);
+        assertEquals(sum, defaultedProvider.getVectorUtilSupport().ipFloatBit(q, d), delta);
+        assertEquals(sum, defOrPanamaProvider.getVectorUtilSupport().ipFloatBit(q, d), delta);
     }
 
     public void testIpFloatByte() {