Skip to content

feat: Vector Search #1639

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 42 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
7284ac5
Add vector value type
cindy-peng Oct 22, 2024
fe5e966
Add vectorValue type
cindy-peng Oct 22, 2024
9c19afa
Add vector value test
cindy-peng Oct 22, 2024
8163c95
Add unit tests and system tests
cindy-peng Oct 26, 2024
6df2d8a
Fix formatting
cindy-peng Oct 26, 2024
32fdd41
Fix empty FindNearest pb instance
cindy-peng Oct 28, 2024
2b11399
Fix formatting
cindy-peng Oct 28, 2024
6165685
Fix javadoc
cindy-peng Oct 28, 2024
4461f5b
Merge from main
cindy-peng Oct 28, 2024
17b411f
fix(sample): change update entity sample to use transaction (#1633)
cindy-peng Oct 24, 2024
2aafa17
deps: update dependency com.google.cloud:sdk-platform-java-config to …
renovate-bot Oct 24, 2024
949a0ae
chore(main): release 2.24.0 (#1631)
release-please[bot] Oct 25, 2024
11b3227
deps: update googleapis/sdk-platform-java action to v2.49.0 (#1638)
renovate-bot Oct 28, 2024
742c7b9
chore(main): release 2.24.1-SNAPSHOT (#1635)
release-please[bot] Oct 28, 2024
089b68e
chore: Update generation configuration at Sun Oct 27 02:26:19 UTC 202…
cloud-java-bot Oct 28, 2024
81980d2
deps: update dependency com.google.cloud:sdk-platform-java-config to …
renovate-bot Oct 28, 2024
546cf81
chore(main): release 2.24.1 (#1641)
release-please[bot] Oct 28, 2024
4b21c3e
merging conflict
cindy-peng Oct 28, 2024
c495bb6
chore: generate libraries at Mon Oct 28 20:25:23 UTC 2024
cloud-java-bot Oct 28, 2024
e93ce5c
Fix import
cindy-peng Oct 28, 2024
0072de4
chore: generate libraries at Mon Oct 28 20:30:34 UTC 2024
cloud-java-bot Oct 28, 2024
a371467
Add Integration test
cindy-peng Oct 30, 2024
c8340bf
Add comment and fix formatting
cindy-peng Oct 30, 2024
62da35d
Modify comment and fix formatting
cindy-peng Oct 30, 2024
9694abb
Add setExcludeFromIndexes back to vectorvalue builder
cindy-peng Oct 30, 2024
f267339
Adjust testVectorSearch sample code
cindy-peng Oct 31, 2024
e2f8e87
Add system tests check details
cindy-peng Dec 2, 2024
d66e0d6
Merge branch 'main' of https://github.com/googleapis/java-datastore i…
cindy-peng Dec 2, 2024
1806f0a
chore: generate libraries at Mon Dec 2 20:09:57 UTC 2024
cloud-java-bot Dec 2, 2024
fca3689
fix initial numer of entities for ITDatastoreTst
cindy-peng Dec 2, 2024
9d79efc
Merge branch 'cindy/vector-search-1' of https://github.com/googleapis…
cindy-peng Dec 2, 2024
60dd19d
Add sample code for Java datastore vector search
cindy-peng Dec 3, 2024
d7c2559
chore: generate libraries at Tue Dec 3 00:29:52 UTC 2024
cloud-java-bot Dec 3, 2024
4d0a125
Merge branch 'main' into cindy/vector-search-1
cindy-peng Dec 5, 2024
6679cd2
Add tests to sample code
cindy-peng Dec 14, 2024
402d2cd
Resolving conflicts
cindy-peng Dec 14, 2024
cf1bb19
Fix added interface method warning
cindy-peng Dec 14, 2024
7c46b40
Fix mvn lint formatting
cindy-peng Dec 14, 2024
a4f2a6c
Merge branch 'main' into cindy/vector-search-1
cindy-peng Dec 14, 2024
1c73ec9
chore: generate libraries at Sat Dec 14 01:46:19 UTC 2024
cloud-java-bot Dec 14, 2024
d03f9a1
Merge branch 'main' into cindy/vector-search-1
cindy-peng Jan 22, 2025
c31e66a
Merge branch 'main' into cindy/vector-search-1
cindy-peng Mar 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add system tests check details
  • Loading branch information
cindy-peng committed Dec 2, 2024
commit e2f8e8746598826e3877a7405bebe8c0710ceb63
2 changes: 1 addition & 1 deletion com/google/datastore/snippets/ConceptsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ private void setUpQueryTests() {
.set("tag", "fun", "l", "programming", "learn")
.set(
"vector_property",
VectorValue.newBuilder(3.0, 1.0, 2.0).setExcludeFromIndexes(true).build())
VectorValue.newBuilder(3.0, 1.0, 2.0).build())
.build());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,14 +142,12 @@ public static VectorValue of(List<Value<Double>> values) {
/** Returns a builder for {@code VectorValue} objects. */
public static Builder newBuilder() {
Builder builder = new VectorValue.Builder();
builder.setExcludeFromIndexes(true);
builder.setMeaning(VECTOR_MEANING);
return builder;
}

public static Builder newBuilder(double first, double... other) {
VectorValue.Builder builder = new VectorValue.Builder();
builder.setExcludeFromIndexes(true);
builder.setMeaning(VECTOR_MEANING);
return builder.addValue(first, other);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ public class BaseEntityTest {
private static final VectorValue VECTOR =
VectorValue.newBuilder(1.78, 2.56, 3.88)
.setMeaning(VECTOR_MEANING)
.setExcludeFromIndexes(true)
.build();
private static final Key KEY = Key.newBuilder("ds1", "k1", "n1").build();
private static final Entity ENTITY = Entity.newBuilder(KEY).set("name", "foo").build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,7 @@ public void testType() {
@Test
public void testExcludeFromIndexes() {
for (Map.Entry<ValueType, Value<?>> entry : typeToValue.entrySet()) {
if (entry.getKey() == ValueType.VECTOR) {
assertTrue(entry.getValue().excludeFromIndexes());
} else {
assertFalse(entry.getValue().excludeFromIndexes());
}
assertFalse(entry.getValue().excludeFromIndexes());
}
TestBuilder builder = new TestBuilder();
assertFalse(builder.build().excludeFromIndexes());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,10 @@ public void testOf() {
assertEquals(
ImmutableList.of(DoubleValue.of(0.3), DoubleValue.of(4.2), DoubleValue.of(3.7)),
value.get());
assertTrue(value.excludeFromIndexes());
assertEquals(31, value.getMeaning());
VectorValue value1 = VectorValue.of(vectorList);
assertEquals(vectorList, value1.get());
assertTrue(value1.excludeFromIndexes());
assertEquals(31, value1.getMeaning());
VectorValue vectorListValue = VectorValue.of(vectorList);
assertEquals(vectorList, vectorListValue.get());
assertEquals(31, vectorListValue.getMeaning());
}

@SuppressWarnings("deprecation")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ private void setUpQueryTests() {
StringValue.newBuilder("Learn Cloud Datastore").setExcludeFromIndexes(true).build())
.set("tag", "fun", "l", "programming", "learn")
.set(
"vector_property",
VectorValue.newBuilder(3.0, 1.0, 2.0).setExcludeFromIndexes(true).build())
"embedding_field",
VectorValue.newBuilder(3.0, 1.0, 2.0).build())
.build());
}

Expand Down Expand Up @@ -580,27 +580,28 @@ public void testVectorSearch() {
VectorValue vectorValue = VectorValue.newBuilder(1.78, 2.56, 3.88).build();
FindNearest vectorQuery =
new FindNearest(
"vector_property", vectorValue, FindNearest.DistanceMeasure.COSINE, 1, "distance");
"embedding_field", vectorValue, FindNearest.DistanceMeasure.COSINE, 1, "distance");

Query<Entity> query = Query.newEntityQueryBuilder().setFindNearest(vectorQuery).build();
Query<Entity> query = Query.newEntityQueryBuilder()
.setKind(TASK_CONCEPTS).setFindNearest(vectorQuery).build();
assertValidQuery(query);
}

@Test
public void testVectorSearchWithEmptyVector() {
VectorValue emptyVector = VectorValue.newBuilder().build();
FindNearest vectorQuery =
new FindNearest("vector_property", emptyVector, FindNearest.DistanceMeasure.EUCLIDEAN, 1);
Query<Entity> query = Query.newEntityQueryBuilder().setFindNearest(vectorQuery).build();
new FindNearest("embedding_field", emptyVector, FindNearest.DistanceMeasure.EUCLIDEAN, 1);
Query<Entity> query = Query.newEntityQueryBuilder().setKind(TASK_CONCEPTS).setFindNearest(vectorQuery).build();
assertInvalidQuery(query);
}

@Test
public void testVectorSearchWithUnmatchedVectorSize() {
VectorValue vectorValue = VectorValue.newBuilder(1.78, 2.56, 3.88, 4.33).build();
FindNearest vectorQuery =
new FindNearest("vector_property", vectorValue, FindNearest.DistanceMeasure.DOT_PRODUCT, 1);
Query<Entity> query = Query.newEntityQueryBuilder().setFindNearest(vectorQuery).build();
new FindNearest("embedding_field", vectorValue, FindNearest.DistanceMeasure.DOT_PRODUCT, 1);
Query<Entity> query = Query.newEntityQueryBuilder().setKind(TASK_CONCEPTS).setFindNearest(vectorQuery).build();
assertInvalidQuery(query);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,46 +32,12 @@

import com.google.cloud.Timestamp;
import com.google.cloud.Tuple;
import com.google.cloud.datastore.AggregationQuery;
import com.google.cloud.datastore.AggregationResult;
import com.google.cloud.datastore.AggregationResults;
import com.google.cloud.datastore.Batch;
import com.google.cloud.datastore.BooleanValue;
import com.google.cloud.datastore.Cursor;
import com.google.cloud.datastore.Datastore;
import com.google.cloud.datastore.*;
import com.google.cloud.datastore.Datastore.TransactionCallable;
import com.google.cloud.datastore.DatastoreException;
import com.google.cloud.datastore.DatastoreOptions;
import com.google.cloud.datastore.DatastoreReaderWriter;
import com.google.cloud.datastore.Entity;
import com.google.cloud.datastore.EntityQuery;
import com.google.cloud.datastore.EntityValue;
import com.google.cloud.datastore.FindNearest;
import com.google.cloud.datastore.FullEntity;
import com.google.cloud.datastore.GqlQuery;
import com.google.cloud.datastore.IncompleteKey;
import com.google.cloud.datastore.Key;
import com.google.cloud.datastore.KeyFactory;
import com.google.cloud.datastore.KeyValue;
import com.google.cloud.datastore.LatLng;
import com.google.cloud.datastore.LatLngValue;
import com.google.cloud.datastore.ListValue;
import com.google.cloud.datastore.NullValue;
import com.google.cloud.datastore.PathElement;
import com.google.cloud.datastore.ProjectionEntity;
import com.google.cloud.datastore.Query;
import com.google.cloud.datastore.Query.ResultType;
import com.google.cloud.datastore.QueryResults;
import com.google.cloud.datastore.ReadOption;
import com.google.cloud.datastore.StringValue;
import com.google.cloud.datastore.StructuredQuery;
import com.google.cloud.datastore.StructuredQuery.CompositeFilter;
import com.google.cloud.datastore.StructuredQuery.OrderBy;
import com.google.cloud.datastore.StructuredQuery.PropertyFilter;
import com.google.cloud.datastore.TimestampValue;
import com.google.cloud.datastore.Transaction;
import com.google.cloud.datastore.ValueType;
import com.google.cloud.datastore.VectorValue;
import com.google.cloud.datastore.models.ExecutionStats;
import com.google.cloud.datastore.models.ExplainMetrics;
import com.google.cloud.datastore.models.ExplainOptions;
Expand Down Expand Up @@ -127,6 +93,7 @@ public class ITDatastoreTest {
private static final String KIND1 = "kind1";
private static final String KIND2 = "kind2";
private static final String KIND3 = "kind3";
private static final String VECTOR_KIND = "CoffeeBean";
private static final NullValue NULL_VALUE = NullValue.of();
private static final StringValue STR_VALUE = StringValue.of("str");
private static final BooleanValue BOOL_VALUE =
Expand All @@ -147,6 +114,7 @@ public class ITDatastoreTest {
private static Key KEY4;
private static Key KEY5;
private static Key KEY6;
private static Key VECTORKEY;
private static final String MARKS_KIND = "Marks";
private static FullEntity<IncompleteKey> PARTIAL_ENTITY1;
private static FullEntity<IncompleteKey> PARTIAL_ENTITY2;
Expand All @@ -158,6 +126,9 @@ public class ITDatastoreTest {
private static Entity AGGREGATION_ENTITY_1;
private static Entity AGGREGATION_ENTITY_2;
private static Entity AGGREGATION_ENTITY_3;
private static Entity VECTOR_ENTITY_1;
private static Entity VECTOR_ENTITY_2;
private static Entity VECTOR_ENTITY_3;

@Rule public Timeout globalTimeout = Timeout.seconds(100);

Expand All @@ -178,6 +149,7 @@ public ITDatastoreTest(

PROJECT_ID = this.options.getProjectId();
NAMESPACE = this.options.getNamespace();
System.out.println("Project: " + PROJECT_ID + ", Namespace: " + NAMESPACE + ", db: " + options.getDatabaseId());

ROOT_KEY =
Key.newBuilder(PROJECT_ID, "rootkey", "default", options.getDatabaseId())
Expand All @@ -200,6 +172,9 @@ public ITDatastoreTest(
Key.newBuilder(options.getProjectId(), KIND2, 100, options.getDatabaseId())
.setNamespace(NAMESPACE)
.build();
VECTORKEY = Key.newBuilder(PROJECT_ID, VECTOR_KIND, "bean1", options.getDatabaseId())
.setNamespace(NAMESPACE)
.build();

LIST_VALUE2 = ListValue.of(Collections.singletonList(KeyValue.of(KEY1)));

Expand Down Expand Up @@ -249,6 +224,29 @@ public ITDatastoreTest(
.set("partial1", PARTIAL_ENTITY2)
.set("partial2", ENTITY2)
.build();
VECTOR_ENTITY_1 =
Entity.newBuilder(VECTORKEY)
.set("name", "Arabica")
.set(
"embedding_field",
VectorValue.newBuilder(1.0, 7.0, 11.1).build())
.build();
VECTOR_ENTITY_2 =
Entity.newBuilder(
Key.newBuilder(VECTORKEY).setName("bean2").build())
.set("name", "Robusta")
.set(
"embedding_field",
VectorValue.newBuilder(1.0, 9.0, 11.1).build()).set("vector_distance", 0)
.build();
VECTOR_ENTITY_3 =
Entity.newBuilder(
Key.newBuilder(VECTORKEY).setName("bean3").build())
.set("name", "Excelsa")
.set(
"embedding_field",
VectorValue.newBuilder(4.0, 9.0, 11.1).build())
.build();

Key aggregationKey1 = datastore.newKeyFactory().setKind(MARKS_KIND).newKey(1);
Key aggregationKey2 = datastore.newKeyFactory().setKind(MARKS_KIND).newKey(2);
Expand Down Expand Up @@ -291,7 +289,7 @@ public void tearDown() {
@Parameterized.Parameters(name = "database: {2}")
public static Iterable<Object[]> data() {
return Arrays.asList(
new Object[][] {{OPTIONS_1, DATASTORE_1, "default"}, {OPTIONS_2, DATASTORE_2, "test-db"}});
new Object[][] {{OPTIONS_1, DATASTORE_1, "default"}, {OPTIONS_2, DATASTORE_2, "test-db"}});
}

private <T> Iterator<T> getStronglyConsistentResults(Query scQuery, Query query)
Expand Down Expand Up @@ -2061,7 +2059,7 @@ public Integer run(DatastoreReaderWriter transaction) {
public void testSkippedResults() {
Query<Key> query = Query.newKeyQueryBuilder().setOffset(Integer.MAX_VALUE).build();
int numberOfEntities = datastore.run(query).getSkippedResults();
assertEquals(2, numberOfEntities);
assertEquals(5, numberOfEntities);
}

@Test
Expand Down Expand Up @@ -2118,61 +2116,77 @@ public void testQueryWithStartCursor() {
}

@Test
public void testQueryWithVectorSearch() {
Entity entity1 =
Entity.newBuilder(
Key.newBuilder(PROJECT_ID, KIND1, "name-01", options.getDatabaseId()).build())
.set(
"vector_property",
VectorValue.newBuilder(3.0, 9.0, 11.1).setExcludeFromIndexes(true).build())
.build();
Entity entity2 =
Entity.newBuilder(
Key.newBuilder(PROJECT_ID, KIND1, "name-02", options.getDatabaseId()).build())
.set(
"vector_property",
VectorValue.newBuilder(2.8, 2.56, 3.8).setExcludeFromIndexes(true).build())
.build();
Entity entity3 =
Entity.newBuilder(
Key.newBuilder(PROJECT_ID, KIND1, "name-03", options.getDatabaseId()).build())
.set(
"vector_property",
VectorValue.newBuilder(2.8, 2.56, 3.88).setExcludeFromIndexes(true).build())
.build();
datastore.put(entity1, entity2, entity3);
public void testVectorSearchQueryWithLimit() {
  // Store the three vector fixtures so the nearest-neighbour query has data to match.
  datastore.put(VECTOR_ENTITY_1, VECTOR_ENTITY_2, VECTOR_ENTITY_3);

  // FindNearest on "embedding_field" using DOT_PRODUCT, capped at three results.
  VectorValue target = VectorValue.newBuilder(1, 9, 11.1).build();
  FindNearest nearestByDotProduct =
      new FindNearest("embedding_field", target, FindNearest.DistanceMeasure.DOT_PRODUCT, 3);
  Query<Entity> limitedQuery =
      Query.newEntityQueryBuilder()
          .setKind(VECTOR_KIND)
          .setFindNearest(nearestByDotProduct)
          .build();

  QueryResults<Entity> limitedResults = datastore.run(limitedQuery);
  List<Entity> matches = makeResultsCopy(limitedResults);

  // The query must return exactly as many entities as the requested limit.
  assertEquals(3, matches.size());
}

// Query to find the nearest 2 neighbors with COSINE distance
FindNearest findNearestQuery =
new FindNearest(
"vector_property",
VectorValue.newBuilder(1.78, 2.56, 3.88).build(),
FindNearest.DistanceMeasure.COSINE,
2,
"distance");
Query<Entity> queryWithVectorSearch =
Query.newEntityQueryBuilder().setKind(KIND1).setFindNearest(findNearestQuery).build();
QueryResults<Entity> vectorSearchResult = datastore.run(queryWithVectorSearch);
List<Entity> resultsCopy = makeResultsCopy(vectorSearchResult);
// Should return nearest 2 neighbors
assertEquals(2, resultsCopy.size());

// Query to find the nearest neighbor with EUCLIDEAN distance
FindNearest findNearestWithLimit1 =
new FindNearest(
"vector_property",
VectorValue.newBuilder(2.8, 2.56, 3.88).build(),
FindNearest.DistanceMeasure.EUCLIDEAN,
1,
"distance");
Query<Entity> vectorQueryWithLimit1 =
Query.newEntityQueryBuilder().setKind(KIND1).setFindNearest(findNearestWithLimit1).build();
QueryResults<Entity> resultsWithVectorLimit1 = datastore.run(vectorQueryWithLimit1);
assertTrue(resultsWithVectorLimit1.hasNext());
// entity3 should be the nearest neighbor
assertEquals(entity3, resultsWithVectorLimit1.next());
assertFalse(resultsWithVectorLimit1.hasNext());
datastore.delete(entity1.getKey(), entity2.getKey(), entity3.getKey());
@Test
public void testVectorSearchQueryWithDistanceThreshold() {
  // Store the three vector fixtures so the nearest-neighbour query has data to match.
  datastore.put(VECTOR_ENTITY_1, VECTOR_ENTITY_2, VECTOR_ENTITY_3);

  // FindNearest on "embedding_field" using EUCLIDEAN distance: limit 3, distance written to
  // the "vector_distance" result property, and a distance threshold of 2.0.
  FindNearest findNearestQueryWithThreshold =
      new FindNearest(
          "embedding_field",
          VectorValue.newBuilder(1, 9, 11.1).build(),
          FindNearest.DistanceMeasure.EUCLIDEAN,
          3,
          "vector_distance",
          2.0);
  Query<Entity> queryWithThreshold =
      Query.newEntityQueryBuilder()
          .setKind(VECTOR_KIND)
          .setFindNearest(findNearestQueryWithThreshold)
          .build();
  QueryResults<Entity> resultWithThreshold = datastore.run(queryWithThreshold);
  List<Entity> resultsCopyWithThreshold = makeResultsCopy(resultWithThreshold);

  // Verify threshold was applied regardless of limit: only two results although limit is 3.
  assertEquals(2, resultsCopyWithThreshold.size());
  // Verify qualified EUCLIDEAN distances:
  //   d((1, 9, 11.1), (1, 9, 11.1)) = 0.0 and d((1, 9, 11.1), (1, 7, 11.1)) = 2.0
  assertEquals(DoubleValue.of(0.0), resultsCopyWithThreshold.get(0).getValue("vector_distance"));
  assertEquals(DoubleValue.of(2.0), resultsCopyWithThreshold.get(1).getValue("vector_distance"));
}

@Test
public void testQueryWithVectorSearchWithDistanceField() {
  // Store the three vector fixtures so the nearest-neighbour query has data to match.
  datastore.put(VECTOR_ENTITY_1, VECTOR_ENTITY_2, VECTOR_ENTITY_3);

  // FindNearest on "embedding_field" using DOT_PRODUCT: limit 3, distance written to the
  // "vector_distance" result property, threshold 0.0.
  VectorValue target = VectorValue.newBuilder(1, 9, 11.1).build();
  FindNearest nearestWithDistanceField =
      new FindNearest(
          "embedding_field",
          target,
          FindNearest.DistanceMeasure.DOT_PRODUCT,
          3,
          "vector_distance",
          0.0);
  Query<Entity> distanceFieldQuery =
      Query.newEntityQueryBuilder()
          .setKind(VECTOR_KIND)
          .setFindNearest(nearestWithDistanceField)
          .build();
  QueryResults<Entity> distanceFieldResults = datastore.run(distanceFieldQuery);
  List<Entity> matches = makeResultsCopy(distanceFieldResults);

  // All three stored entities are expected back.
  assertEquals(3, matches.size());

  // Every result must carry a "vector_distance" value different from DoubleValue 0.0.
  // NOTE(review): VECTOR_ENTITY_2 is stored with its own "vector_distance" property set to 0;
  // presumably the computed distance replaces it in the result - confirm, because a stored
  // non-double zero would also satisfy this not-equals check.
  for (Entity match : matches) {
    assertNotEquals(DoubleValue.of(0.0), match.getValue("vector_distance"));
  }
}

@Test
Expand Down Expand Up @@ -2224,7 +2238,7 @@ public void testQueryWithReadTime() throws InterruptedException {
assertEquals(entity2, withReadTime.next());
assertFalse(withReadTime.hasNext());
} finally {
datastore.delete(entity1.getKey(), entity2.getKey(), entity3.getKey());
// datastore.delete(entity1.getKey(), entity2.getKey(), entity3.getKey());
}
}

Expand Down
Loading
Loading