Skip to content
This repository was archived by the owner on Sep 9, 2023. It is now read-only.

feat(samples): add all feature values samples #981

Merged
merged 8 commits into from
Aug 3, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions samples/install-without-bom/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@
<version>1.1.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-bigquery</artifactId>
<version>2.13.6</version>
</dependency>
</dependencies>

<!-- compile and run all snippet tests -->
Expand Down
5 changes: 5 additions & 0 deletions samples/snapshot/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@
<version>1.1.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-bigquery</artifactId>
<version>2.13.6</version>
</dependency>
</dependencies>

<!-- compile and run all snippet tests -->
Expand Down
6 changes: 5 additions & 1 deletion samples/snippets/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@
<artifactId>proto-google-cloud-aiplatform-v1beta1</artifactId>
<version>0.15.7</version>
</dependency>

<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-bigquery</artifactId>
<version>2.13.6</version>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Copyright 2022 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
* Create features in bulk for an existing entity type. See
* https://cloud.google.com/vertex-ai/docs/featurestore/setup
* before running the code snippet
*/

package aiplatform;

// [START aiplatform_batch_create_features_sample]

import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.BatchCreateFeaturesOperationMetadata;
import com.google.cloud.aiplatform.v1.BatchCreateFeaturesRequest;
import com.google.cloud.aiplatform.v1.BatchCreateFeaturesResponse;
import com.google.cloud.aiplatform.v1.CreateFeatureRequest;
import com.google.cloud.aiplatform.v1.EntityTypeName;
import com.google.cloud.aiplatform.v1.Feature;
import com.google.cloud.aiplatform.v1.Feature.ValueType;
import com.google.cloud.aiplatform.v1.FeaturestoreServiceClient;
import com.google.cloud.aiplatform.v1.FeaturestoreServiceSettings;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class BatchCreateFeaturesSample {

/**
 * Entry point: fills in the sample's inputs and delegates to
 * {@code batchCreateFeaturesSample}.
 */
public static void main(String[] args)
    throws IOException, InterruptedException, ExecutionException, TimeoutException {
  // TODO(developer): Replace these variables before running the sample.
  final String location = "us-central1";
  final String endpoint = "us-central1-aiplatform.googleapis.com:443";
  final String project = "YOUR_PROJECT_ID";
  final String featurestoreId = "YOUR_FEATURESTORE_ID";
  final String entityTypeId = "YOUR_ENTITY_TYPE_ID";
  // Handed to the sample as its timeout argument.
  final int timeout = 300;

  batchCreateFeaturesSample(
      project,
      featurestoreId,
      entityTypeId,
      location,
      endpoint,
      timeout);
}

/**
 * Creates three features ("title", "genres" and "average_rating") with a single batch request
 * under the given entity type, waits for the long-running operation and prints its response.
 *
 * @param project project ID used to build the parent entity type resource name
 * @param featurestoreId featurestore ID used to build the parent entity type resource name
 * @param entityTypeId entity type ID used to build the parent entity type resource name
 * @param location location used to build the parent entity type resource name
 * @param endpoint service endpoint set on the client settings
 * @param timeout maximum time, in seconds, to wait for the operation result
 * @throws TimeoutException if the operation result is not available within {@code timeout}
 *     seconds
 */
static void batchCreateFeaturesSample(String project, String featurestoreId, String entityTypeId,
String location, String endpoint, int timeout)
throws IOException, InterruptedException, ExecutionException, TimeoutException {
// The client is pointed at the caller-supplied endpoint.
FeaturestoreServiceSettings featurestoreServiceSettings =
FeaturestoreServiceSettings.newBuilder().setEndpoint(endpoint).build();

// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
try (FeaturestoreServiceClient featurestoreServiceClient =
FeaturestoreServiceClient.create(featurestoreServiceSettings)) {

List<CreateFeatureRequest> createFeatureRequests = new ArrayList<>();

// Feature definitions: each carries a description and a value type that is looked up from
// its enum constant name via valueOf.
Feature titleFeature = Feature.newBuilder().setDescription("The title of the movie")
.setValueType(ValueType.valueOf("STRING")).build();
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the ENUM directly here and in other places as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, will update to ENUM type everywhere as below,

Suggested change
.setValueType(ValueType.valueOf("STRING")).build();
.setValueType(ValueType.STRING).build();

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

Feature genresFeature = Feature.newBuilder().setDescription("The genres of the movie")
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: change to singular genre, to me genres represents an array of genres.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using the same features as https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/feature_store/gapic-feature-store.ipynb sample tutorial provided in SOW.
So thought it is ok, should we change it here?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see, let's retain it for now. It was a minor nit.

.setValueType(ValueType.valueOf("STRING")).build();
Feature averageRatingFeature = Feature.newBuilder()
.setDescription("The average rating for the movie, range is [1.0-5.0]")
.setValueType(ValueType.valueOf("DOUBLE")).build();

// Pair each feature definition with the feature ID it is created under.
createFeatureRequests.add(
CreateFeatureRequest.newBuilder().setFeature(titleFeature).setFeatureId("title").build());

createFeatureRequests.add(CreateFeatureRequest.newBuilder().setFeature(genresFeature)
.setFeatureId("genres").build());

createFeatureRequests.add(CreateFeatureRequest.newBuilder().setFeature(averageRatingFeature)
.setFeatureId("average_rating").build());

// The batch request's parent is the entity type resource name built from the four IDs.
BatchCreateFeaturesRequest request = BatchCreateFeaturesRequest.newBuilder()
.setParent(EntityTypeName.of(project, location, featurestoreId, entityTypeId).toString())
.addAllRequests(createFeatureRequests).build();

OperationFuture<BatchCreateFeaturesResponse, BatchCreateFeaturesOperationMetadata> future =
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Name the operationFutures consistently across the files, maybe use batchCreateFeaturesOperationFuture or just future, but use same across.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, will use same across

featurestoreServiceClient.batchCreateFeaturesAsync(request);
// Print the operation name taken from the initial future before blocking on the final result.
System.out.format("Operation name: %s%n", future.getInitialFuture().get().getName());
System.out.println("Waiting for operation to finish...");
// Waits at most `timeout` seconds for the response; a TimeoutException propagates otherwise.
BatchCreateFeaturesResponse batchCreateFeaturesResponse =
future.get(timeout, TimeUnit.SECONDS);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just to confirm, is the behavior here to wait the 300 sec and timeout otherwise. If there is a response available, it's returned.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, if the response is not received in 300 sec it will throw timeout

System.out.println("Batch Create Features Response");
System.out.println(batchCreateFeaturesResponse);
}
}
}
// [END aiplatform_batch_create_features_sample]

Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Copyright 2022 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
* Batch read feature values from a featurestore, as determined by your
* read instances list file, to export data. See
* https://cloud.google.com/vertex-ai/docs/featurestore/setup before running
* the code snippet
*/

package aiplatform;

// [START aiplatform_batch_read_feature_values_sample]

import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.BatchReadFeatureValuesOperationMetadata;
import com.google.cloud.aiplatform.v1.BatchReadFeatureValuesRequest;
import com.google.cloud.aiplatform.v1.BatchReadFeatureValuesRequest.EntityTypeSpec;
import com.google.cloud.aiplatform.v1.BatchReadFeatureValuesResponse;
import com.google.cloud.aiplatform.v1.BigQueryDestination;
import com.google.cloud.aiplatform.v1.CsvSource;
import com.google.cloud.aiplatform.v1.FeatureSelector;
import com.google.cloud.aiplatform.v1.FeatureValueDestination;
import com.google.cloud.aiplatform.v1.FeaturestoreName;
import com.google.cloud.aiplatform.v1.FeaturestoreServiceClient;
import com.google.cloud.aiplatform.v1.FeaturestoreServiceSettings;
import com.google.cloud.aiplatform.v1.GcsSource;
import com.google.cloud.aiplatform.v1.IdMatcher;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class BatchReadFeatureValuesSample {

/**
 * Entry point: fills in the sample's inputs and delegates to
 * {@code batchReadFeatureValuesSample}.
 */
public static void main(String[] args)
    throws IOException, InterruptedException, ExecutionException, TimeoutException {
  // TODO(developer): Replace these variables before running the sample.
  String project = "YOUR_PROJECT_ID";
  String featurestoreId = "YOUR_FEATURESTORE_ID";
  String entityTypeId = "YOUR_ENTITY_TYPE_ID";
  // Placeholder spelled "YOUR_..." to match the other placeholders in this sample.
  String inputCsvFile = "YOUR_INPUT_CSV_FILE";
  String destinationTableUri = "YOUR_DESTINATION_TABLE_URI";
  String location = "us-central1";
  String endpoint = "us-central1-aiplatform.googleapis.com:443";
  // Handed to the sample as its timeout argument.
  int timeout = 300;
  batchReadFeatureValuesSample(project, featurestoreId, entityTypeId, inputCsvFile,
      destinationTableUri, location, endpoint, timeout);
}

/**
 * Starts a batch read of feature values from a featurestore, using a CSV read-instances source
 * and a BigQuery destination, waits for the long-running operation and prints its response.
 *
 * @param project project ID used to build the featurestore resource name
 * @param featurestoreId featurestore ID used to build the featurestore resource name
 * @param entityTypeId entity type ID set on the single entity type spec of the request
 * @param inputCsvFile URI added to the GCS source that backs the CSV read instances
 * @param destinationTableUri output URI set on the BigQuery destination
 * @param location location used to build the featurestore resource name
 * @param endpoint service endpoint set on the client settings
 * @param timeout maximum time, in seconds, to wait for the operation result
 * @throws TimeoutException if the operation result is not available within {@code timeout}
 *     seconds
 */
static void batchReadFeatureValuesSample(String project, String featurestoreId,
String entityTypeId, String inputCsvFile, String destinationTableUri, String location,
String endpoint, int timeout)
throws IOException, InterruptedException, ExecutionException, TimeoutException {
// The client is pointed at the caller-supplied endpoint.
FeaturestoreServiceSettings featurestoreServiceSettings =
FeaturestoreServiceSettings.newBuilder().setEndpoint(endpoint).build();

// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
try (FeaturestoreServiceClient featurestoreServiceClient =
FeaturestoreServiceClient.create(featurestoreServiceSettings)) {

List<EntityTypeSpec> entityTypeSpecs = new ArrayList<>();

// NOTE(review): the single ID "*" looks like a wildcard selecting every feature of the
// entity type; confirm against the IdMatcher documentation.
List<String> ids = new ArrayList<>();
ids.add("*");
FeatureSelector featureSelector = FeatureSelector.newBuilder()
.setIdMatcher(IdMatcher.newBuilder().addAllIds(ids).build()).build();
EntityTypeSpec entityTypeSpec = EntityTypeSpec.newBuilder().setEntityTypeId(entityTypeId)
.setFeatureSelector(featureSelector).build();
entityTypeSpecs.add(entityTypeSpec);

// Output goes to BigQuery; the read instances come from the CSV file referenced by URI.
BigQueryDestination bigQueryDestination =
BigQueryDestination.newBuilder().setOutputUri(destinationTableUri).build();
GcsSource gcsSource = GcsSource.newBuilder().addUris(inputCsvFile).build();
BatchReadFeatureValuesRequest batchReadFeatureValuesRequest =
BatchReadFeatureValuesRequest.newBuilder()
.setFeaturestore(FeaturestoreName.of(project, location, featurestoreId).toString())
.setCsvReadInstances(CsvSource.newBuilder().setGcsSource(gcsSource))
.setDestination(
FeatureValueDestination.newBuilder().setBigqueryDestination(bigQueryDestination))
// .addAllPassThroughFields(passThroughFieldsList)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we include this as well, if possible ?

Copy link
Contributor Author

@sai-chaithu sai-chaithu Jul 8, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removing that as there are no passThroughFields for the movie prediction example.

.addAllEntityTypeSpecs(entityTypeSpecs).build();

OperationFuture<BatchReadFeatureValuesResponse, BatchReadFeatureValuesOperationMetadata> brf =
featurestoreServiceClient.batchReadFeatureValuesAsync(batchReadFeatureValuesRequest);
// Print the operation name taken from the initial future before blocking on the final result.
System.out.format("Operation name: %s%n", brf.getInitialFuture().get().getName());
System.out.println("Waiting for operation to finish...");
// Waits at most `timeout` seconds for the response; a TimeoutException propagates otherwise.
BatchReadFeatureValuesResponse batchReadFeatureValuesResponse =
brf.get(timeout, TimeUnit.SECONDS);
System.out.println("Batch Read Feature Values Response");
System.out.println(batchReadFeatureValuesResponse);
}
}
}
// [END aiplatform_batch_read_feature_values_sample]

Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
* Copyright 2022 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
* Bulk export feature values from a featurestore. See
* https://cloud.google.com/vertex-ai/docs/featurestore/setup before running
* the code snippet
*/

package aiplatform;

// [START aiplatform_export_feature_values_sample]

import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.aiplatform.v1.BigQueryDestination;
import com.google.cloud.aiplatform.v1.EntityTypeName;
import com.google.cloud.aiplatform.v1.ExportFeatureValuesOperationMetadata;
import com.google.cloud.aiplatform.v1.ExportFeatureValuesRequest;
import com.google.cloud.aiplatform.v1.ExportFeatureValuesRequest.FullExport;
import com.google.cloud.aiplatform.v1.ExportFeatureValuesResponse;
import com.google.cloud.aiplatform.v1.FeatureSelector;
import com.google.cloud.aiplatform.v1.FeatureValueDestination;
import com.google.cloud.aiplatform.v1.FeaturestoreServiceClient;
import com.google.cloud.aiplatform.v1.FeaturestoreServiceSettings;
import com.google.cloud.aiplatform.v1.IdMatcher;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class ExportFeatureValuesSample {

/**
 * Entry point: fills in the sample's inputs and delegates to
 * {@code exportFeatureValuesSample}.
 */
public static void main(String[] args)
    throws IOException, InterruptedException, ExecutionException, TimeoutException {
  // TODO(developer): Replace these variables before running the sample.
  final String location = "us-central1";
  final String endpoint = "us-central1-aiplatform.googleapis.com:443";
  final String project = "YOUR_PROJECT_ID";
  final String featurestoreId = "YOUR_FEATURESTORE_ID";
  final String entityTypeId = "YOUR_ENTITY_TYPE_ID";
  final String destinationTableUri = "YOUR_DESTINATION_TABLE_URI";
  // Handed to the sample as its timeout argument.
  final int timeout = 300;

  exportFeatureValuesSample(
      project,
      featurestoreId,
      entityTypeId,
      destinationTableUri,
      location,
      endpoint,
      timeout);
}

/**
 * Starts a full export of feature values for one entity type to a BigQuery destination, waits
 * for the long-running operation and prints its response.
 *
 * @param project project ID used to build the entity type resource name
 * @param featurestoreId featurestore ID used to build the entity type resource name
 * @param entityTypeId entity type ID used to build the entity type resource name
 * @param destinationTableUri output URI set on the BigQuery destination
 * @param location location used to build the entity type resource name
 * @param endpoint service endpoint set on the client settings
 * @param timeout maximum time, in seconds, to wait for the operation result
 * @throws TimeoutException if the operation result is not available within {@code timeout}
 *     seconds
 */
static void exportFeatureValuesSample(String project, String featurestoreId, String entityTypeId,
String destinationTableUri, String location, String endpoint, int timeout)
throws IOException, InterruptedException, ExecutionException, TimeoutException {
// The client is pointed at the caller-supplied endpoint.
FeaturestoreServiceSettings featurestoreServiceSettings =
FeaturestoreServiceSettings.newBuilder().setEndpoint(endpoint).build();

// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
try (FeaturestoreServiceClient featurestoreServiceClient =
FeaturestoreServiceClient.create(featurestoreServiceSettings)) {

// NOTE(review): the single ID "*" looks like a wildcard selecting every feature of the
// entity type; confirm against the IdMatcher documentation.
List<String> ids = new ArrayList<>();
ids.add("*");
FeatureSelector featureSelector = FeatureSelector.newBuilder()
.setIdMatcher(IdMatcher.newBuilder().addAllIds(ids).build()).build();
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: inline something like addAllIds(ArrayList.of()...), or make a class variable if intended for developer to update.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, will add inline as below,

Suggested change
.setIdMatcher(IdMatcher.newBuilder().addAllIds(ids).build()).build();
.setIdMatcher(IdMatcher.newBuilder().addAllIds(Arrays.asList("title","genres","average_rating")).build()).build();

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel it's better to make it one of the variables the developer has to update and pass it down here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated code to pass the feature selector ids as a list object


// The request names the entity type, the BigQuery destination, the feature selector and
// an empty FullExport builder.
ExportFeatureValuesRequest exportFeatureValuesRequest =
ExportFeatureValuesRequest.newBuilder()
.setEntityType(
EntityTypeName.of(project, location, featurestoreId, entityTypeId).toString())
.setDestination(FeatureValueDestination.newBuilder().setBigqueryDestination(
BigQueryDestination.newBuilder().setOutputUri(destinationTableUri)))
.setFeatureSelector(featureSelector).setFullExport(FullExport.newBuilder()).build();

OperationFuture<ExportFeatureValuesResponse, ExportFeatureValuesOperationMetadata> future =
featurestoreServiceClient.exportFeatureValuesAsync(exportFeatureValuesRequest);
// Print the operation name taken from the initial future before blocking on the final result.
System.out.format("Operation name: %s%n",
future.getInitialFuture().get().getName());
System.out.println("Waiting for operation to finish...");
// Waits at most `timeout` seconds for the response; a TimeoutException propagates otherwise.
ExportFeatureValuesResponse exportFeatureValuesResponse =
future.get(timeout, TimeUnit.SECONDS);
System.out.println("Export Feature Values Response");
System.out.println(exportFeatureValuesResponse);
}
}
}
// [END aiplatform_export_feature_values_sample]

Loading