Skip to content

Commit 5635da4

Browse files
committed
cleanup, add example training package dist, changed container URI
1 parent f51e2a6 commit 5635da4

File tree

8 files changed

+174
-221
lines changed

8 files changed

+174
-221
lines changed

ml/kubeflow-pipelines/keras_tuner/components/ucaip/model_deploy_component.yaml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ inputs:
1010
- {name: timeout, type: Integer, default: '7200', optional: true}
1111
implementation:
1212
container:
13-
image: gcr.io/aju-vtests2/bw-aiplatform:v1
13+
image: gcr.io/google-samples/bw-aiplatform:v1
1414
command:
1515
- sh
1616
- -ec
@@ -87,7 +87,6 @@ implementation:
8787
# project=project, location=location, endpoint=endpoint_id
8888
# )
8989
response = client.deploy_model(
90-
# endpoint=endpoint, deployed_model=deployed_model, traffic_split=traffic_split
9190
endpoint=endpoint_path, deployed_model=deployed_model, traffic_split=traffic_split
9291
)
9392
logging.info("Long running operation: %s", response.operation.name)

ml/kubeflow-pipelines/keras_tuner/components/ucaip/model_train_component.yaml

Lines changed: 5 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ outputs:
1919
- {name: model_dispname, type: String}
2020
implementation:
2121
container:
22-
image: gcr.io/aju-vtests2/bw-aiplatform:v1
22+
image: gcr.io/google-samples/bw-aiplatform:v1
2323
command:
2424
- sh
2525
- -ec
@@ -43,8 +43,6 @@ implementation:
4343
api_endpoint, # "us-central1-aiplatform.googleapis.com",
4444
data_dir,
4545
hptune_dict,
46-
# model_id: OutputPath('String'),
47-
# model_dispname: OutputPath('String')
4846
):
4947
5048
import logging
@@ -97,6 +95,9 @@ implementation:
9795
"replica_count": 1,
9896
"container_spec": container_spec,
9997
}
98+
else:
99+
logging.warning('unknown train_container_type; exiting')
100+
exit(1)
100101
101102
training_task_inputs_dict = {
102103
"workerPoolSpecs": [
@@ -110,8 +111,6 @@ implementation:
110111
training_task_inputs = json_format.ParseDict(training_task_inputs_dict, Value())
111112
112113
training_task_definition = "gs://google-cloud-aiplatform/schema/trainingjob/definition/custom_task_1.0.0.yaml"
113-
# image_uri = "gcr.io/cloud-aiplatform/prediction/tf-cpu.1-15:latest"
114-
# image_uri = 'us-docker.pkg.dev/cloud-aiplatform/prediction/tf2-cpu.2-3:latest'
115114
116115
training_pipeline = {
117116
"display_name": display_name,
@@ -139,21 +138,11 @@ implementation:
139138
logging.info('job state: %s', mresponse.state)
140139
if mresponse.state == pipeline_state.PipelineState.PIPELINE_STATE_FAILED:
141140
logging.warning('training pipeline failed: %s', mresponse)
142-
break
141+
exit(1)
143142
if mresponse.state == pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED:
144143
logging.info('training finished')
145144
model_name = mresponse.model_to_upload.name
146145
return (model_name, model_display_name)
147-
# # write some outputs once finished
148-
# model_name = mresponse.model_to_upload.name
149-
# logging.info('got model name: %s', model_name)
150-
# with open('temp.txt', "w") as outfile:
151-
# outfile.write(model_name)
152-
# subprocess.run(['gsutil', 'cp', 'temp.txt', model_id])
153-
# with open('temp2.txt', "w") as outfile:
154-
# outfile.write(model_display_name)
155-
# subprocess.run(['gsutil', 'cp', 'temp2.txt', model_dispname])
156-
# break
157146
else:
158147
time.sleep(SLEEP_INTERVAL)
159148

ml/kubeflow-pipelines/keras_tuner/components/ucaip/model_upload_component.yaml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ outputs:
1111
- {name: model_id, type: String}
1212
implementation:
1313
container:
14-
image: gcr.io/aju-vtests2/bw-aiplatform:v1
14+
image: gcr.io/google-samples/bw-aiplatform:v1
1515
command:
1616
- sh
1717
- -ec
@@ -28,7 +28,6 @@ implementation:
2828
location, # "us-central1",
2929
api_endpoint, #"us-central1-aiplatform.googleapis.com",
3030
timeout, # 1800,
31-
# model_id: OutputPath('String')
3231
):
3332
3433
import logging

ml/kubeflow-pipelines/keras_tuner/components/ucaip/serving/deploy_model.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -80,18 +80,16 @@ def create_endpoint(
8080
# project=project, location=location, endpoint=endpoint_id
8181
# )
8282
response = client.deploy_model(
83-
# endpoint=endpoint, deployed_model=deployed_model, traffic_split=traffic_split
8483
endpoint=endpoint_path, deployed_model=deployed_model, traffic_split=traffic_split
8584
)
8685
logging.info("Long running operation: %s", response.operation.name)
8786
deploy_model_response = response.result(timeout=timeout)
8887
logging.info("deploy_model_response: %s", deploy_model_response)
88+
# TODO: output status info in some form
8989

9090

9191
if __name__ == '__main__':
92-
# deploy_model('aju-vtests2', 'endpoint_test2',
93-
# 'projects/467744782358/locations/us-central1/models/6181278449496227840', 'sdk_test1')
9492
import kfp
9593
kfp.components.func_to_container_op(deploy_model,
9694
output_component_file='../model_deploy_component.yaml',
97-
base_image='gcr.io/aju-vtests2/bw-aiplatform:v1')
95+
base_image='gcr.io/google-samples/bw-aiplatform:v1')

ml/kubeflow-pipelines/keras_tuner/components/ucaip/serving/model_upload.py

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@ def upload_model(
2424
location: str, # "us-central1",
2525
api_endpoint: str, #"us-central1-aiplatform.googleapis.com",
2626
timeout: int, # 1800,
27-
# model_id: OutputPath('String')
2827
) -> NamedTuple('Outputs', [('model_id', str)]):
2928

3029
import logging
@@ -64,21 +63,14 @@ def upload_model(
6463
logging.info("upload_model_response: %s", upload_model_response)
6564
model_path = upload_model_response.model
6665
return (model_path, )
67-
# logging.info('got model path: %s', model_path)
68-
# with open('temp.txt', "w") as outfile:
69-
# outfile.write(model_path)
70-
# subprocess.run(['gsutil', 'cp', 'temp.txt', model_id])
7166

7267

7368

7469
if __name__ == '__main__':
75-
# upload_model('aju-vtests2', display_name='sdk_test1', metadata_schema_uri="",
76-
# image_uri='us-docker.pkg.dev/cloud-aiplatform/prediction/tf2-cpu.2-3:latest',
77-
# artifact_uri='gs://aju-pipelines/v64/077ae97e-9c6d-4c1c-b5a1-fc2e95fb7dbb/0/bwmodel/trained_model/export/bikesw/1615937808')
7870
import kfp
7971
kfp.components.func_to_container_op(upload_model,
8072
output_component_file='../model_upload_component.yaml',
81-
base_image='gcr.io/aju-vtests2/bw-aiplatform:v1')
73+
base_image='gcr.io/google-samples/bw-aiplatform:v1')
8274

8375

8476
# gcloud beta ai models upload --region=us-central1 --display-name=bw2 --container-image-uri=us-docker.pkg.dev/cloud-aiplatform/prediction/tf2-cpu.2-3:latest --artifact-uri=gs://aju-pipelines/ktune13/f8515c75-32b7-47a4-af70-5ff24362eccc/0/bwmodel/trained_model/export/bikesw/1603733739

ml/kubeflow-pipelines/keras_tuner/components/ucaip/training/create_training_job.py

Lines changed: 1 addition & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,6 @@ def create_training_pipeline_custom_job(
3232
api_endpoint: str, # "us-central1-aiplatform.googleapis.com",
3333
data_dir: str,
3434
hptune_dict: str,
35-
# model_id: OutputPath('String'),
36-
# model_dispname: OutputPath('String')
3735
) -> NamedTuple('Outputs', [('model_id', str), ('model_dispname', str)]):
3836

3937
import logging
@@ -102,8 +100,6 @@ def create_training_pipeline_custom_job(
102100
training_task_inputs = json_format.ParseDict(training_task_inputs_dict, Value())
103101

104102
training_task_definition = "gs://google-cloud-aiplatform/schema/trainingjob/definition/custom_task_1.0.0.yaml"
105-
# image_uri = "gcr.io/cloud-aiplatform/prediction/tf-cpu.1-15:latest"
106-
# image_uri = 'us-docker.pkg.dev/cloud-aiplatform/prediction/tf2-cpu.2-3:latest'
107103

108104
training_pipeline = {
109105
"display_name": display_name,
@@ -136,31 +132,11 @@ def create_training_pipeline_custom_job(
136132
logging.info('training finished')
137133
model_name = mresponse.model_to_upload.name
138134
return (model_name, model_display_name)
139-
# # write some outputs once finished
140-
# model_name = mresponse.model_to_upload.name
141-
# logging.info('got model name: %s', model_name)
142-
# with open('temp.txt', "w") as outfile:
143-
# outfile.write(model_name)
144-
# subprocess.run(['gsutil', 'cp', 'temp.txt', model_id])
145-
# with open('temp2.txt', "w") as outfile:
146-
# outfile.write(model_display_name)
147-
# subprocess.run(['gsutil', 'cp', 'temp2.txt', model_dispname])
148-
# break
149135
else:
150136
time.sleep(SLEEP_INTERVAL)
151137

152138
if __name__ == '__main__':
153-
# create_training_pipeline_custom_job(
154-
# 'aju-vtests2', 'bw_sdktest2',
155-
# 'bw_sdktest2',
156-
# 'us-docker.pkg.dev/cloud-aiplatform/training/tf-gpu.2-3:latest',
157-
# 'gs://aju-pipelines/ucaip/training1/bw-trainer-0.1.tar.gz',
158-
# 'trainer.task',
159-
# 'gs://aju-pipelines/ucaip/test1803_sdk1',
160-
# "us-central1",
161-
# "us-central1-aiplatform.googleapis.com",
162-
# )
163139
import kfp
164140
kfp.components.func_to_container_op(create_training_pipeline_custom_job,
165141
output_component_file='../model_train_component.yaml',
166-
base_image='gcr.io/aju-vtests2/bw-aiplatform:v1')
142+
base_image='gcr.io/google-samples/bw-aiplatform:v1')
Binary file not shown.

0 commit comments

Comments
 (0)