Squashed commit of the following:

commit 493ed3e
Author: j-so <[email protected]>
Date:   Mon Jun 22 16:42:07 2020 -0700

    mark as output

commit 1ca7a59
Author: j-so <[email protected]>
Date:   Mon Jun 22 16:12:10 2020 -0700

    fix import

commit 743e301
Author: j-so <[email protected]>
Date:   Mon Jun 22 15:59:43 2020 -0700

    more fixes

commit 44abcac
Author: j-so <[email protected]>
Date:   Mon Jun 22 15:50:49 2020 -0700

    fix batch scoring
microsoft · j-so · Jun 26, 2020 · May 13, 2020 · May 14, 2020 · May 14, 2020
commit a95183b22a8e424926bd816e5df8027ae5340b85
diff --git a/.pipelines/diabetes_regression-batchscoring-ci.yml b/.pipelines/diabetes_regression-batchscoring-ci.yml
@@ -65,20 +65,22 @@ stages:
           export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
           # Invoke the Python building and publishing a training pipeline
           python -m ml_service.pipelines.diabetes_regression_build_parallel_batchscore_pipeline
- 
+
   - job: "Run_Batch_Score_Pipeline"
     displayName: "Run Batch Scoring Pipeline"
-    dependsOn: "Build_Batch_Scoring_Pipeline"
+    dependsOn: ["Build_Batch_Scoring_Pipeline"]
     timeoutInMinutes: 240
     pool: server
     variables:
       pipeline_id: $[ dependencies.Build_Batch_Scoring_Pipeline.outputs['publish_batchscore.pipeline_id']]
+      model_name: $[ dependencies.Build_Batch_Scoring_Pipeline.outputs['get_model.MODEL_NAME']]
+      model_version: $[ dependencies.Build_Batch_Scoring_Pipeline.outputs['get_model.MODEL_VERSION']]
     steps:
     - task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0
       displayName: 'Invoke Batch Scoring pipeline'
       inputs:
         azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
         PipelineId: '$(pipeline_id)'
         ExperimentName: '$(EXPERIMENT_NAME)'
-        PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)", "model_version": "$(MODEL_VERSION)"}'
+        PipelineParameters: '"ParameterAssignments": {"model_name": "$(model_name)", "model_version": "$(model_version)"}'
 
diff --git a/.pipelines/diabetes_regression-get-model-id-artifact-template.yml b/.pipelines/diabetes_regression-get-model-id-artifact-template.yml
@@ -1,3 +1,5 @@
+# Pipeline template that gets the model name and version from a previous build's artifact
+
 parameters:
 - name: projectId
   type: string
@@ -26,6 +28,7 @@ steps:
       runBranch: '$(Build.SourceBranch)'
       path: $(Build.SourcesDirectory)/bin
   - task: Bash@3
+    name: get_model
     displayName: Parse Json for Model Name and Version
     inputs:
       targetType: 'inline'
@@ -41,5 +44,5 @@ steps:
         echo "Model Version: $MODEL_VERSION"
 
         # Set environment variables
-        echo "##vso[task.setvariable variable=MODEL_VERSION]$MODEL_VERSION"
-        echo "##vso[task.setvariable variable=MODEL_NAME]$MODEL_NAME"
+        echo "##vso[task.setvariable variable=MODEL_VERSION;isOutput=true]$MODEL_VERSION"
+        echo "##vso[task.setvariable variable=MODEL_NAME;isOutput=true]$MODEL_NAME"
diff --git a/diabetes_regression/evaluate/evaluate_model.py b/diabetes_regression/evaluate/evaluate_model.py
@@ -26,7 +26,7 @@
 from azureml.core import Run
 import argparse
 import traceback
-from util.model_helper import get_latest_model
+from util.model_helper import get_model
 
 run = Run.get_context()
 
@@ -45,7 +45,7 @@
 #         sources_dir = 'diabetes_regression'
 #     path_to_util = os.path.join(".", sources_dir, "util")
 #     sys.path.append(os.path.abspath(path_to_util))  # NOQA: E402
-#     from model_helper import get_latest_model
+#     from model_helper import get_model
 #     workspace_name = os.environ.get("WORKSPACE_NAME")
 #     experiment_name = os.environ.get("EXPERIMENT_NAME")
 #     resource_group = os.environ.get("RESOURCE_GROUP")
@@ -108,7 +108,7 @@
     firstRegistration = False
     tag_name = 'experiment_name'
 
-    model = get_latest_model(
+    model = get_model(
                 model_name=model_name,
                 tag_name=tag_name,
                 tag_value=exp.name,

diff --git a/diabetes_regression/scoring/parallel_batchscore.py b/diabetes_regression/scoring/parallel_batchscore.py
@@ -30,6 +30,7 @@
 import sys
 from typing import List
 from util.model_helper import get_model
+from azureml.core import Model
 
 model = None
 
@@ -64,13 +65,12 @@ def parse_args() -> List[str]:
         for idx, itm in enumerate(sys.argv)
         if itm == "--model_version"
     ]
-
-    if len(model_version_param) == 0:
-        raise ValueError(
-            "Model name is required but no model name parameter was passed to the script"  # NOQA: E501
-        )
-
-    model_version = model_version_param[0][1]
+    model_version = (
+        None
+        if len(model_version_param) < 1
+        or len(model_version_param[0][1].strip()) == 0  # NOQA: E501
+        else model_version_param[0][1]
+    )
 
     model_tag_name_param = [
         (sys.argv[idx], sys.argv[idx + 1])
@@ -107,15 +107,18 @@ def init():
     try:
         print("Initializing batch scoring script...")
 
+        # Get the model using name/version/tags filter
         model_filter = parse_args()
         amlmodel = get_model(
             model_name=model_filter[0],
             model_version=model_filter[1],
             tag_name=model_filter[2],
             tag_value=model_filter[3])
 
+        # Load the model using name/version found
         global model
-        modelpath = Model.get_model_path(model_name=model_filter[0])
+        modelpath = Model.get_model_path(
+            model_name=amlmodel.name, version=amlmodel.version)
         model = joblib.load(modelpath)
         print("Loaded model {}".format(model_filter[0]))
     except Exception as ex:

diff --git a/diabetes_regression/util/model_helper.py b/diabetes_regression/util/model_helper.py
@@ -46,19 +46,19 @@ def get_model(
         print("No workspace defined - using current experiment workspace.")
         aml_workspace = get_current_workspace()
 
-    if tagname is not None and tagvalue is not None:
+    if tag_name is not None and tag_value is not None:
         model = AMLModel(
             aml_workspace,
             name=model_name,
             version=model_version,
             tags=[[tag_name, tag_value]])
-    elif (tagname is None and tagvalue is not None) or (
-        tagvalue is None and tagname is not None
+    elif (tag_name is None and tag_value is not None) or (
+        tag_value is None and tag_name is not None
     ):
         raise ValueError(
             "model_tag_name and model_tag_value should both be supplied"
             + "or excluded"  # NOQA: E501
         )
     else:
-        model = AMLModel(aml_workspace, name=env.model_name, version=env.model_version) # NOQA: E501
+        model = AMLModel(aml_workspace, name=model_name, version=model_version)  # NOQA: E501
     return model
diff --git a/ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py b/ml_service/pipelines/diabetes_regression_build_parallel_batchscore_pipeline.py
@@ -33,35 +33,15 @@
     Workspace,
     Dataset,
     Datastore,
-    Model,
     RunConfiguration,
 )
 from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter
 from azureml.core.compute import ComputeTarget
 from azureml.data.datapath import DataPath
 from azureml.pipeline.steps import PythonScriptStep
-from argparse import ArgumentParser, Namespace
 from typing import Tuple
 
 
-def parse_args() -> Namespace:
-    """
-    Parse arguments supplied to the pipeline creation script.
-    The only allowed arguments are model_tag_name and model_tag_value
-    specifying a custom tag/value pair to help locate a specific model.
-
-
-    :returns: Namespace with two attributes model_tag_name and model_tag_value
-    and corresponding values
-
-    """
-    parser = ArgumentParser()
-    parser.add_argument("--model_tag_name", default=None, type=str)
-    parser.add_argument("--model_tag_value", default=None, type=str)
-    args = parser.parse_args()
-    return args
-
-
 def get_or_create_datastore(
     datastorename: str, ws: Workspace, env: Env, input: bool = True
 ) -> Datastore:
@@ -312,7 +292,6 @@ def get_scoring_pipeline(
     """
     Creates the scoring pipeline.
 
-    :param model: The model to use for scoring
     :param scoring_dataset: Data to score
     :param output_loc: Location to save the scoring results
     :param score_run_config: Parallel Run configuration to support
@@ -399,8 +378,6 @@ def build_batchscore_pipeline():
     try:
         env = Env()
 
-        args = parse_args()
-
         # Get Azure machine learning workspace
         aml_workspace = Workspace.get(
             name=env.workspace_name,