diff --git a/.pipelines/azdo-base-pipeline.yml b/.pipelines/azdo-base-pipeline.yml index 616e7011..3cbb2c43 100644 --- a/.pipelines/azdo-base-pipeline.yml +++ b/.pipelines/azdo-base-pipeline.yml @@ -10,7 +10,7 @@ steps: enabled: 'true' - script: | - pip install --user -r $(Build.SourcesDirectory)/tests/requirements.txt + pip install --user -r $(Build.SourcesDirectory)/tests/requirements.txt && \ pytest --junitxml=$(Build.BinariesDirectory)/unit-testresults.xml $(Build.SourcesDirectory)/tests/unit displayName: 'Run unit tests' enabled: 'true' diff --git a/.pipelines/azdo-ci-build-train.yml b/.pipelines/azdo-ci-build-train.yml index c82bfa05..0e7b883a 100644 --- a/.pipelines/azdo-ci-build-train.yml +++ b/.pipelines/azdo-ci-build-train.yml @@ -7,7 +7,8 @@ trigger: exclude: - docs/ - environment_setup/ - - ml_service/util/create_scoring_image.* + - ml_service/util/create_scoring_image.py + - ml_service/util/smoke_test_scoring_service.py variables: - template: azdo-variables.yml @@ -36,8 +37,9 @@ stages: # Invoke the Python building and publishing a training pipeline python $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }} displayName: 'Publish Azure Machine Learning Pipeline' + - stage: 'Trigger_AML_Pipeline' - displayName: 'Train, evaluate, register model via previously published AML pipeline' + displayName: 'Train model' jobs: - job: "Get_Pipeline_ID" condition: and(succeeded(), eq(coalesce(variables['auto-trigger-training'], 'true'), 'true')) @@ -56,7 +58,8 @@ stages: export SUBSCRIPTION_ID=$(az account show --query id -o tsv) python $(Build.SourcesDirectory)/ml_service/pipelines/run_train_pipeline.py --output_pipeline_id_file "pipeline_id.txt" --skip_train_execution # Set AMLPIPELINEID variable for next AML Pipeline task in next job - echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$(cat pipeline_id.txt)" + AMLPIPELINEID="$(cat pipeline_id.txt)" + echo "##vso[task.setvariable 
variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINEID" name: 'getpipelineid' displayName: 'Get Pipeline ID' - bash: | @@ -83,36 +86,116 @@ stages: PipelineId: '$(AMLPIPELINE_ID)' ExperimentName: '$(EXPERIMENT_NAME)' PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)", "hyperparameter_alpha": "$(ALPHA)"}' - - job: "Training_Run_Report" - dependsOn: "Run_ML_Pipeline" - displayName: "Determine if evaluation succeeded and new model is registered" + +- stage: 'Deploy_ACI' + displayName: 'Deploy to ACI' + dependsOn: Trigger_AML_Pipeline + condition: and(succeeded(), variables['ACI_DEPLOYMENT_NAME']) + jobs: + - job: "Deploy_ACI" + displayName: "Deploy to ACI" pool: vmImage: 'ubuntu-latest' container: mcr.microsoft.com/mlops/python:latest timeoutInMinutes: 0 steps: + - template: azdo-template-get-model-version.yml + - task: ms-air-aiagility.vss-services-azureml.azureml-model-deploy-task.AMLModelDeploy@0 + displayName: 'Azure ML Model Deploy' + inputs: + azureSubscription: $(WORKSPACE_SVC_CONNECTION) + modelSourceType: manualSpec + modelName: '$(MODEL_NAME)' + modelVersion: $(MODEL_VERSION) + inferencePath: '$(Build.SourcesDirectory)/code/scoring/inference_config.yml' + deploymentTarget: ACI + deploymentName: $(ACI_DEPLOYMENT_NAME) + deployConfig: '$(Build.SourcesDirectory)/code/scoring/deployment_config_aci.yml' + overwriteExistingDeployment: true - task: AzureCLI@1 + displayName: 'Smoke test' inputs: azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' scriptLocation: inlineScript inlineScript: | set -e # fail on error export SUBSCRIPTION_ID=$(az account show --query id -o tsv) - python $(Build.SourcesDirectory)/ml_service/pipelines/verify_train_pipeline.py --build_id $(Build.BuildId) --model_name "$(MODEL_NAME)" - echo "##vso[task.setvariable variable=MODEL_VERSION;isOutput=true]$(cat model_version.txt)" - displayName: "Determine if evaluation succeeded and new model is registered" - - task: CopyFiles@2 - displayName: 'Copy Files to: 
$(Build.ArtifactStagingDirectory)' + python ml_service/util/smoke_test_scoring_service.py --type ACI --service "$(ACI_DEPLOYMENT_NAME)" + +- stage: 'Deploy_AKS' + displayName: 'Deploy to AKS' + dependsOn: Deploy_ACI + condition: and(succeeded(), variables['AKS_DEPLOYMENT_NAME']) + jobs: + - job: "Deploy_AKS" + displayName: "Deploy to AKS" + pool: + vmImage: 'ubuntu-latest' + container: mcr.microsoft.com/mlops/python:latest + timeoutInMinutes: 0 + steps: + - template: azdo-template-get-model-version.yml + - task: ms-air-aiagility.vss-services-azureml.azureml-model-deploy-task.AMLModelDeploy@0 + displayName: 'Azure ML Model Deploy' inputs: - SourceFolder: '$(Build.SourcesDirectory)' - TargetFolder: '$(Build.ArtifactStagingDirectory)' - Contents: | - code/scoring/** - ml_service/util/** - - task: PublishBuildArtifacts@1 - displayName: 'Publish Artifact' + azureSubscription: $(WORKSPACE_SVC_CONNECTION) + modelSourceType: manualSpec + modelName: '$(MODEL_NAME)' + modelVersion: $(MODEL_VERSION) + inferencePath: '$(Build.SourcesDirectory)/code/scoring/inference_config.yml' + deploymentTarget: AKS + aksCluster: $(AKS_COMPUTE_NAME) + deploymentName: $(AKS_DEPLOYMENT_NAME) + deployConfig: '$(Build.SourcesDirectory)/code/scoring/deployment_config_aks.yml' + overwriteExistingDeployment: true + - task: AzureCLI@1 + displayName: 'Smoke test' inputs: - ArtifactName: 'mlops-pipelines' - publishLocation: 'container' - pathtoPublish: '$(Build.ArtifactStagingDirectory)' - TargetPath: '$(Build.ArtifactStagingDirectory)' + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + inlineScript: | + set -e # fail on error + export SUBSCRIPTION_ID=$(az account show --query id -o tsv) + python ml_service/util/smoke_test_scoring_service.py --type AKS --service "$(AKS_DEPLOYMENT_NAME)" + +- stage: 'Deploy_Webapp' + displayName: 'Deploy to Webapp' + dependsOn: Trigger_AML_Pipeline + condition: and(succeeded(), variables['WEBAPP_DEPLOYMENT_NAME']) + jobs: + - job: 
"Deploy_Webapp" + displayName: "Deploy to Webapp" + pool: + vmImage: 'ubuntu-latest' + container: mcr.microsoft.com/mlops/python:latest + timeoutInMinutes: 0 + steps: + - template: azdo-template-get-model-version.yml + - task: AzureCLI@1 + displayName: 'Create scoring image and set IMAGE_LOCATION variable' + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + inlineScript: | + set -e # fail on error + export SUBSCRIPTION_ID=$(az account show --query id -o tsv) + python ml_service/util/create_scoring_image.py --output_image_location_file image_location.txt + # Output image location to Azure DevOps job + IMAGE_LOCATION="$(cat image_location.txt)" + echo "##vso[task.setvariable variable=IMAGE_LOCATION]$IMAGE_LOCATION" + - task: AzureWebAppContainer@1 + name: WebAppDeploy + displayName: 'Azure Web App on Container Deploy' + inputs: + azureSubscription: 'AzureResourceConnection' + appName: '$(WEBAPP_DEPLOYMENT_NAME)' + containers: '$(IMAGE_LOCATION)' + - task: AzureCLI@1 + displayName: 'Smoke test' + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + inlineScript: | + set -e # fail on error + export SUBSCRIPTION_ID=$(az account show --query id -o tsv) + python ml_service/util/smoke_test_scoring_service.py --type Webapp --service "$(WebAppDeploy.AppServiceApplicationUrl)/score" diff --git a/.pipelines/azdo-template-get-model-version.yml b/.pipelines/azdo-template-get-model-version.yml new file mode 100644 index 00000000..16b4a780 --- /dev/null +++ b/.pipelines/azdo-template-get-model-version.yml @@ -0,0 +1,12 @@ +steps: +- task: AzureCLI@1 + inputs: + azureSubscription: '$(WORKSPACE_SVC_CONNECTION)' + scriptLocation: inlineScript + inlineScript: | + set -e # fail on error + export SUBSCRIPTION_ID=$(az account show --query id -o tsv) + python $(Build.SourcesDirectory)/ml_service/pipelines/verify_train_pipeline.py --build_id $(Build.BuildId) --model_name "$(MODEL_NAME)" 
--output_model_version_file "model_version.txt" + echo "##vso[task.setvariable variable=MODEL_VERSION]$(cat model_version.txt)" + name: 'getversion' + displayName: "Determine if evaluation succeeded and new model is registered" diff --git a/code/scoring/score.py b/code/scoring/score.py index 716cd0e4..b78a435c 100644 --- a/code/scoring/score.py +++ b/code/scoring/score.py @@ -38,10 +38,27 @@ def init(): model = joblib.load(model_path) -def run(raw_data): +def run(raw_data, request_headers): data = json.loads(raw_data)["data"] data = numpy.array(data) result = model.predict(data) + + # Demonstrate how we can log custom data into the Application Insights + # traces collection. + # The 'X-Ms-Request-id' value is generated internally and can be used to + # correlate a log entry with the Application Insights requests collection. + # The HTTP 'traceparent' header may be set by the caller to implement + # distributed tracing (per the W3C Trace Context proposed specification) + # and can be used to correlate the request to external systems. + print(('{{"RequestId":"{0}", ' + '"TraceParent":"{1}", ' + '"NumberOfPredictions":{2}}}' + ).format( + request_headers.get("X-Ms-Request-Id", ""), + request_headers.get("Traceparent", ""), + len(result) + )) + return {"result": result.tolist()} @@ -49,5 +66,5 @@ def run(raw_data): # Test scoring init() test_row = '{"data":[[1,2,3,4,5,6,7,8,9,10],[10,9,8,7,6,5,4,3,2,1]]}' - prediction = run(test_row) + prediction = run(test_row, {}) print("Test result: ", prediction) diff --git a/docs/getting_started.md b/docs/getting_started.md index 8cff7ec1..de81dec0 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -4,8 +4,8 @@ ## Create an Azure DevOps account -We use Azure DevOps for running our build(CI), retraining trigger and release -(CD) pipelines. 
If you don't already have an Azure DevOps account, create one by +We use Azure DevOps for running our multi-stage pipeline with build(CI), ML training and scoring service release +(CD) stages. If you don't already have an Azure DevOps account, create one by following the instructions [here](https://docs.microsoft.com/en-us/azure/devops/organizations/accounts/create-organization?view=azure-devops). If you already have Azure DevOps account, create a [new project](https://docs.microsoft.com/en-us/azure/devops/organizations/projects/create-project?view=azure-devops). @@ -30,10 +30,10 @@ your Azure AD tenant, or receive the ID and secret of a service principal from your Azure AD Administrator. That principal must have 'Contributor' permissions on the subscription. -## Create a Variable Group for your Pipelines +## Create a Variable Group for your Pipeline We make use of variable group inside Azure DevOps to store variables and their -values that we want to make available across multiple pipelines. You can either +values that we want to make available across multiple pipelines or pipeline stages. You can either store the values directly in [Azure DevOps](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/variable-groups?view=azure-devops&tabs=designer#create-a-variable-group) or connect to an Azure Key Vault in your subscription. Please refer to the documentation [here](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/variable-groups?view=azure-devops&tabs=designer#create-a-variable-group) to @@ -55,6 +55,7 @@ The variable group should contain the following required variables: | RESOURCE_GROUP | | | WORKSPACE_NAME | mlops-AML-WS | | WORKSPACE_SVC_CONNECTION | aml-workspace-connection | +| ACI_DEPLOYMENT_NAME | diabetes-aci | **Note:** @@ -139,18 +140,19 @@ your Azure AD tenant, or receive the ID and secret of a service principal from your Azure AD Administrator. That principal must have Contributor permissions on the Azure ML Workspace. 
-## Set up Build, Release Trigger, and Release Deployment Pipelines +## Set up Build, Release Trigger, and Release Multi-Stage Pipeline Now that you have all the required resources created from the IaC pipeline, -you can set up the rest of the pipelines necessary for deploying your ML model -to production. These are the pipelines that you will be setting up: +you can set up the pipeline necessary for deploying your ML model +to production. The pipeline has a sequence of stages for: -1. **Build pipeline:** triggered on code change to master branch on GitHub, +1. **Model Code Continuous Integration:** triggered on code change to master branch on GitHub, performs linting, unit testing, publishing a training pipeline, and runs the published training pipeline to train, evaluate, and register a model. -1. **Release Deployment pipeline:** deploys a model to QA (ACI) and Prod (AKS) -environments. +1. **Train Model**: invokes the Azure ML service to trigger model training. +1. **Release Deployment:** deploys a model to QA (ACI) and Prod (AKS) +environments, or alternatively to Azure App Service. -### Set up a Build Training Pipeline +### Set up the Pipeline In your [Azure DevOps](https://dev.azure.com) project create and run a new build pipeline referring to the [azdo-ci-build-train.yml](../.pipelines/azdo-ci-build-train.yml) @@ -158,171 +160,105 @@ pipeline in your forked **GitHub** repository: ![configure ci build pipeline](./images/ci-build-pipeline-configure.png) -Name the pipeline **ci-build**. 
Once the pipline is finished, explore the -execution logs: +Once the pipeline is finished, explore the execution result: -![ci build logs](./images/ci-build-logs.png) +![build](./images/multi-stage-aci.png) and checkout a published training pipeline in the **mlops-AML-WS** workspace in [Azure Portal](https://ms.portal.azure.com/): ![training pipeline](./images/training-pipeline.png) -Great, you now have the build pipeline set up which automatically triggers every time there's a change in the master -branch. The pipeline performs linting, unit testing, builds and publishes and executes a -**ML Training Pipeline** in a **ML Workspace**. +Great, you now have the build pipeline set up which automatically triggers every time there's a change in the master branch. -**Note:** The build pipeline contains disabled steps to build and publish ML -pipelines using R to train a model. Enable these steps if you want to play with -this approach by changing the `build-train-script` pipeline variable to either `build_train_pipeline_with_r.py`, or `build_train_pipeline_with_r_on_dbricks.py`. For the pipeline training a model with R on Databricks you have +* The first stage of the pipeline, **Model CI**, performs linting and unit testing, then builds and publishes an **ML Training Pipeline** in an **ML Workspace**. + + **Note:** The build pipeline also supports building and publishing ML +pipelines using R to train a model. This is enabled +by changing the `build-train-script` pipeline variable to either `build_train_pipeline_with_r.py`, or `build_train_pipeline_with_r_on_dbricks.py`. For the pipeline training a model with R on Databricks you have to manually create a Databricks cluster and attach it to the ML Workspace as a -compute (Values DB_CLUSTER_ID and DATABRICKS_COMPUTE_NAME variables shoud be +compute (Values DB_CLUSTER_ID and DATABRICKS_COMPUTE_NAME variables should be specified). 
-![running training pipeline](./images/running-training-pipeline.png) +* The second stage of the pipeline, **Train model**, triggers the run of the ML Training Pipeline. The training pipeline will train, evaluate, and register a new model. The actual computation is performed in an [Azure Machine Learning Compute cluster](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute). In Azure DevOps, this stage runs an agentless job that waits for the completion of the Azure ML job, so it can wait for training completion for hours or even days without using agent resources. -The training pipeline will train, evaluate, and register a new model. Wait until -it is finished and make sure there is a new model in the **ML Workspace**: +* The third stage of the pipeline, **Deploy to ACI**, deploys the model to the QA environment in [Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/). It then runs a *smoke test* to validate the deployment, i.e. sends a sample query to the scoring web service and verifies that it returns a response in the expected format. + +Wait until the pipeline has finished and make sure there is a new model in the **ML Workspace**: ![trained model](./images/trained-model.png) To disable the automatic trigger of the training pipeline, change the `auto-trigger-training` variable as listed in the `.pipelines\azdo-ci-build-train.yml` pipeline to `false`. This can also be overridden at runtime execution of the pipeline. -### Set up a Release Deployment Pipeline to Deploy the Model +### Deploy the Model to Azure Kubernetes Service -The final step is to deploy the model across environments with a release -pipeline. 
There will be a **``QA``** environment running on -[Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/) -and a **``Prod``** environment running on +The final stage is to deploy the model to the production environment running on [Azure Kubernetes Service](https://azure.microsoft.com/en-us/services/kubernetes-service). -This is the final picture of what your release pipeline should look like: - -![deploy model](./images/deploy-model.png) - -The pipeline consumes two artifacts: - -1. the result of the **Build Pipeline** as it contains configuration files -1. the **model** trained and registered by the ML training pipeline - -Add an artifact to the pipeline and select **AzureML Model Artifact** source -type. Select the **Service Endpoint** and **Model Names** from the drop down -lists. **Service Endpoint** refers to the **Service connection** created in -the previous step: - -![model artifact](./images/model-artifact.png) - -Go to the new **Releases Pipelines** section, and click new to create a new -release pipeline. A first stage is automatically created and choose -**start with an Empty job**. Name the stage **QA (ACI)** and add a single task -to the job **Azure ML Model Deploy**. Make sure that the Agent Specification -is ubuntu-16.04 under the Agent Job: - -![deploy aci](./images/deploy-aci.png) - -Specify task parameters as it is shown in the table below: - -| Parameter | Value | -| ----------------------------- | ---------------------------------------------------------------------------------------------------- | -| Display Name | Azure ML Model Deploy | -| Azure ML Workspace | mlops-AML-WS | -| Inference config Path | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/code/scoring/inference_config.yml`
_(The `_ci-build` part of the path is the source alias of your CI artifact)_ | -| Model Deployment Target | Azure Container Instance | -| Deployment Name | mlopspython-aci | -| Deployment Configuration file | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/code/scoring/deployment_config_aci.yml`
_(The `_ci-build` part of the path is the source alias of your CI artifact)_ | -| Overwrite existing deployment | X | - -In a similar way, create a stage **Prod (AKS)** and add a single task to the job -**Azure ML Model Deploy**. Make sure that the Agent Specification is -ubuntu-16.04 under the Agent Job: - -![deploy aks](./images/deploy-aks.png) - -Specify task parameters as it is shown in the table below: - -| Parameter | Value | -| --------------------------------- | ---------------------------------------------------------------------------------------------------- | -| Display Name | Azure ML Model Deploy | -| Azure ML Workspace | mlops-AML-WS | -| Inference config Path | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/code/scoring/inference_config.yml`
_(The `_ci-build` part of the path is the source alias of your CI artifact)_ | -| Model Deployment Target | Azure Kubernetes Service | -| Select AKS Cluster for Deployment | YOUR_DEPLOYMENT_K8S_CLUSTER | -| Deployment Name | mlopspython-aks | -| Deployment Configuration file | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/code/scoring/deployment_config_aks.yml`
_(The `_ci-build` part of the path is the source alias of your CI artifact)_ | -| Overwrite existing deployment | X | **Note:** Creating of a Kubernetes cluster on AKS is out of scope of this tutorial, but you can find set up information in the docs [here](https://docs.microsoft.com/en-us/azure/aks/kubernetes-walkthrough-portal#create-an-aks-cluster). -Similarly to the **Invoke Training Pipeline** release pipeline, previously -created, in order to trigger a coutinuous integration, click on the lightning -bolt icon, make sure the **Continuous deployment trigger** is checked and -save the trigger: - -![Automate Deploy Model Pipeline](./images/automate_deploy_model_pipeline.png) - -Congratulations! You have three pipelines set up end to end: - -* **Build pipeline:** triggered on code change to master branch on GitHub, -performs linting, unit testing and publishing a training pipeline. -* **Release Trigger pipeline:** runs a published training pipeline to train, -evaluate and register a model. -* **Release Deployment pipeline:** deploys a model to QA (ACI) and Prod (AKS) -environments. - -## Deploy the trained model to Azure Web App for containers - -Note: This is an optional step and can be used only if you are deploying your -scoring service on Azure Web Apps. - -[Create Image Script](../ml_service/util/create_scoring_image.py) -can be used to create a scoring image from the release pipeline. The image -created by this script will be registered under Azure Container Registry (ACR) -instance that belongs to Azure Machine Learning Service. Any dependencies that -scoring file depends on can also be packaged with the container with Image -config. To learn more on how to create a container with AML SDK click -[here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.image.image.image?view=azure-ml-py#create-workspace--name--models--image-config-). 
- -Below is release pipeline with two tasks one to create an image using the above -script and second is the deploy the image to Web App for containers. - -![release_webapp](./images/release-webapp-pipeline.PNG) - -In the Variables tab, link the pipeline to your variable group (`devopsforai-aml-vg`). In the variable group definition, add the following variables: +In the Variables tab, edit your variable group (`devopsforai-aml-vg`). In the variable group definition, add the following variables: | Variable Name | Suggested Value | | --------------------------- | -----------------------------------| -| MODEL_NAME | sklearn_regression_model.pkl | -| IMAGE_NAME | diabetes | +| AKS_COMPUTE_NAME | aks | +| AKS_DEPLOYMENT_NAME | diabetes-aks | -Add as an artifact to the pipeline the result of the **Build Pipeline** as it contains the necessary scripts. +Set **AKS_COMPUTE_NAME** to the *Compute name* of the Inference Cluster referencing your AKS cluster in your Azure ML Workspace. -Use an Agent of type `ubuntu-16.04`. +After successfully deploying to Azure Container Instances, the next stage will deploy the model to Kubernetes and run a smoke test. -For the Azure CLI task to invoke the [Create Image Script](../ml_service/util/create_scoring_image.py), specify the following task parameters: +![build](./images/multi-stage-aci-aks.png) -| Parameter | Value | -| ------------------ | --------------------------------------------------------------------------------------------------- | -| Display name | Create Scoring Image | -| Azure subscription | aml-workspace-connection | -| Script Path | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipeline/ml_service/util/create_scoring_image.sh`
_(The `_ci-build` part of the path is the source alias of your CI artifact)_ | -| Working directory | `$(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines`
_(The `_ci-build` part of the path is the source alias of your CI artifact)_ | +## Deploy the Model to Azure App Service (Azure Web App for containers) -![release_createimage](./images/release-task-createimage.PNG) +Note: This is an optional step and can be used only if you are [deploying your +scoring service on Azure App Service](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-app-service). -Finally, for the Azure Web App for Containers Task, specify the following task -parameters as it is shown in the table below: +In the Variables tab, edit your variable group (`devopsforai-aml-vg`). In the variable group definition, add the following variable: -| Parameter | Value | -| ------------------ | --------------------------------------------------------------------------------------------------- | -| Azure subscription | Subscription used to deploy Web App | -| App name | Web App for Containers name | -| Image name | Specify the fully qualified container image name. For example, 'myregistry.azurecr.io/nginx:latest' | +| Variable Name | Suggested Value | +| --------------------------- | -----------------------------------| +| WEBAPP_DEPLOYMENT_NAME | mlopswebapp | -![release_webapp](./images/release-task-webappdeploy.PNG) +Set **WEBAPP_DEPLOYMENT_NAME** to the name of your Azure Web App. Delete the **ACI_DEPLOYMENT_NAME** variable. -Save the pipeline and create a release to trigger it manually. To create the -trigger, click on the "Create release" button on the top right of your screen, -leave the fields blank and click on **Create** at the bottom of the screen. -Once the pipeline execution is finished, check out deployments in the -**mlops-AML-WS** workspace. +The pipeline uses the [Create Image Script](../ml_service/util/create_scoring_image.py) +to create a scoring image. The image +created by this script will be registered under Azure Container Registry (ACR) +instance that belongs to Azure Machine Learning Service. 
Any dependencies that +scoring file depends on can also be packaged with the container with Image +config. +[Learn more on how to create a container with AML SDK](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.image.image.image?view=azure-ml-py#create-workspace--name--models--image-config-). + +Make sure your webapp has the credentials to pull the image from the Azure Container Registry created by the Infrastructure as Code pipeline. You could do this by following the instructions in the section [Configure registry credentials in web app](https://docs.microsoft.com/en-us/azure/devops/pipelines/targets/webapp-on-container-linux?view=azure-devops&tabs=dotnet-core%2Cyaml#configure-registry-credentials-in-web-app). Note that you must have run the pipeline once (including the Deploy to Webapp stage up to the `Create scoring image` step) so that an image is present in the registry, before you can connect the Webapp to the Azure Container Registry in the Azure Portal. + +![build](./images/multi-stage-webapp.png) + +# Next steps + +* The provided pipeline definition YAML file is a sample starting point, which you should tailor to your processes and environment. +* You should edit the pipeline definition to remove unused stages. For example, if you are deploying to ACI and AKS, you should delete the unused `Deploy_Webapp` stage. +* The sample pipeline generates a random value for a model hyperparameter (ridge regression [*alpha*](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html)) to generate 'interesting' charts when testing the sample. In a real application you should use fixed hyperparameter values. You can [tune hyperparameter values using Azure ML](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters), and manage their values in Azure DevOps Variable Groups. 
+* You may wish to enable [manual approvals](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/approvals) before the deployment stages. +* You can explore aspects of model observability in the solution, such as: + * **Logging**: navigate to the Application Insights instance linked to the Azure ML Portal, + then to the Logs (Analytics) pane. The following sample query correlates HTTP requests with custom logs + generated in `score.py`, and can be used for example to analyze query duration vs. scoring batch size: + + let Traceinfo=traces + | extend d=parse_json(tostring(customDimensions.Content)) + | project workspace=customDimensions.["Workspace Name"], + service=customDimensions.["Service Name"], + NumberOfPredictions=tostring(d.NumberOfPredictions), + id=tostring(d.RequestId), + TraceParent=tostring(d.TraceParent); + requests + | project timestamp, id, success, resultCode, duration + | join kind=fullouter Traceinfo on id + | project-away id1 + + * **Distributed tracing**: The smoke test client code sets an HTTP `traceparent` header (per the [W3C Trace Context proposed specification](https://www.w3.org/TR/trace-context-1)), and the `score.py` code logs this header. The query above shows how to surface this value. You can adapt this to your tracing framework. + * **Monitoring**: You can use [Azure Monitor for containers](https://docs.microsoft.com/en-us/azure/azure-monitor/insights/container-insights-overview) to monitor the Azure ML scoring containers' performance, just as for any other container. 
\ No newline at end of file diff --git a/docs/images/automate_deploy_model_pipeline.png b/docs/images/automate_deploy_model_pipeline.png deleted file mode 100644 index 35c7f54e..00000000 Binary files a/docs/images/automate_deploy_model_pipeline.png and /dev/null differ diff --git a/docs/images/ci-build-logs.png b/docs/images/ci-build-logs.png deleted file mode 100644 index 726f70ac..00000000 Binary files a/docs/images/ci-build-logs.png and /dev/null differ diff --git a/docs/images/create-rm-service-connection.png b/docs/images/create-rm-service-connection.png index 629d3c2a..011018d3 100644 Binary files a/docs/images/create-rm-service-connection.png and b/docs/images/create-rm-service-connection.png differ diff --git a/docs/images/deploy-model.png b/docs/images/deploy-model.png deleted file mode 100644 index 8a4cbd06..00000000 Binary files a/docs/images/deploy-model.png and /dev/null differ diff --git a/docs/images/multi-stage-aci-aks.png b/docs/images/multi-stage-aci-aks.png new file mode 100644 index 00000000..0307fbf6 Binary files /dev/null and b/docs/images/multi-stage-aci-aks.png differ diff --git a/docs/images/multi-stage-aci.png b/docs/images/multi-stage-aci.png new file mode 100644 index 00000000..a96f3195 Binary files /dev/null and b/docs/images/multi-stage-aci.png differ diff --git a/docs/images/multi-stage-webapp.png b/docs/images/multi-stage-webapp.png new file mode 100644 index 00000000..e6d60ce1 Binary files /dev/null and b/docs/images/multi-stage-webapp.png differ diff --git a/ml_service/pipelines/verify_train_pipeline.py b/ml_service/pipelines/verify_train_pipeline.py index dbf34f16..34150177 100644 --- a/ml_service/pipelines/verify_train_pipeline.py +++ b/ml_service/pipelines/verify_train_pipeline.py @@ -42,6 +42,11 @@ def main(): type=str, help="Name of the Model" ) + parser.add_argument( + "--output_model_version_file", + type=str, + help="Name of a file to write model version to" + ) args = parser.parse_args() if (args.build_id is not None): @@ 
-62,6 +67,11 @@ def main(): print("Model was not registered for this run.") sys.exit(1) + # Save the Model Version for other AzDO jobs after script is complete + if args.output_model_version_file is not None: + with open(args.output_model_version_file, "w") as out_file: + out_file.write(str(model.version)) + if __name__ == '__main__': main() diff --git a/ml_service/util/create_scoring_image.py b/ml_service/util/create_scoring_image.py index af7de448..0968b6c4 100644 --- a/ml_service/util/create_scoring_image.py +++ b/ml_service/util/create_scoring_image.py @@ -1,5 +1,6 @@ import os import sys +import argparse from azureml.core import Workspace from azureml.core.image import ContainerImage, Image from azureml.core.model import Model @@ -15,6 +16,15 @@ resource_group=e.resource_group ) +parser = argparse.ArgumentParser("create scoring image") +parser.add_argument( + "--output_image_location_file", + type=str, + help=("Name of a file to write image location to, " + "in format REGISTRY.azurecr.io/IMAGE_NAME:IMAGE_VERSION") +) +args = parser.parse_args() + model = Model(ws, name=e.model_name, version=e.model_version) os.chdir("./code/scoring") @@ -30,6 +40,8 @@ name=e.image_name, models=[model], image_config=image_config, workspace=ws ) +os.chdir("../..") + image.wait_for_creation(show_output=True) if image.creation_state != "Succeeded": @@ -43,3 +55,9 @@ image.image_build_log_uri, ) ) + +# Save the Image Location for other AzDO jobs after script is complete +if args.output_image_location_file is not None: + print("Writing image location to %s" % args.output_image_location_file) + with open(args.output_image_location_file, "w") as out_file: + out_file.write(str(image.image_location)) diff --git a/ml_service/util/create_scoring_image.sh b/ml_service/util/create_scoring_image.sh deleted file mode 100644 index 1651b73e..00000000 --- a/ml_service/util/create_scoring_image.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh - -set -euo pipefail # strict mode, fail on error -set -x 
# verbose - -docker run \ - --rm \ - -t \ - -v $PWD:/mlops \ - -v ${AZURE_CONFIG_DIR:-$HOME/.azure}:/root/.azure \ - -e SUBSCRIPTION_ID=$(az account show --query id -o tsv) \ - -e RESOURCE_GROUP=$RESOURCE_GROUP \ - -e WORKSPACE_NAME=$WORKSPACE_NAME \ - -e MODEL_NAME=$MODEL_NAME \ - -e IMAGE_NAME=$IMAGE_NAME \ - mcr.microsoft.com/mlops/python:latest \ - bash -c "cd /mlops/ && python ml_service/util/create_scoring_image.py" diff --git a/ml_service/util/smoke_test_scoring_service.py b/ml_service/util/smoke_test_scoring_service.py new file mode 100644 index 00000000..753ef23e --- /dev/null +++ b/ml_service/util/smoke_test_scoring_service.py @@ -0,0 +1,94 @@ +import os +import sys +import argparse +import requests +import time +from azureml.core import Workspace +from azureml.core.webservice import AksWebservice, AciWebservice +sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 +from env_variables import Env +import secrets + + +input = {"data": [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]]} +output_len = 2 + + +def call_web_service(e, service_type, service_name): + aml_workspace = Workspace.get( + name=e.workspace_name, + subscription_id=e.subscription_id, + resource_group=e.resource_group + ) + print("Fetching service") + headers = {} + if service_type == "ACI": + service = AciWebservice(aml_workspace, service_name) + else: + service = AksWebservice(aml_workspace, service_name) + if service.auth_enabled: + service_keys = service.get_keys() + headers['Authorization'] = 'Bearer ' + service_keys[0] + print("Testing service") + print(". url: %s" % service.scoring_uri) + output = call_web_app(service.scoring_uri, headers) + + return output + + +def call_web_app(url, headers): + + # Generate an HTTP 'traceparent' distributed tracing header + # (per the W3C Trace Context proposed specification). 
+ headers['traceparent'] = "00-{0}-{1}-00".format( + secrets.token_hex(16), secrets.token_hex(8)) + + retries = 600 + for i in range(retries): + try: + response = requests.post( + url, json=input, headers=headers) + response.raise_for_status() + return response.json() + except requests.exceptions.HTTPError as e: + if i == retries-1: + raise e + print(e) + print("Retrying...") + time.sleep(1) + + +def main(): + + parser = argparse.ArgumentParser("smoke_test_scoring_service.py") + + parser.add_argument( + "--type", + type=str, + choices=["AKS", "ACI", "Webapp"], + required=True, + help="type of service" + ) + parser.add_argument( + "--service", + type=str, + required=True, + help="Name of the image to test" + ) + args = parser.parse_args() + + e = Env() + if args.type == "Webapp": + output = call_web_app(args.service, {}) + else: + output = call_web_service(e, args.type, args.service) + print("Verifying service output") + + assert "result" in output + assert len(output["result"]) == output_len + print("Smoke test successful.") + + +if __name__ == '__main__': + main()