Squashed commit of the following:

commit 5af6eeb242
Author: j-so <jenns@microsoft.com>
Date:   Mon Jun 15 18:31:15 2020 -0700

    fix bootstrap

commit f61e103ed0
Merge: 2796b40 08bb6f4
Author: j-so <jenns@microsoft.com>
Date:   Mon Jun 15 18:30:21 2020 -0700

    Merge branch 'master' into jenns/splitpipeline_docfix

commit 2796b40024
Author: j-so <jenns@microsoft.com>
Date:   Mon Jun 15 18:30:00 2020 -0700

    remove old pipeline

commit 08bb6f4a26
Author: David Tesar <david.tesar@microsoft.com>
Date:   Mon Jun 15 14:09:12 2020 -0700

    Simplify docs flow (#297)

commit cd762ecaa9
Author: jotaylo <jotaylo@microsoft.com>
Date:   Mon Jun 15 12:28:23 2020 -0700

    Move instruction to install AML extension to Azure Devops setup instructions (#298)
This commit is contained in:
j-so 2020-06-15 18:32:22 -07:00
Родитель dbe8b33ab2
Коммит 9bc9f8b8d5
10 изменённых файлов: 106 добавлений и 398 удалений

Просмотреть файл

@ -30,14 +30,9 @@ variables:
value: 'scoring/scoreB.py'
steps:
- task: AzureCLI@1
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
scriptLocation: inlineScript
workingDirectory: $(Build.SourcesDirectory)
inlineScript: |
set -e
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python3 -m ml_service.util.create_scoring_image
displayName: 'Create Scoring Image'
- template: diabetes_regression-package-model-template.yml
parameters:
modelId: $(MODEL_NAME):$(MODEL_VERSION)
scoringScriptPath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/$(SCORE_SCRIPT)'
condaFilePath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/conda_dependencies.yml'

Просмотреть файл

@ -1,97 +0,0 @@
# Continuous Integration (CI) pipeline that orchestrates the training, evaluation, and registration of the diabetes_regression model.
resources:
containers:
- container: mlops
image: mcr.microsoft.com/mlops/python:latest
pr: none
trigger:
branches:
include:
- master
paths:
include:
- diabetes_regression/
- ml_service/pipelines/diabetes_regression_build_train_pipeline.py
- ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py
- ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r_on_dbricks.py
variables:
- template: diabetes_regression-variables-template.yml
- group: devopsforai-aml-vg
pool:
vmImage: ubuntu-latest
stages:
- stage: 'Model_CI'
displayName: 'Model CI'
jobs:
- job: "Model_CI_Pipeline"
displayName: "Model CI Pipeline"
container: mlops
timeoutInMinutes: 0
steps:
- template: code-quality-template.yml
- task: AzureCLI@1
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
scriptLocation: inlineScript
workingDirectory: $(Build.SourcesDirectory)
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
# Invoke the Python building and publishing a training pipeline
python -m ml_service.pipelines.diabetes_regression_build_train_pipeline
displayName: 'Publish Azure Machine Learning Pipeline'
- stage: 'Trigger_AML_Pipeline'
displayName: 'Train and evaluate model'
condition: succeeded()
variables:
BUILD_URI: '$(SYSTEM.COLLECTIONURI)$(SYSTEM.TEAMPROJECT)/_build/results?buildId=$(BUILD.BUILDID)'
jobs:
- job: "Get_Pipeline_ID"
condition: and(succeeded(), eq(coalesce(variables['auto-trigger-training'], 'true'), 'true'))
displayName: "Get Pipeline ID for execution"
container: mlops
timeoutInMinutes: 0
steps:
- task: AzureCLI@1
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
scriptLocation: inlineScript
workingDirectory: $(Build.SourcesDirectory)
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python -m ml_service.pipelines.run_train_pipeline --output_pipeline_id_file "pipeline_id.txt" --skip_train_execution
# Set AMLPIPELINEID variable for next AML Pipeline task in next job
AMLPIPELINEID="$(cat pipeline_id.txt)"
echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINEID"
name: 'getpipelineid'
displayName: 'Get Pipeline ID'
- job: "Run_ML_Pipeline"
dependsOn: "Get_Pipeline_ID"
displayName: "Trigger ML Training Pipeline"
timeoutInMinutes: 0
pool: server
variables:
AMLPIPELINE_ID: $[ dependencies.Get_Pipeline_ID.outputs['getpipelineid.AMLPIPELINEID'] ]
steps:
- task: ms-air-aiagility.vss-services-azureml.azureml-restApi-task.MLPublishedPipelineRestAPITask@0
displayName: 'Invoke ML pipeline'
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
PipelineId: '$(AMLPIPELINE_ID)'
ExperimentName: '$(EXPERIMENT_NAME)'
PipelineParameters: '"ParameterAssignments": {"model_name": "$(MODEL_NAME)"}, "tags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}, "StepTags": {"BuildId": "$(Build.BuildId)", "BuildUri": "$(BUILD_URI)"}'
- job: "Training_Run_Report"
dependsOn: "Run_ML_Pipeline"
condition: always()
displayName: "Publish artifact if new model was registered"
container: mlops
timeoutInMinutes: 0
steps:
- template: diabetes_regression-publish-model-artifact-template.yml

Просмотреть файл

@ -1,4 +1,4 @@
# Continuous Integration (CI) pipeline that orchestrates the training, evaluation, registration, deployment, and testing of the diabetes_regression model.
# Continuous Integration (CI) pipeline that orchestrates the training, evaluation, and registration of the diabetes_regression model.
resources:
containers:
@ -27,7 +27,6 @@ pool:
stages:
- stage: 'Model_CI'
displayName: 'Model CI'
condition: not(variables['MODEL_BUILD_ID'])
jobs:
- job: "Model_CI_Pipeline"
displayName: "Model CI Pipeline"
@ -48,8 +47,8 @@ stages:
displayName: 'Publish Azure Machine Learning Pipeline'
- stage: 'Trigger_AML_Pipeline'
displayName: 'Train model'
condition: and(succeeded(), not(variables['MODEL_BUILD_ID']))
displayName: 'Train and evaluate model'
condition: succeeded()
variables:
BUILD_URI: '$(SYSTEM.COLLECTIONURI)$(SYSTEM.TEAMPROJECT)/_build/results?buildId=$(BUILD.BUILDID)'
jobs:
@ -91,116 +90,8 @@ stages:
- job: "Training_Run_Report"
dependsOn: "Run_ML_Pipeline"
condition: always()
displayName: "Determine if evaluation succeeded and new model is registered"
displayName: "Publish artifact if new model was registered"
container: mlops
timeoutInMinutes: 0
steps:
- template: diabetes_regression-get-model-version-template.yml
- stage: 'Deploy_ACI'
displayName: 'Deploy to ACI'
dependsOn: Trigger_AML_Pipeline
condition: and(or(succeeded(), variables['MODEL_BUILD_ID']), variables['ACI_DEPLOYMENT_NAME'])
jobs:
- job: "Deploy_ACI"
displayName: "Deploy to ACI"
container: mlops
timeoutInMinutes: 0
steps:
- template: diabetes_regression-get-model-version-template.yml
- task: ms-air-aiagility.vss-services-azureml.azureml-model-deploy-task.AMLModelDeploy@0
displayName: 'Azure ML Model Deploy'
inputs:
azureSubscription: $(WORKSPACE_SVC_CONNECTION)
modelSourceType: manualSpec
modelName: '$(MODEL_NAME)'
modelVersion: $(MODEL_VERSION)
inferencePath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/inference_config.yml'
deploymentTarget: ACI
deploymentName: $(ACI_DEPLOYMENT_NAME)
deployConfig: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/deployment_config_aci.yml'
overwriteExistingDeployment: true
- task: AzureCLI@1
displayName: 'Smoke test'
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
scriptLocation: inlineScript
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python -m ml_service.util.smoke_test_scoring_service --type ACI --service "$(ACI_DEPLOYMENT_NAME)"
- stage: 'Deploy_AKS'
displayName: 'Deploy to AKS'
dependsOn: Deploy_ACI
condition: and(succeeded(), variables['AKS_DEPLOYMENT_NAME'])
jobs:
- job: "Deploy_AKS"
displayName: "Deploy to AKS"
container: mlops
timeoutInMinutes: 0
steps:
- template: diabetes_regression-get-model-version-template.yml
- task: ms-air-aiagility.vss-services-azureml.azureml-model-deploy-task.AMLModelDeploy@0
displayName: 'Azure ML Model Deploy'
inputs:
azureSubscription: $(WORKSPACE_SVC_CONNECTION)
modelSourceType: manualSpec
modelName: '$(MODEL_NAME)'
modelVersion: $(MODEL_VERSION)
inferencePath: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/inference_config.yml'
deploymentTarget: AKS
aksCluster: $(AKS_COMPUTE_NAME)
deploymentName: $(AKS_DEPLOYMENT_NAME)
deployConfig: '$(Build.SourcesDirectory)/$(SOURCES_DIR_TRAIN)/scoring/deployment_config_aks.yml'
overwriteExistingDeployment: true
- task: AzureCLI@1
displayName: 'Smoke test'
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
scriptLocation: inlineScript
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python -m ml_service.util.smoke_test_scoring_service --type AKS --service "$(AKS_DEPLOYMENT_NAME)"
- stage: 'Deploy_Webapp'
displayName: 'Deploy to Webapp'
dependsOn: Trigger_AML_Pipeline
condition: and(or(succeeded(), variables['MODEL_BUILD_ID']), variables['WEBAPP_DEPLOYMENT_NAME'])
jobs:
- job: "Deploy_Webapp"
displayName: "Deploy to Webapp"
container: mlops
timeoutInMinutes: 0
steps:
- template: diabetes_regression-get-model-version-template.yml
- task: AzureCLI@1
displayName: 'Create scoring image and set IMAGE_LOCATION variable'
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
scriptLocation: inlineScript
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python -m ml_service.util.create_scoring_image --output_image_location_file image_location.txt
# Output image location to Azure DevOps job
IMAGE_LOCATION="$(cat image_location.txt)"
echo "##vso[task.setvariable variable=IMAGE_LOCATION]$IMAGE_LOCATION"
- task: AzureWebAppContainer@1
name: WebAppDeploy
displayName: 'Azure Web App on Container Deploy'
inputs:
azureSubscription: '$(AZURE_RM_SVC_CONNECTION)'
appName: '$(WEBAPP_DEPLOYMENT_NAME)'
resourceGroupName: '$(RESOURCE_GROUP)'
imageName: '$(IMAGE_LOCATION)'
- task: AzureCLI@1
displayName: 'Smoke test'
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
scriptLocation: inlineScript
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python -m ml_service.util.smoke_test_scoring_service --type Webapp --service "$(WebAppDeploy.AppServiceApplicationUrl)/score"
- template: diabetes_regression-publish-model-artifact-template.yml

Просмотреть файл

@ -1,15 +0,0 @@
# Pipeline template that attempts to get the latest model version and adds it to the environment for subsequent tasks to use.
steps:
- task: AzureCLI@1
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
scriptLocation: inlineScript
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python -m ml_service.pipelines.diabetes_regression_verify_train_pipeline --build_id $(modelbuildid) --output_model_version_file "model_version.txt"
# Output model version to Azure DevOps job
MODEL_VERSION="$(cat model_version.txt)"
echo "##vso[task.setvariable variable=MODEL_VERSION]$MODEL_VERSION"
name: 'getversion'
displayName: "Determine if evaluation succeeded and new model is registered"

Просмотреть файл

@ -1,18 +1,3 @@
# Bootstrap from MLOpsPython repository
To use this existing project structure and scripts for your new ML project, you can quickly get started from the existing repository, bootstrap and create a template that works for your ML project.
Bootstrapping will prepare a directory structure for your project which includes:
* renaming files and folders from the base project name `diabetes_regression` to your project name
* fixing imports and absolute path based on your project name
* deleting and cleaning up some directories
To bootstrap from the existing MLOpsPython repository:
1. Ensure Python 3 is installed locally
1. Clone this repository locally
1. Run bootstrap.py script
`python bootstrap.py -d [dirpath] -n [projectname]`
* `[dirpath]` is the absolute path to the root of the directory where MLOpsPython is cloned
* `[projectname]` is the name of your ML project
For steps on how to use the bootstrap script, please see the "Bootstrap the project" section of the [custom model guide](../docs/custom_model.md#bootstrap-the-project).

Просмотреть файл

@ -84,14 +84,12 @@ def replace_project_name(project_dir, project_name, rename_name):
files = [r".env.example",
r".pipelines/code-quality-template.yml",
r".pipelines/pr.yml",
r".pipelines/diabetes_regression-cd-deploy.yml",
r".pipelines/diabetes_regression-ci-train-register.yml",
r".pipelines/diabetes_regression-cd.yml",
r".pipelines/diabetes_regression-ci.yml",
r".pipelines/abtest.yml",
r".pipelines/diabetes_regression-ci-image.yml",
r".pipelines/diabetes_regression-publish-model-artifact-template.yml", # NOQA: E501
r".pipelines/diabetes_regression-get-model-id-artifact-template.yml", # NOQA: E501
r".pipelines/diabetes_regression-get-model-version-template.yml", # NOQA: E501
r".pipelines/diabetes_regression-variables-template.yml",
r"environment_setup/Dockerfile",
r"environment_setup/install_requirements.sh",

Просмотреть файл

@ -3,11 +3,11 @@
This document provides steps to follow when using this repository as a template to train models and deploy the models with real-time inference in Azure ML with your own scripts and data.
1. Follow the MLOpsPython [Getting Started](getting_started.md) guide
1. Follow the MLOpsPython [bootstrap instructions](../bootstrap/README.md) to create your project starting point
1. Bootstrap the project
1. Configure training data
1. [If necessary] Convert your ML experimental code into production ready code
1. Replace the training code
1. Update the evaluation code
1. [Optional] Update the evaluation code
1. Customize the build agent environment
1. [If appropriate] Replace the score code
@ -17,24 +17,36 @@ Follow the [Getting Started](getting_started.md) guide to set up the infrastruct
Take a look at the [Repo Details](code_description.md) document for a description of the structure of this repository.
## Follow the Bootstrap instructions
## Bootstrap the project
The [Bootstrap from MLOpsPython repository](../bootstrap/README.md) guide will help you to quickly prepare the repository for your project.
Bootstrapping will prepare the directory structure to be used for your project name which includes:
* renaming files and folders from the base project name `diabetes_regression` to your project name
* fixing imports and absolute path based on your project name
* deleting and cleaning up some directories
**Note:** Since the bootstrap script will rename the `diabetes_regression` folder to the project name of your choice, we'll refer to your project as `[project name]` when paths are involved.
To bootstrap from the existing MLOpsPython repository:
1. Ensure Python 3 is installed locally
1. From a local copy of the code, run the `bootstrap.py` script in the `bootstrap` folder
`python bootstrap.py -d [dirpath] -n [projectname]`
* `[dirpath]` is the absolute path to the root of the directory where MLOpsPython is cloned
* `[projectname]` is the name of your ML project
## Configure training data
The training ML pipeline uses a [sample diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) as training data.
To use your own data:
**Important** Convert the template to use your own Azure ML Dataset for model training via these steps:
1. [Create a Dataset](https://docs.microsoft.com/azure/machine-learning/how-to-create-register-datasets) in your Azure ML workspace
1. Update the `DATASET_NAME` and `DATASTORE_NAME` variables in `.pipelines/[project name]-variables-template.yml`
## Convert your ML experimental code into production ready code
The MLOpsPython template creates an Azure Machine Learning (ML) pipeline that invokes a set of [Azure ML pipeline steps](https://docs.microsoft.com/python/api/azureml-pipeline-steps/azureml.pipeline.steps) (see `ml_service/pipelines/[project name]_build_train_pipeline.py`). If your experiment is currently in a Jupyter notebook, it will need to be refactored into scripts that can be run independantly and dropped into the template which the existing Azure ML pipeline steps utilize.
The MLOpsPython template creates an Azure Machine Learning (ML) pipeline that invokes a set of [Azure ML pipeline steps](https://docs.microsoft.com/python/api/azureml-pipeline-steps/azureml.pipeline.steps) (see `ml_service/pipelines/[project name]_build_train_pipeline.py`). If your experiment is currently in a Jupyter notebook, it will need to be refactored into scripts that can be run independently and dropped into the template which the existing Azure ML pipeline steps utilize.
1. Refactor your experiment code into scripts
1. [Recommended] Prepare unit tests

Просмотреть файл

@ -1,11 +1,12 @@
# Getting Started with MLOpsPython <!-- omit in toc -->
This guide shows how to get MLOpsPython working with a sample ML project ***diabetes_regression***. The project creates a linear regression model to predict diabetes. You can adapt this example to use with your own project.
This guide shows how to get MLOpsPython working with a sample ML project ***diabetes_regression***. The project creates a linear regression model to predict diabetes. Once you have completed the steps in this getting started guide, CI/CD DevOps practices will be enabled for model training and serving.
We recommend working through this guide completely to ensure everything is working in your environment. After the sample is working, follow the [bootstrap instructions](../bootstrap/README.md) to convert the ***diabetes_regression*** sample into a starting point for your project.
If you would like to bring your own model code to use this template structure, follow the [custom model](custom_model.md) guide. We recommend completing this getting started guide with the diabetes model through ACI deployment first to ensure everything is working in your environment before converting the template to use your own model code.
- [Setting up Azure DevOps](#setting-up-azure-devops)
- [Install the Azure Machine Learning extension](#install-the-azure-machine-learning-extension)
- [Get the code](#get-the-code)
- [Create a Variable Group for your Pipeline](#create-a-variable-group-for-your-pipeline)
- [Variable Descriptions](#variable-descriptions)
@ -33,6 +34,12 @@ You'll use Azure DevOps for running the multi-stage pipeline with build, model t
If you already have an Azure DevOps organization, create a new project using the guide at [Create a project in Azure DevOps and TFS](https://docs.microsoft.com/en-us/azure/devops/organizations/projects/create-project?view=azure-devops).
### Install the Azure Machine Learning extension
Install the **Azure Machine Learning** extension to your Azure DevOps organization from the [Visual Studio Marketplace](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml).
This extension contains the Azure ML pipeline tasks and adds the ability to create Azure ML Workspace service connections.
## Get the code
We recommend using the [repository template](https://github.com/microsoft/MLOpsPython/generate), which effectively forks the repository to your own GitHub location and squashes the history. You can use the resulting repository for this guide and for your own experimentation.
@ -118,8 +125,6 @@ Check that the newly created resources appear in the [Azure Portal](https://port
At this point, you should have an Azure ML Workspace created. Similar to the Azure Resource Manager service connection, you need to create an additional one for the Azure ML Workspace.
Install the **Azure Machine Learning** extension to your Azure DevOps organization from the [Visual Studio Marketplace](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml). The extension is required for the service connection.
Create a new service connection to your Azure ML Workspace using the [Machine Learning Extension](https://marketplace.visualstudio.com/items?itemName=ms-air-aiagility.vss-services-azureml) instructions to enable executing the Azure ML training pipeline. The connection name needs to match `WORKSPACE_SVC_CONNECTION` that you set in the variable group above.
![Created resources](./images/ml-ws-svc-connection.png)
@ -127,25 +132,30 @@ Create a new service connection to your Azure ML Workspace using the [Machine Le
**Note:** Similar to the Azure Resource Manager service connection you created earlier, creating a service connection with Azure Machine Learning workspace scope requires 'Owner' or 'User Access Administrator' permissions on the Workspace.
You'll need sufficient permissions to register an application with your Azure AD tenant, or you can get the ID and secret of a service principal from your Azure AD Administrator. That principal must have Contributor permissions on the Azure ML Workspace.
## Set up Build, Release Trigger, and Release Multi-Stage Pipeline
## Set up Build, Release Trigger, and Release Multi-Stage Pipelines
Now that you've provisioned all the required Azure resources and service connections, you can set up the pipeline for deploying your machine learning model to production. The pipeline has a sequence of stages for:
Now that you've provisioned all the required Azure resources and service connections, you can set up the pipelines for training (CI) and deploying (CD) your machine learning model to production.
1. **Model Code Continuous Integration:** triggered on code changes to master branch on GitHub. Runs linting, unit tests, code coverage and publishes a training pipeline.
1. **Train Model**: invokes the Azure ML service to trigger the published training pipeline to train, evaluate, and register a model.
1. **Release Deployment:** deploys a model to either [Azure Container Instances (ACI)](https://azure.microsoft.com/en-us/services/container-instances/), [Azure Kubernetes Service (AKS)](https://azure.microsoft.com/en-us/services/kubernetes-service), or [Azure App Service](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-app-service) environments. For simplicity, you're going to initially focus on Azure Container Instances. See [Further Exploration](#further-exploration) for other deployment types.
1. **Model CI, training, evaluation, and registration** - triggered on code changes to master branch on GitHub. Runs linting, unit tests, code coverage, and publishes and runs the training pipeline. If a new model is registered after evaluation, it creates a build artifact containing the JSON metadata of the model. Definition: [diabetes_regression-ci.yml](../.pipelines/diabetes_regression-ci.yml).
1. **Release deployment** - consumes the artifact of the previous pipeline and deploys a model to either [Azure Container Instances (ACI)](https://azure.microsoft.com/en-us/services/container-instances/), [Azure Kubernetes Service (AKS)](https://azure.microsoft.com/en-us/services/kubernetes-service), or [Azure App Service](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-app-service) environments. See [Further Exploration](#further-exploration) for other deployment types. Definition: [diabetes_regression-cd.yml](../.pipelines/diabetes_regression-cd.yml).
1. **Note:** Edit the pipeline definition to remove unused stages. For example, if you're deploying to Azure Container Instances and Azure Kubernetes Service only, delete the unused `Deploy_Webapp` stage.
### Set up the Pipeline
These pipelines use a Docker container on the Azure Pipelines agents to accomplish the pipeline steps. The container image ***mcr.microsoft.com/mlops/python:latest*** is built with [this Dockerfile](../environment_setup/Dockerfile) and has all the necessary dependencies installed for MLOpsPython and ***diabetes_regression***. This image is an example of a custom Docker image with a pre-baked environment. The environment is guaranteed to be the same on any building agent, VM, or local machine. In your project, you'll want to build your own Docker image that only contains the dependencies and tools required for your use case. Your image will probably be smaller and faster, and it will be maintained by your team.
### Set up the Model CI, training, evaluation, and registration pipeline
In your Azure DevOps project, create and run a new build pipeline based on the [diabetes_regression-ci.yml](../.pipelines/diabetes_regression-ci.yml)
pipeline definition in your forked repository.
![Configure CI build pipeline](./images/ci-build-pipeline-configure.png)
If you plan to use the release deployment pipeline (in the next section), you will need to rename this pipeline to `Model-Train-Register-CI`.
Once the pipeline is finished, check the execution result:
![Build](./images/multi-stage-aci.png)
![Build](./images/model-train-register.png)
And the pipeline artifacts:
![Build](./images/model-train-register-artifacts.png)
Also check the published training pipeline in the **mlops-AML-WS** workspace in [Azure Portal](https://portal.azure.com/):
@ -153,6 +163,12 @@ Also check the published training pipeline in the **mlops-AML-WS** workspace in
Great, you now have the build pipeline set up which automatically triggers every time there's a change in the master branch!
After the pipeline is finished, you'll see a new model in the **ML Workspace**:
![Trained model](./images/trained-model.png)
To disable the automatic trigger of the training pipeline, change the `auto-trigger-training` variable as listed in the `.pipelines/diabetes_regression-ci.yml` pipeline to `false`. You can also override the variable at runtime execution of the pipeline.
The pipeline stages are summarized below:
#### Model CI
@ -168,28 +184,64 @@ The pipeline stages are summarized below:
- This is an **agentless** job. The CI pipeline can wait for ML pipeline completion for hours or even days without using agent resources.
- Determine if a new model was registered by the *ML Training Pipeline*.
- If the model evaluation determines that the new model doesn't perform any better than the previous one, the new model won't register and the *ML Training Pipeline* will be **canceled**. In this case, you'll see a message in the 'Train Model' job under the 'Determine if evaluation succeeded and new model is registered' step saying '**Model was not registered for this run.**'
- See [evaluate_model.py](../diabetes_regression/evaluate/evaluate_model.py#L118) for the evaluation logic and [diabetes_regression_verify_train_pipeline.py](../ml_service/pipelines/diabetes_regression_verify_train_pipeline.py#L54) for the ML pipeline reporting logic.
- See [evaluate_model.py](../diabetes_regression/evaluate/evaluate_model.py#L118) for the evaluation logic.
- [Additional Variables and Configuration](#additional-variables-and-configuration) for configuring this and other behavior.
#### Create pipeline artifact
- Get the info about the registered model
- Create a pipeline artifact called `model` that contains a `model.json` file containing the model information.
### Set up the Release deployment pipeline
---
**PREREQUISITE**
In order to use this pipeline:
1. Follow the steps to set up the Model CI, training, evaluation, and registration pipeline.
1. You **must** rename your model CI/train/eval/register pipeline to `Model-Train-Register-CI`.
The release deployment pipeline relies on the model CI pipeline and references it by name.
---
In this section, we will set up the pipeline for release deployment to ACI, AKS, or Webapp. This pipeline uses the resulting artifact of the [Model-Train-Register-CI pipeline](#set-up-the-model-ci-training-evaluation-and-registration-pipeline) to identify the model to be deployed.
This pipeline has the following behaviors:
- The pipeline will **automatically trigger** on completion of the Model-Train-Register-CI pipeline
- The pipeline will default to using the latest successful build of the Model-Train-Register-CI pipeline. It will deploy the model produced by that build.
- You can specify a `Model-Train-Register-CI` build ID when running the pipeline manually. You can find this in the URL of the build, and the model registered from that build will also be tagged with the build ID. This is useful to skip model training and registration, and deploy a model successfully registered by a `Model-Train-Register-CI` build.
In your Azure DevOps project, create and run a new build pipeline based on the [diabetes_regression-cd.yml](../.pipelines/diabetes_regression-cd.yml)
pipeline definition in your forked repository.
Your first run will use the latest model created by the `Model-Train-Register-CI` pipeline.
Once the pipeline is finished, check the execution result:
![Build](./images/model-deploy-result.png)
To specify a particular build's model, set the `Model Train CI Build Id` parameter to the build Id you would like to use.
![Build](./images/model-deploy-configure.png)
Once your pipeline run begins, you can see the model name and version downloaded from the `Model-Train-Register-CI` pipeline.
![Build](./images/model-deploy-artifact-logs.png)
The pipeline has the following stage:
#### Deploy to ACI
- Deploy the model to the QA environment in [Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/).
- Smoke test
- The test sends a sample query to the scoring web service and verifies that it returns the expected response. Have a look at the [smoke test code](../ml_service/util/smoke_test_scoring_service.py) for an example.
The pipeline uses a Docker container on the Azure Pipelines agents to accomplish the pipeline steps. The container image ***mcr.microsoft.com/mlops/python:latest*** is built with [this Dockerfile](../environment_setup/Dockerfile) and has all the necessary dependencies installed for MLOpsPython and ***diabetes_regression***. This image is an example of a custom Docker image with a pre-baked environment. The environment is guaranteed to be the same on any building agent, VM, or local machine. In your project, you'll want to build your own Docker image that only contains the dependencies and tools required for your use case. Your image will probably be smaller and faster, and it will be maintained by your team.
After the pipeline is finished, you'll see a new model in the **ML Workspace**:
![Trained model](./images/trained-model.png)
To disable the automatic trigger of the training pipeline, change the `auto-trigger-training` variable as listed in the `.pipelines\diabetes_regression-ci.yml` pipeline to `false`. You can also override the variable at runtime execution of the pipeline.
To skip model training and registration, and deploy a model successfully registered by a previous build (for testing changes to the score file or inference configuration), add the variable `MODEL_BUILD_ID` when the pipeline is queued, and set the value to the ID of the previous build.
## Further Exploration
You should now have a working pipeline that can get you started with MLOpsPython. Below are some additional features offered that might suit your scenario.
You should now have a working set of pipelines that can get you started with MLOpsPython. Below are some additional features offered that might suit your scenario.
### Deploy the model to Azure Kubernetes Service

Просмотреть файл

@ -1,113 +0,0 @@
# CI/CD pipelines for model train/register and deployment
Follow this guide to set up two separate pipelines to train/register models and deploy models. This set of pipelines is functionally similar to the [diabetes_regression-ci.yml](../.pipelines/diabetes_regression-ci.yml) pipeline in the [getting started](getting_started.md) guide.
1. **Model CI, training, evaluation, and registration** - triggered on code changes to master branch on GitHub. Runs linting, unit tests, code coverage, and publishes and runs the training pipeline. If a new model is registered after evaluation, it creates a build artifact containing the JSON metadata of the model. Definition: [diabetes_regression-train-register.yml](../.pipelines/diabetes_regression-train-register.yml).
1. **Release deployment** - consumes the artifact of the previous pipeline and deploys a model to either [Azure Container Instances (ACI)](https://azure.microsoft.com/en-us/services/container-instances/), [Azure Kubernetes Service (AKS)](https://azure.microsoft.com/en-us/services/kubernetes-service), or [Azure App Service](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-app-service) environments. Definition: [diabetes_regression-deploy.yml](../.pipelines/diabetes_regression-deploy.yml).
## Prerequisites
---
It is recommended to go through the [getting started guide](getting_started.md) before starting this guide.
---
Before continuing this guide, you will need:
- An existing workspace. To set up your environment with a new workspace, follow the steps in the [getting started guide](getting_started.md).
- An Azure DevOps Service Connection with your Azure ML Workspace.
- A variable group named **``devopsforai-aml-vg``** with the required variables set.
## Model CI, training, evaluation, and registration pipeline
In this section, we will create the pipeline that will perform model CI, training, evaluation, and registration.
### Set up the Pipeline
In your Azure DevOps project, create and run a new build pipeline based on the [diabetes_regression-ci-train-register.yml](../.pipelines/diabetes_regression-ci-train-register.yml)
pipeline definition in your forked repository.
If you plan to use the release deployment pipeline (in the next section), you will need to rename this pipeline to `Model-Train-Register-CI`.
Once the pipeline is finished, check the execution result:
![Build](./images/model-train-register.png)
And the pipeline artifacts:
![Build](./images/model-train-register-artifacts.png)
The pipeline stages are summarized below:
#### Model CI
- Linting (code quality analysis)
- Unit tests and code coverage analysis
- Build and publish *ML Training Pipeline* in an *ML Workspace*
#### Train model
- Determine the ID of the *ML Training Pipeline* published in the previous stage.
- Trigger the *ML Training Pipeline* and wait for it to complete.
- This is an **agentless** job. The CI pipeline can wait for ML pipeline completion for hours or even days without using agent resources.
- Determine if a new model was registered by the *ML Training Pipeline*.
- If the model evaluation determines that the new model doesn't perform any better than the previous one, the new model won't register and the *ML Training Pipeline* will be **canceled**. In this case, you'll see a message in the 'Train Model' job under the 'Determine if evaluation succeeded and new model is registered' step saying '**Model was not registered for this run.**'
- See [evaluate_model.py](../diabetes_regression/evaluate/evaluate_model.py#L118) for the evaluation logic.
- [Additional Variables and Configuration](getting_started.md#additional-variables-and-configuration) for configuring this and other behavior.
#### Create pipeline artifact
- Get the info about the registered model
- Create a pipeline artifact called `model` that contains a `model.json` file containing the model information.
## Release deployment pipeline
---
**PREREQUISITE**
In order to use this pipeline:
1. Follow the steps to set up the Model CI, training, evaluation, and registration pipeline.
1. You **must** rename your model CI/train/eval/register pipeline to `Model-Train-Register-CI`.
The release deployment pipeline relies on the model CI pipeline and references it by name.
---
In this section, we will set up the pipeline for release deployment to ACI, AKS, or Webapp. This pipeline uses the resulting artifact of the [Model-Train-Register-CI pipeline](#) to identify the model to be deployed.
This pipeline has the following behaviors:
- The pipeline will **automatically trigger** on completion of the Model-Train-Register-CI pipeline
- The pipeline will default to using the latest successful build of the Model-Train-Register-CI pipeline. It will deploy the model produced by that build.
- You can specify a `Model-Train-Register-CI` build ID when running the pipeline manually. You can find this in the URL of the build, and the model registered from that build will also be tagged with the build ID.
### Set up the pipeline
In your Azure DevOps project, create and run a new build pipeline based on the [diabetes_regression-cd-deploy.yml](../.pipelines/diabetes_regression-cd-deploy.yml)
pipeline definition in your forked repository.
Your first run will use the latest model created by the `Model-Train-Register-CI` pipeline.
Once the pipeline is finished, check the execution result:
![Build](./images/model-deploy-result.png)
To specify a particular build's model, set the `Model Train CI Build Id` parameter to the build Id you would like to use.
![Build](./images/model-deploy-configure.png)
Once your pipeline run begins, you can see the model name and version downloaded from the `Model-Train-Register-CI` pipeline.
![Build](./images/model-deploy-artifact-logs.png)
The pipeline has the following stage:
#### Deploy to ACI
- Deploy the model to the QA environment in [Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/).
- Smoke test
- The test sends a sample query to the scoring web service and verifies that it returns the expected response. Have a look at the [smoke test code](../ml_service/util/smoke_test_scoring_service.py) for an example.
See [Further Exploration](getting_started.md#further-exploration) to learn about other deployment targets.