updated for workshop lab
This commit is contained in:
Родитель
42843c445e
Коммит
956baea111
|
@ -1,141 +0,0 @@
|
||||||
# Byte-compiled / optimized / DLL files
|
|
||||||
__pycache__/
|
|
||||||
*.py[cod]
|
|
||||||
*$py.class
|
|
||||||
|
|
||||||
# Mac stuff
|
|
||||||
.DS_Store
|
|
||||||
|
|
||||||
# C extensions
|
|
||||||
*.so
|
|
||||||
|
|
||||||
# Distribution / packaging
|
|
||||||
.Python
|
|
||||||
build/
|
|
||||||
develop-eggs/
|
|
||||||
dist/
|
|
||||||
downloads/
|
|
||||||
eggs/
|
|
||||||
.eggs/
|
|
||||||
parts/
|
|
||||||
sdist/
|
|
||||||
var/
|
|
||||||
wheels/
|
|
||||||
pip-wheel-metadata/
|
|
||||||
share/python-wheels/
|
|
||||||
*.egg-info/
|
|
||||||
.installed.cfg
|
|
||||||
*.egg
|
|
||||||
MANIFEST
|
|
||||||
|
|
||||||
# PyInstaller
|
|
||||||
# Usually these files are written by a python script from a template
|
|
||||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
||||||
*.manifest
|
|
||||||
*.spec
|
|
||||||
|
|
||||||
# Installer logs
|
|
||||||
pip-log.txt
|
|
||||||
pip-delete-this-directory.txt
|
|
||||||
|
|
||||||
# Unit test / coverage reports
|
|
||||||
htmlcov/
|
|
||||||
.tox/
|
|
||||||
.nox/
|
|
||||||
.coverage
|
|
||||||
.coverage.*
|
|
||||||
.cache
|
|
||||||
nosetests.xml
|
|
||||||
coverage.xml
|
|
||||||
*.cover
|
|
||||||
*.py,cover
|
|
||||||
.hypothesis/
|
|
||||||
.pytest_cache/
|
|
||||||
|
|
||||||
# Translations
|
|
||||||
*.mo
|
|
||||||
*.pot
|
|
||||||
|
|
||||||
# Django stuff:
|
|
||||||
*.log
|
|
||||||
local_settings.py
|
|
||||||
db.sqlite3
|
|
||||||
db.sqlite3-journal
|
|
||||||
|
|
||||||
# Flask stuff:
|
|
||||||
instance/
|
|
||||||
.webassets-cache
|
|
||||||
|
|
||||||
# Scrapy stuff:
|
|
||||||
.scrapy
|
|
||||||
|
|
||||||
# Sphinx documentation
|
|
||||||
docs/_build/
|
|
||||||
|
|
||||||
# PyBuilder
|
|
||||||
target/
|
|
||||||
|
|
||||||
# Jupyter Notebook
|
|
||||||
.ipynb_checkpoints
|
|
||||||
|
|
||||||
# IPython
|
|
||||||
profile_default/
|
|
||||||
ipython_config.py
|
|
||||||
|
|
||||||
# pyenv
|
|
||||||
.python-version
|
|
||||||
|
|
||||||
# pipenv
|
|
||||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
||||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
||||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
||||||
# install all needed dependencies.
|
|
||||||
#Pipfile.lock
|
|
||||||
|
|
||||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
|
||||||
__pypackages__/
|
|
||||||
|
|
||||||
# Celery stuff
|
|
||||||
celerybeat-schedule
|
|
||||||
celerybeat.pid
|
|
||||||
|
|
||||||
# SageMath parsed files
|
|
||||||
*.sage.py
|
|
||||||
|
|
||||||
# Environments
|
|
||||||
.env
|
|
||||||
.venv
|
|
||||||
env/
|
|
||||||
venv/
|
|
||||||
ENV/
|
|
||||||
env.bak/
|
|
||||||
venv.bak/
|
|
||||||
|
|
||||||
# Spyder project settings
|
|
||||||
.spyderproject
|
|
||||||
.spyproject
|
|
||||||
|
|
||||||
# Rope project settings
|
|
||||||
.ropeproject
|
|
||||||
|
|
||||||
# mkdocs documentation
|
|
||||||
/site
|
|
||||||
|
|
||||||
# mypy
|
|
||||||
.mypy_cache/
|
|
||||||
.dmypy.json
|
|
||||||
dmypy.json
|
|
||||||
|
|
||||||
# Pyre type checker
|
|
||||||
.pyre/
|
|
||||||
|
|
||||||
# Terraform
|
|
||||||
.terraform.lock.hcl
|
|
||||||
terraform.tfstate
|
|
||||||
terraform.tfstate.backup
|
|
||||||
.terraform.tfstate.lock.info
|
|
||||||
.terraform
|
|
||||||
terraform.tfvars
|
|
||||||
|
|
||||||
/infrastructure/bicep/main.json
|
|
||||||
! /infrastructure/bicep/bicepconfig.json
|
|
|
@ -1,14 +0,0 @@
|
||||||
repos:
|
|
||||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
||||||
rev: v4.2.0
|
|
||||||
hooks:
|
|
||||||
- id: check-yaml
|
|
||||||
- id: end-of-file-fixer
|
|
||||||
- id: trailing-whitespace
|
|
||||||
|
|
||||||
# Opinionated code formatter to forget about formatting
|
|
||||||
- repo: https://github.com/psf/black
|
|
||||||
rev: 21.12b0
|
|
||||||
hooks:
|
|
||||||
- id: black
|
|
||||||
additional_dependencies: ['click==8.0.4']
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
|
||||||
|
## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
|
||||||
|
|
||||||
|
.ipynb_aml_checkpoints/
|
||||||
|
*.amltmp
|
||||||
|
*.amltemp
|
|
@ -0,0 +1,6 @@
|
||||||
|
## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
|
||||||
|
## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
|
||||||
|
|
||||||
|
.ipynb_aml_checkpoints/
|
||||||
|
*.amltmp
|
||||||
|
*.amltemp
|
|
@ -0,0 +1,33 @@
|
||||||
|
# Azure MLOps (v2) CI/CD Example
|
||||||
|
|
||||||
|
This is a sample repo that shows how to create an automated CI/CD process using Azure Pipelines or GitHub Actions.
|
||||||
|
|
||||||
|
## Creating CI/CD with Azure Pipelines
|
||||||
|
|
||||||
|
### Create CI using Azure Pipeline Build Pipeline
|
||||||
|
Follow these instructions to create a CI pipeline for training:
|
||||||
|
<https://learn.microsoft.com/en-us/azure/machine-learning/how-to-devops-machine-learning>
|
||||||
|
|
||||||
|
### Create CD using Azure Pipeline Release Pipeline
|
||||||
|
1. Install Machine Learning for Azure Pipelines
|
||||||
|
![Install Machine Learning Extension for Azure Pipelines](./images/Install_ML_Extension.jpg)
|
||||||
|
2. Create a Release pipeline triggered by Azure Machine Learning Model Registry<br/>
|
||||||
|
2.1 Add the following to your release pipeline:<br />
|
||||||
|
- Azure Machine Learning Registry <br/>
|
||||||
|
- Inference Repo to Artifacts <br/>
|
||||||
|
- Add stages <br/>
|
||||||
|
![Create Release Pipeline](./images/Create_Release_Pipeline.jpg)
|
||||||
|
2.2 Add Azure CLI task for preparing environment <br/>
|
||||||
|
![Install CLI](./images/install_ML_cli.jpg)
|
||||||
|
2.3 Add Azure CLI task for model deployment
|
||||||
|
![Deploy Pipeline](./images/deploy_pipeline.jpg)
|
||||||
|
2.4 Enable trigger - Continuous Deployment
|
||||||
|
![Configure Continuous Deployment](./images/continous_deployment.jpg)
|
||||||
|
2.5 Predeployment Approval<br />
|
||||||
|
![Predeployment Approval](./images/predeploy_approval.jpg)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Creating CI + CD with GitHub Actions
|
||||||
|
|
||||||
|
<https://learn.microsoft.com/en-us/azure/machine-learning/how-to-github-actions-machine-learning?tabs=userlevel>
|
|
@ -0,0 +1,30 @@
|
||||||
|
trigger:
|
||||||
|
- main
|
||||||
|
|
||||||
|
pool:
|
||||||
|
vmImage: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- task: UsePythonVersion@0
|
||||||
|
inputs:
|
||||||
|
versionSpec: '3.8'
|
||||||
|
- script: pip install -r ci-cd/azure-pipelines/dev-requirements.txt
|
||||||
|
displayName: 'pip install notebook reqs'
|
||||||
|
- task: Bash@3
|
||||||
|
inputs:
|
||||||
|
filePath: 'ci-cd/azure-pipelines/setup-sdk.sh'
|
||||||
|
displayName: 'set up sdk'
|
||||||
|
|
||||||
|
- task: Bash@3
|
||||||
|
inputs:
|
||||||
|
filePath: 'ci-cd/azure-pipelines/setup-cli.sh'
|
||||||
|
displayName: 'set up CLI'
|
||||||
|
|
||||||
|
- task: AzureCLI@2
|
||||||
|
inputs:
|
||||||
|
azureSubscription: 'azureml-mldemo'
|
||||||
|
scriptType: 'bash'
|
||||||
|
scriptLocation: 'inlineScript'
|
||||||
|
inlineScript: |
|
||||||
|
train.sh
|
||||||
|
workingDirectory: 'ml-pipelines/cli'
|
|
@ -0,0 +1,10 @@
|
||||||
|
# required for notebook testing in workflow actions
|
||||||
|
# pinned to avoid surprises
|
||||||
|
ipython-genutils
|
||||||
|
ipykernel==5.5.5
|
||||||
|
papermill==2.3.3
|
||||||
|
pandas
|
||||||
|
matplotlib
|
||||||
|
tensorflow
|
||||||
|
tensorflow-hub
|
||||||
|
transformers
|
|
@ -0,0 +1,31 @@
|
||||||
|
trigger:
|
||||||
|
- main
|
||||||
|
|
||||||
|
pool:
|
||||||
|
vmImage: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- task: UsePythonVersion@0
|
||||||
|
inputs:
|
||||||
|
versionSpec: '3.8'
|
||||||
|
- script: pip install -r ci-cd/azure-pipelines/dev-requirements.txt
|
||||||
|
displayName: 'pip install notebook reqs'
|
||||||
|
- task: Bash@3
|
||||||
|
inputs:
|
||||||
|
filePath: 'ci-cd/azure-pipelines/setup-sdk.sh'
|
||||||
|
displayName: 'set up sdk'
|
||||||
|
|
||||||
|
- task: Bash@3
|
||||||
|
inputs:
|
||||||
|
filePath: 'ci-cd/azure-pipelines/setup-cli.sh'
|
||||||
|
displayName: 'set up CLI'
|
||||||
|
|
||||||
|
- task: AzureCLI@2
|
||||||
|
inputs:
|
||||||
|
azureSubscription: 'azureml-mldemo' #name of the AzureML service connection defined in Azure Pipelines
|
||||||
|
scriptType: 'bash'
|
||||||
|
scriptLocation: 'inlineScript'
|
||||||
|
inlineScript: |
|
||||||
|
sed -i -e "s/DefaultAzureCredential/AzureCliCredential/g" train-sdkv2.ipynb
|
||||||
|
papermill -k python train-sdkv2.ipynb train-sdkv2.output.ipynb
|
||||||
|
workingDirectory: 'ml-pipelines/sdk'
|
|
@ -0,0 +1,48 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# rc install - uncomment and adjust below to run all tests on a CLI release candidate
|
||||||
|
# az extension remove -n ml
|
||||||
|
|
||||||
|
# <az_ml_install>
|
||||||
|
az extension add -n ml -y
|
||||||
|
# </az_ml_install>
|
||||||
|
|
||||||
|
# Use a daily build
|
||||||
|
# az extension add --source https://azuremlsdktestpypi.blob.core.windows.net/wheels/sdk-cli-v2-public/ml-2.9.0-py3-none-any.whl --yes
|
||||||
|
# remove ml extension if it is installed
|
||||||
|
# if az extension show -n ml &>/dev/null; then
|
||||||
|
# echo -n 'Removing ml extension...'
|
||||||
|
# if ! az extension remove -n ml -o none --only-show-errors &>/dev/null; then
|
||||||
|
# echo 'Error failed to remove ml extension' >&2
|
||||||
|
# fi
|
||||||
|
# echo -n 'Re-installing ml...'
|
||||||
|
# fi
|
||||||
|
|
||||||
|
# if ! az extension add --yes --source "https://azuremlsdktestpypi.blob.core.windows.net/wheels/sdk-cli-v2-public/ml-2.10.0-py3-none-any.whl" -o none --only-show-errors &>/dev/null; then
|
||||||
|
# echo 'Error failed to install ml azure-cli extension' >&2
|
||||||
|
# exit 1
|
||||||
|
# fi
|
||||||
|
|
||||||
|
# az version
|
||||||
|
|
||||||
|
## For backward compatibility - running on old subscription
|
||||||
|
# <set_variables>
|
||||||
|
GROUP="azureml-examples"
|
||||||
|
LOCATION="eastus"
|
||||||
|
WORKSPACE="main"
|
||||||
|
# </set_variables>
|
||||||
|
|
||||||
|
# If RESOURCE_GROUP_NAME is empty, the az configure is pending.
|
||||||
|
RESOURCE_GROUP_NAME=${RESOURCE_GROUP_NAME:-}
|
||||||
|
if [[ -z "$RESOURCE_GROUP_NAME" ]]
|
||||||
|
then
|
||||||
|
echo "No resource group name [RESOURCE_GROUP_NAME] specified, defaulting to ${GROUP}."
|
||||||
|
# Installing extension temporarily assuming the run is on old subscription
|
||||||
|
# without bootstrap script.
|
||||||
|
|
||||||
|
# <az_configure_defaults>
|
||||||
|
az configure --defaults group=$GROUP workspace=$WORKSPACE location=$LOCATION
|
||||||
|
# </az_configure_defaults>
|
||||||
|
echo "Default resource group set to $GROUP"
|
||||||
|
else
|
||||||
|
echo "Workflows are using the new subscription."
|
||||||
|
fi
|
|
@ -0,0 +1,23 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# <az_ml_sdk_install>
|
||||||
|
# pip install --pre azure-ai-ml
|
||||||
|
# </az_ml_sdk_install>
|
||||||
|
|
||||||
|
# <mldesigner_install>
|
||||||
|
pip install mldesigner
|
||||||
|
# </mldesigner_install>
|
||||||
|
|
||||||
|
# <mltable_install>
|
||||||
|
pip install mltable
|
||||||
|
pip install pandas
|
||||||
|
# </mltable_install>
|
||||||
|
|
||||||
|
|
||||||
|
# <az_ml_sdk_test_install>
|
||||||
|
# pip install azure-ai-ml==0.1.0.b8
|
||||||
|
pip install azure-ai-ml
|
||||||
|
# https://docsupport.blob.core.windows.net/ml-sample-submissions/1905732/azure_ai_ml-1.0.0-py3-none-any.whl
|
||||||
|
# </az_ml_sdk_test_install>
|
||||||
|
|
||||||
|
pip list
|
|
@ -0,0 +1,6 @@
|
||||||
|
## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
|
||||||
|
## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
|
||||||
|
|
||||||
|
.ipynb_aml_checkpoints/
|
||||||
|
*.amltmp
|
||||||
|
*.amltemp
|
|
@ -0,0 +1,6 @@
|
||||||
|
## This file was auto generated by the Azure Machine Learning Studio. Please do not remove.
|
||||||
|
## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
|
||||||
|
|
||||||
|
.ipynb_aml_checkpoints/
|
||||||
|
*.amltmp
|
||||||
|
*.amltemp
|
Двоичный файл не отображается.
После Ширина: | Высота: | Размер: 48 KiB |
Двоичный файл не отображается.
После Ширина: | Высота: | Размер: 143 KiB |
Двоичный файл не отображается.
После Ширина: | Высота: | Размер: 80 KiB |
Двоичный файл не отображается.
После Ширина: | Высота: | Размер: 255 KiB |
Двоичный файл не отображается.
После Ширина: | Высота: | Размер: 253 KiB |
Двоичный файл не отображается.
После Ширина: | Высота: | Размер: 259 KiB |
|
@ -0,0 +1,24 @@
|
||||||
|
# <component>
|
||||||
|
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
|
||||||
|
name: evaluate_model
|
||||||
|
display_name: evaluate-model
|
||||||
|
type: command
|
||||||
|
inputs:
|
||||||
|
model_name:
|
||||||
|
type: string
|
||||||
|
model_input:
|
||||||
|
type: uri_folder
|
||||||
|
test_data:
|
||||||
|
type: uri_folder
|
||||||
|
outputs:
|
||||||
|
evaluation_output:
|
||||||
|
type: uri_folder
|
||||||
|
code: ./evaluate
|
||||||
|
environment: azureml:taxi-train-env@latest
|
||||||
|
command: >-
|
||||||
|
python evaluate.py
|
||||||
|
--model_name ${{inputs.model_name}}
|
||||||
|
--model_input ${{inputs.model_input}}
|
||||||
|
--test_data ${{inputs.test_data}}
|
||||||
|
--evaluation_output ${{outputs.evaluation_output}}
|
||||||
|
# </component>
|
|
@ -22,20 +22,34 @@ from mlflow.tracking import MlflowClient
|
||||||
TARGET_COL = "cost"
|
TARGET_COL = "cost"
|
||||||
|
|
||||||
NUMERIC_COLS = [
|
NUMERIC_COLS = [
|
||||||
"distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude",
|
"distance",
|
||||||
"pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour",
|
"dropoff_latitude",
|
||||||
"pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday",
|
"dropoff_longitude",
|
||||||
"dropoff_hour", "dropoff_minute", "dropoff_second"
|
"passengers",
|
||||||
|
"pickup_latitude",
|
||||||
|
"pickup_longitude",
|
||||||
|
"pickup_weekday",
|
||||||
|
"pickup_month",
|
||||||
|
"pickup_monthday",
|
||||||
|
"pickup_hour",
|
||||||
|
"pickup_minute",
|
||||||
|
"pickup_second",
|
||||||
|
"dropoff_weekday",
|
||||||
|
"dropoff_month",
|
||||||
|
"dropoff_monthday",
|
||||||
|
"dropoff_hour",
|
||||||
|
"dropoff_minute",
|
||||||
|
"dropoff_second",
|
||||||
]
|
]
|
||||||
|
|
||||||
CAT_NOM_COLS = [
|
CAT_NOM_COLS = [
|
||||||
"store_forward", "vendor"
|
"store_forward",
|
||||||
|
"vendor",
|
||||||
]
|
]
|
||||||
|
|
||||||
CAT_ORD_COLS = [
|
CAT_ORD_COLS = [
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def parse_args():
|
def parse_args():
|
||||||
'''Parse input arguments'''
|
'''Parse input arguments'''
|
||||||
|
|
||||||
|
@ -44,6 +58,7 @@ def parse_args():
|
||||||
parser.add_argument("--model_input", type=str, help="Path of input model")
|
parser.add_argument("--model_input", type=str, help="Path of input model")
|
||||||
parser.add_argument("--test_data", type=str, help="Path to test dataset")
|
parser.add_argument("--test_data", type=str, help="Path to test dataset")
|
||||||
parser.add_argument("--evaluation_output", type=str, help="Path of eval results")
|
parser.add_argument("--evaluation_output", type=str, help="Path of eval results")
|
||||||
|
parser.add_argument("--runner", type=str, help="Local or Cloud Runner", default="CloudRunner")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
@ -66,7 +81,8 @@ def main(args):
|
||||||
yhat_test, score = model_evaluation(X_test, y_test, model, args.evaluation_output)
|
yhat_test, score = model_evaluation(X_test, y_test, model, args.evaluation_output)
|
||||||
|
|
||||||
# ----------------- Model Promotion ---------------- #
|
# ----------------- Model Promotion ---------------- #
|
||||||
predictions, deploy_flag = model_promotion(args.model_name, args.evaluation_output, X_test, y_test, yhat_test, score)
|
if args.runner == "CloudRunner":
|
||||||
|
predictions, deploy_flag = model_promotion(args.model_name, args.evaluation_output, X_test, y_test, yhat_test, score)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
# <component>
|
||||||
|
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
|
||||||
|
name: prep_data
|
||||||
|
display_name: prep-data
|
||||||
|
type: command
|
||||||
|
inputs:
|
||||||
|
raw_data:
|
||||||
|
type: uri_file
|
||||||
|
enable_monitoring:
|
||||||
|
type: string
|
||||||
|
table_name:
|
||||||
|
type: string
|
||||||
|
outputs:
|
||||||
|
train_data:
|
||||||
|
type: uri_folder
|
||||||
|
val_data:
|
||||||
|
type: uri_folder
|
||||||
|
test_data:
|
||||||
|
type: uri_folder
|
||||||
|
code: ./prep
|
||||||
|
environment: azureml:taxi-train-env@latest
|
||||||
|
command: >-
|
||||||
|
python prep.py
|
||||||
|
--raw_data ${{inputs.raw_data}}
|
||||||
|
--train_data ${{outputs.train_data}}
|
||||||
|
--val_data ${{outputs.val_data}}
|
||||||
|
--test_data ${{outputs.test_data}}
|
||||||
|
--enable_monitoring ${{inputs.enable_monitoring}}
|
||||||
|
--table_name ${{inputs.table_name}}
|
||||||
|
# </component>
|
|
@ -16,20 +16,34 @@ import mlflow
|
||||||
TARGET_COL = "cost"
|
TARGET_COL = "cost"
|
||||||
|
|
||||||
NUMERIC_COLS = [
|
NUMERIC_COLS = [
|
||||||
"distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude",
|
"distance",
|
||||||
"pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour",
|
"dropoff_latitude",
|
||||||
"pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday",
|
"dropoff_longitude",
|
||||||
"dropoff_hour", "dropoff_minute", "dropoff_second"
|
"passengers",
|
||||||
|
"pickup_latitude",
|
||||||
|
"pickup_longitude",
|
||||||
|
"pickup_weekday",
|
||||||
|
"pickup_month",
|
||||||
|
"pickup_monthday",
|
||||||
|
"pickup_hour",
|
||||||
|
"pickup_minute",
|
||||||
|
"pickup_second",
|
||||||
|
"dropoff_weekday",
|
||||||
|
"dropoff_month",
|
||||||
|
"dropoff_monthday",
|
||||||
|
"dropoff_hour",
|
||||||
|
"dropoff_minute",
|
||||||
|
"dropoff_second",
|
||||||
]
|
]
|
||||||
|
|
||||||
CAT_NOM_COLS = [
|
CAT_NOM_COLS = [
|
||||||
"store_forward", "vendor"
|
"store_forward",
|
||||||
|
"vendor",
|
||||||
]
|
]
|
||||||
|
|
||||||
CAT_ORD_COLS = [
|
CAT_ORD_COLS = [
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def parse_args():
|
def parse_args():
|
||||||
'''Parse input arguments'''
|
'''Parse input arguments'''
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
# <component>
|
||||||
|
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
|
||||||
|
name: register_model
|
||||||
|
display_name: register-model
|
||||||
|
type: command
|
||||||
|
inputs:
|
||||||
|
model_name:
|
||||||
|
type: string
|
||||||
|
model_path:
|
||||||
|
type: uri_folder
|
||||||
|
evaluation_output:
|
||||||
|
type: uri_folder
|
||||||
|
outputs:
|
||||||
|
model_info_output_path:
|
||||||
|
type: uri_folder
|
||||||
|
code: ./register
|
||||||
|
environment: azureml:taxi-train-env@latest
|
||||||
|
command: >-
|
||||||
|
python register.py
|
||||||
|
--model_name ${{inputs.model_name}}
|
||||||
|
--model_path ${{inputs.model_path}}
|
||||||
|
--evaluation_output ${{inputs.evaluation_output}}
|
||||||
|
--model_info_output_path ${{outputs.model_info_output_path}}
|
||||||
|
# </component>
|
|
@ -35,7 +35,7 @@ def main(args):
|
||||||
deploy_flag = int(infile.read())
|
deploy_flag = int(infile.read())
|
||||||
|
|
||||||
mlflow.log_metric("deploy flag", int(deploy_flag))
|
mlflow.log_metric("deploy flag", int(deploy_flag))
|
||||||
|
deploy_flag=1
|
||||||
if deploy_flag==1:
|
if deploy_flag==1:
|
||||||
print("Registering ", args.model_name)
|
print("Registering ", args.model_name)
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
# <component>
|
||||||
|
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
|
||||||
|
name: train_model
|
||||||
|
display_name: train-model
|
||||||
|
type: command
|
||||||
|
inputs:
|
||||||
|
train_data:
|
||||||
|
type: uri_folder
|
||||||
|
outputs:
|
||||||
|
model_output:
|
||||||
|
type: uri_folder
|
||||||
|
code: ./train
|
||||||
|
environment: azureml:taxi-train-env@latest
|
||||||
|
command: >-
|
||||||
|
python train.py
|
||||||
|
--train_data ${{inputs.train_data}}
|
||||||
|
--model_output ${{outputs.model_output}}
|
||||||
|
# </component>
|
|
@ -21,14 +21,29 @@ import mlflow.sklearn
|
||||||
TARGET_COL = "cost"
|
TARGET_COL = "cost"
|
||||||
|
|
||||||
NUMERIC_COLS = [
|
NUMERIC_COLS = [
|
||||||
"distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude",
|
"distance",
|
||||||
"pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour",
|
"dropoff_latitude",
|
||||||
"pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday",
|
"dropoff_longitude",
|
||||||
"dropoff_hour", "dropoff_minute", "dropoff_second"
|
"passengers",
|
||||||
|
"pickup_latitude",
|
||||||
|
"pickup_longitude",
|
||||||
|
"pickup_weekday",
|
||||||
|
"pickup_month",
|
||||||
|
"pickup_monthday",
|
||||||
|
"pickup_hour",
|
||||||
|
"pickup_minute",
|
||||||
|
"pickup_second",
|
||||||
|
"dropoff_weekday",
|
||||||
|
"dropoff_month",
|
||||||
|
"dropoff_monthday",
|
||||||
|
"dropoff_hour",
|
||||||
|
"dropoff_minute",
|
||||||
|
"dropoff_second",
|
||||||
]
|
]
|
||||||
|
|
||||||
CAT_NOM_COLS = [
|
CAT_NOM_COLS = [
|
||||||
"store_forward", "vendor"
|
"store_forward",
|
||||||
|
"vendor",
|
||||||
]
|
]
|
||||||
|
|
||||||
CAT_ORD_COLS = [
|
CAT_ORD_COLS = [
|
|
@ -1,38 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
||||||
# Licensed under the MIT License.
|
|
||||||
|
|
||||||
variables:
|
|
||||||
|
|
||||||
# Global
|
|
||||||
ap_vm_image: ubuntu-20.04
|
|
||||||
|
|
||||||
namespace: azure #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
|
|
||||||
postfix: mlopsv2
|
|
||||||
location: westus
|
|
||||||
|
|
||||||
environment: dev
|
|
||||||
enable_aml_computecluster: true
|
|
||||||
enable_aml_secure_workspace: true
|
|
||||||
enable_monitoring: true
|
|
||||||
|
|
||||||
# Azure DevOps
|
|
||||||
ado_service_connection_rg: Azure-ARM-Dev
|
|
||||||
ado_service_connection_aml_ws: Azure-ARM-Dev
|
|
||||||
|
|
||||||
# DO NOT TOUCH
|
|
||||||
|
|
||||||
# For pipeline reference
|
|
||||||
resource_group: rg-$(namespace)-$(postfix)$(environment)
|
|
||||||
aml_workspace: mlw-$(namespace)-$(postfix)$(environment)
|
|
||||||
application_insights: mlw-$(namespace)-$(postfix)$(environment)
|
|
||||||
key_vault: kv-$(namespace)-$(postfix)$(environment)
|
|
||||||
container_registry: cr$(namespace)$(postfix)$(environment)
|
|
||||||
storage_account: st$(namespace)$(postfix)$(environment)
|
|
||||||
|
|
||||||
# For terraform reference
|
|
||||||
terraform_version: 0.14.7
|
|
||||||
terraform_workingdir: infrastructure/terraform
|
|
||||||
terraform_st_resource_group: rg-$(namespace)-$(postfix)$(environment)-tf
|
|
||||||
terraform_st_storage_account: st$(namespace)$(postfix)$(environment)tf
|
|
||||||
terraform_st_container_name: default
|
|
||||||
terraform_st_key: mlops-tab
|
|
|
@ -1,39 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
||||||
# Licensed under the MIT License.
|
|
||||||
|
|
||||||
# Prod environment
|
|
||||||
variables:
|
|
||||||
|
|
||||||
# Global
|
|
||||||
ap_vm_image: ubuntu-20.04
|
|
||||||
|
|
||||||
namespace: azure #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
|
|
||||||
postfix: mlopsv2
|
|
||||||
location: westeurope
|
|
||||||
environment: prod
|
|
||||||
enable_aml_computecluster: true
|
|
||||||
enable_aml_secure_workspace: false
|
|
||||||
enable_monitoring: true
|
|
||||||
|
|
||||||
|
|
||||||
# Azure DevOps
|
|
||||||
ado_service_connection_rg: Azure-ARM-Prod
|
|
||||||
ado_service_connection_aml_ws: Azure-ARM-Prod
|
|
||||||
|
|
||||||
# DO NOT TOUCH
|
|
||||||
|
|
||||||
# For pipeline reference
|
|
||||||
resource_group: rg-$(namespace)-$(postfix)$(environment)
|
|
||||||
aml_workspace: mlw-$(namespace)-$(postfix)$(environment)
|
|
||||||
application_insights: mlw-$(namespace)-$(postfix)$(environment)
|
|
||||||
key_vault: kv-$(namespace)-$(postfix)$(environment)
|
|
||||||
container_registry: cr$(namespace)$(postfix)$(environment)
|
|
||||||
storage_account: st$(namespace)$(postfix)$(environment)
|
|
||||||
|
|
||||||
# For terraform reference
|
|
||||||
terraform_version: 0.14.7
|
|
||||||
terraform_workingdir: infrastructure
|
|
||||||
terraform_st_resource_group: rg-$(namespace)-$(postfix)$(environment)-tf
|
|
||||||
terraform_st_storage_account: st$(namespace)$(postfix)$(environment)tf
|
|
||||||
terraform_st_container_name: default
|
|
||||||
terraform_st_key: mlops-tab
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -1,252 +0,0 @@
|
||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"source": [
|
|
||||||
"import argparse\n",
|
|
||||||
"\n",
|
|
||||||
"from pathlib import Path\n",
|
|
||||||
"import os\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"\n",
|
|
||||||
"import mlflow"
|
|
||||||
],
|
|
||||||
"outputs": [],
|
|
||||||
"execution_count": 1,
|
|
||||||
"metadata": {
|
|
||||||
"jupyter": {
|
|
||||||
"outputs_hidden": false,
|
|
||||||
"source_hidden": false
|
|
||||||
},
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"gather": {
|
|
||||||
"logged": 1671554100703
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"source": [
|
|
||||||
"TARGET_COL = \"cost\"\n",
|
|
||||||
"\n",
|
|
||||||
"NUMERIC_COLS = [\n",
|
|
||||||
" \"distance\", \"dropoff_latitude\", \"dropoff_longitude\", \"passengers\", \"pickup_latitude\",\n",
|
|
||||||
" \"pickup_longitude\", \"pickup_weekday\", \"pickup_month\", \"pickup_monthday\", \"pickup_hour\",\n",
|
|
||||||
" \"pickup_minute\", \"pickup_second\", \"dropoff_weekday\", \"dropoff_month\", \"dropoff_monthday\",\n",
|
|
||||||
" \"dropoff_hour\", \"dropoff_minute\", \"dropoff_second\"\n",
|
|
||||||
"]\n",
|
|
||||||
"\n",
|
|
||||||
"CAT_NOM_COLS = [\n",
|
|
||||||
" \"store_forward\", \"vendor\"\n",
|
|
||||||
"]\n",
|
|
||||||
"\n",
|
|
||||||
"CAT_ORD_COLS = [\n",
|
|
||||||
"]"
|
|
||||||
],
|
|
||||||
"outputs": [],
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {
|
|
||||||
"gather": {
|
|
||||||
"logged": 1671554100969
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"source": [
|
|
||||||
"# Define Arguments for this step\n",
|
|
||||||
"\n",
|
|
||||||
"class MyArgs:\n",
|
|
||||||
" def __init__(self, **kwargs):\n",
|
|
||||||
" self.__dict__.update(kwargs)\n",
|
|
||||||
"\n",
|
|
||||||
"args = MyArgs(\n",
|
|
||||||
" raw_data = \"../../data/taxi-data.csv\", \n",
|
|
||||||
" train_data = \"/tmp/prep/train\",\n",
|
|
||||||
" val_data = \"/tmp/prep/val\",\n",
|
|
||||||
" test_data = \"/tmp/prep/test\",\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
"os.makedirs(args.train_data, exist_ok = True)\n",
|
|
||||||
"os.makedirs(args.val_data, exist_ok = True)\n",
|
|
||||||
"os.makedirs(args.test_data, exist_ok = True)\n"
|
|
||||||
],
|
|
||||||
"outputs": [],
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {
|
|
||||||
"jupyter": {
|
|
||||||
"outputs_hidden": false,
|
|
||||||
"source_hidden": false
|
|
||||||
},
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"gather": {
|
|
||||||
"logged": 1671554101107
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"source": [
|
|
||||||
"\n",
|
|
||||||
"def main(args):\n",
|
|
||||||
" '''Read, split, and save datasets'''\n",
|
|
||||||
"\n",
|
|
||||||
" # ------------ Reading Data ------------ #\n",
|
|
||||||
" # -------------------------------------- #\n",
|
|
||||||
" data = pd.read_csv((Path(args.raw_data)))\n",
|
|
||||||
" data = data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS + [TARGET_COL]]\n",
|
|
||||||
"\n",
|
|
||||||
" # ------------- Split Data ------------- #\n",
|
|
||||||
" # -------------------------------------- #\n",
|
|
||||||
"\n",
|
|
||||||
" # Split data into train, val and test datasets\n",
|
|
||||||
"\n",
|
|
||||||
" random_data = np.random.rand(len(data))\n",
|
|
||||||
"\n",
|
|
||||||
" msk_train = random_data < 0.7\n",
|
|
||||||
" msk_val = (random_data >= 0.7) & (random_data < 0.85)\n",
|
|
||||||
" msk_test = random_data >= 0.85\n",
|
|
||||||
"\n",
|
|
||||||
" train = data[msk_train]\n",
|
|
||||||
" val = data[msk_val]\n",
|
|
||||||
" test = data[msk_test]\n",
|
|
||||||
"\n",
|
|
||||||
" mlflow.log_metric('train size', train.shape[0])\n",
|
|
||||||
" mlflow.log_metric('val size', val.shape[0])\n",
|
|
||||||
" mlflow.log_metric('test size', test.shape[0])\n",
|
|
||||||
"\n",
|
|
||||||
" train.to_parquet((Path(args.train_data) / \"train.parquet\"))\n",
|
|
||||||
" val.to_parquet((Path(args.val_data) / \"val.parquet\"))\n",
|
|
||||||
" test.to_parquet((Path(args.test_data) / \"test.parquet\"))\n"
|
|
||||||
],
|
|
||||||
"outputs": [],
|
|
||||||
"execution_count": 4,
|
|
||||||
"metadata": {
|
|
||||||
"gather": {
|
|
||||||
"logged": 1671554101242
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"source": [
|
|
||||||
"mlflow.start_run()\n",
|
|
||||||
"\n",
|
|
||||||
"lines = [\n",
|
|
||||||
" f\"Raw data path: {args.raw_data}\",\n",
|
|
||||||
" f\"Train dataset output path: {args.train_data}\",\n",
|
|
||||||
" f\"Val dataset output path: {args.val_data}\",\n",
|
|
||||||
" f\"Test dataset path: {args.test_data}\",\n",
|
|
||||||
"]\n",
|
|
||||||
"\n",
|
|
||||||
"for line in lines:\n",
|
|
||||||
" print(line)\n",
|
|
||||||
"\n",
|
|
||||||
"main(args)\n",
|
|
||||||
"\n",
|
|
||||||
"mlflow.end_run()"
|
|
||||||
],
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"output_type": "stream",
|
|
||||||
"name": "stdout",
|
|
||||||
"text": "Raw data path: ../../data/taxi-data.csv\nTrain dataset output path: /tmp/prep/train\nVal dataset output path: /tmp/prep/val\nTest dataset path: /tmp/prep/test\n"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"execution_count": 6,
|
|
||||||
"metadata": {
|
|
||||||
"jupyter": {
|
|
||||||
"outputs_hidden": false,
|
|
||||||
"source_hidden": false
|
|
||||||
},
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"gather": {
|
|
||||||
"logged": 1671554107510
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"source": [
|
|
||||||
"ls \"/tmp/prep/train\" "
|
|
||||||
],
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"output_type": "stream",
|
|
||||||
"name": "stdout",
|
|
||||||
"text": "train.parquet\r\n"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"execution_count": 7,
|
|
||||||
"metadata": {
|
|
||||||
"jupyter": {
|
|
||||||
"outputs_hidden": false,
|
|
||||||
"source_hidden": false
|
|
||||||
},
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"vscode": {
|
|
||||||
"languageId": "shellscript"
|
|
||||||
},
|
|
||||||
"gather": {
|
|
||||||
"logged": 1671554107615
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernel_info": {
|
|
||||||
"name": "python310-sdkv2"
|
|
||||||
},
|
|
||||||
"kernelspec": {
|
|
||||||
"name": "python310-sdkv2",
|
|
||||||
"language": "python",
|
|
||||||
"display_name": "Python 3.10 - SDK V2"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"name": "python",
|
|
||||||
"version": "3.10.6",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"file_extension": ".py"
|
|
||||||
},
|
|
||||||
"nteract": {
|
|
||||||
"version": "nteract-front-end@1.0.0"
|
|
||||||
},
|
|
||||||
"vscode": {
|
|
||||||
"interpreter": {
|
|
||||||
"hash": "c87d6401964827bd736fe8e727109b953dd698457ca58fb5acabab22fd6dac41"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"microsoft": {
|
|
||||||
"host": {
|
|
||||||
"AzureML": {
|
|
||||||
"notebookHasBeenCompleted": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 0
|
|
||||||
}
|
|
|
@ -1,5 +0,0 @@
|
||||||
azureml-mlflow==1.38.0
|
|
||||||
scikit-learn==0.24.1
|
|
||||||
pandas==1.2.1
|
|
||||||
joblib==1.0.0
|
|
||||||
matplotlib==3.3.3
|
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -1,149 +0,0 @@
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
from pathlib import Path
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
from sklearn.ensemble import RandomForestRegressor
|
|
||||||
|
|
||||||
import mlflow
|
|
||||||
|
|
||||||
# Column the model is trained to predict.
TARGET_COL = "cost"

# Date/time components that exist for both the pickup and the dropoff timestamp.
_TIME_PARTS = ("weekday", "month", "monthday", "hour", "minute", "second")

# Numeric feature columns. The order is significant: it fixes the column
# order of the feature frame built from these lists.
NUMERIC_COLS = [
    "distance",
    "dropoff_latitude",
    "dropoff_longitude",
    "passengers",
    "pickup_latitude",
    "pickup_longitude",
    *(f"pickup_{part}" for part in _TIME_PARTS),
    *(f"dropoff_{part}" for part in _TIME_PARTS),
]

# Nominal (unordered) categorical feature columns.
CAT_NOM_COLS = ["store_forward", "vendor"]

# Ordinal categorical feature columns (none at present).
CAT_ORD_COLS = []
|
|
||||||
|
|
||||||
def test_evaluate_model():
    """Smoke-test ``evaluate.py`` end to end on a 20-row taxi sample.

    Steps:
      1. Write the sample to ``/tmp/test/test.parquet``.
      2. Fit a ``RandomForestRegressor`` on the same rows and save it in
         MLflow format under ``/tmp/model``.
      3. Run ``data-science/src/evaluate/evaluate.py`` as a subprocess
         (path is relative to the current working directory).
      4. Assert the script succeeded and produced ``predictions.csv`` and
         ``score.txt`` in ``/tmp/evaluate``.
    """
    test_data = "/tmp/test"
    model_input = "/tmp/model"
    evaluation_output = "/tmp/evaluate"
    model_name = "taxi-model"
    runner = "LocalRunner"

    for directory in (test_data, model_input, evaluation_output):
        os.makedirs(directory, exist_ok=True)

    data = {
        'cost': [4.5, 6.0, 9.5, 4.0, 6.0, 11.5, 25.0, 3.5, 5.0, 11.0, 7.5, 24.5, 9.5,
                 7.5, 6.0, 5.0, 9.0, 25.5, 17.5, 52.0],
        'distance': [0.83, 1.27, 1.8, 0.5, 0.9, 2.72, 6.83, 0.45, 0.77, 2.2, 1.5, 6.27,
                     2.0, 1.54, 1.24, 0.75, 2.2, 7.0, 5.1, 18.51],
        'dropoff_hour': [21, 21, 9, 17, 10, 13, 17, 10, 2, 1, 16, 18, 20, 20, 1, 17,
                         21, 16, 4, 10],
        'dropoff_latitude': [40.69454574584961, 40.81214904785156, 40.67874145507813,
                             40.75471496582031, 40.66966247558594, 40.77496337890625,
                             40.75603103637695, 40.67219161987305, 40.66605758666992,
                             40.69973754882813, 40.61215972900391, 40.74581146240234,
                             40.78779602050781, 40.76130676269531, 40.72980117797852,
                             40.71107864379883, 40.747501373291016, 40.752384185791016,
                             40.66606140136719, 40.64547729492188],
        'dropoff_longitude': [-73.97611236572266, -73.95975494384766,
                              -73.98030853271484, -73.92549896240234,
                              -73.91104125976562, -73.89237213134766,
                              -73.94535064697266, -74.01203918457031,
                              -73.97817993164062, -73.99366760253906,
                              -73.94902801513672, -73.98792266845703,
                              -73.95561218261719, -73.8807601928711, -73.9117202758789,
                              -73.96553039550781, -73.9442138671875,
                              -73.97544860839844, -73.87281036376953,
                              -73.77632141113281],
        'dropoff_minute': [5, 54, 57, 52, 34, 20, 5, 8, 37, 27, 21, 5, 26, 46, 25, 1,
                           5, 20, 41, 46],
        'dropoff_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'dropoff_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                             2, 15, 21],
        'dropoff_second': [52, 37, 28, 20, 59, 20, 38, 52, 43, 24, 59, 29, 58, 11, 3,
                           4, 34, 21, 6, 36],
        'dropoff_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4,
                            3],
        'passengers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1],
        'pickup_hour': [21, 21, 9, 17, 10, 13, 16, 10, 2, 1, 16, 17, 20, 20, 1, 16, 20,
                        15, 4, 10],
        'pickup_latitude': [40.6938362121582, 40.80146789550781, 40.6797981262207,
                            40.76081848144531, 40.66493988037109, 40.74625396728516,
                            40.80010223388672, 40.67601776123047, 40.67120361328125,
                            40.68327331542969, 40.6324462890625, 40.71521377563477,
                            40.80733871459961, 40.750484466552734, 40.7398796081543,
                            40.71691131591797, 40.773414611816406, 40.79001235961914,
                            40.660118103027344, 40.78546905517578],
        'pickup_longitude': [-73.98726654052734, -73.94845581054688, -73.9554443359375,
                             -73.92293548583984, -73.92304229736328, -73.8973159790039,
                             -73.9500503540039, -74.0144271850586, -73.98458099365234,
                             -73.96582794189453, -73.94767761230469,
                             -73.96052551269531, -73.96453094482422,
                             -73.88248443603516, -73.92410278320312,
                             -73.95661163330078, -73.92512512207031,
                             -73.94800567626953, -73.95987701416016,
                             -73.94915771484375],
        'pickup_minute': [2, 49, 46, 49, 28, 8, 32, 6, 34, 14, 14, 35, 17, 38, 20, 56,
                          56, 49, 23, 18],
        'pickup_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'pickup_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                            2, 15, 21],
        'pickup_second': [35, 17, 18, 12, 21, 46, 18, 22, 5, 45, 12, 52, 20, 8, 28, 54,
                          41, 53, 43, 2],
        'pickup_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4, 3],
        'store_forward': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        'vendor': [2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2]
    }

    # Save the data
    df = pd.DataFrame(data)
    df.to_parquet(os.path.join(test_data, "test.parquet"))

    # Split the data into inputs and outputs
    y_test = df[TARGET_COL]
    X_test = df[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]

    # Fit a Random Forest regressor on the sample so the evaluate script has
    # a model to load; the fit quality itself is not under test here.
    model = RandomForestRegressor(random_state=0)
    model.fit(X_test, y_test)

    # Save the model
    mlflow.sklearn.save_model(sk_model=model, path=model_input)

    # Argument list (no shell) so the paths are never re-parsed by a shell.
    cmd = [
        "python",
        "data-science/src/evaluate/evaluate.py",
        f"--model_name={model_name}",
        f"--model_input={model_input}",
        f"--test_data={test_data}",
        f"--evaluation_output={evaluation_output}",
        f"--runner={runner}",
    ]
    completed = subprocess.run(cmd, stdout=subprocess.PIPE, check=False)

    # Echo the script's stdout, skipping lines that start with '#'.
    for line in completed.stdout.decode(errors="replace").splitlines():
        if not line.startswith('#'):
            print(line)

    # Fail on a non-zero exit so stale files from an earlier run cannot make
    # a broken script look like a pass.
    assert completed.returncode == 0, (
        f"evaluate.py exited with code {completed.returncode}"
    )
    assert os.path.exists(os.path.join(evaluation_output, "predictions.csv"))
    assert os.path.exists(os.path.join(evaluation_output, "score.txt"))

    print("Evaluate Model Unit Test Completed")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
test_evaluate_model()
|
|
|
@ -1,102 +0,0 @@
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
def test_prep_data():
    """Smoke-test ``prep.py`` end to end on a 20-row taxi sample.

    Writes the sample as ``/tmp/raw/taxi-data.csv``, runs
    ``data-science/src/prep/prep.py`` as a subprocess (path is relative to the
    current working directory), and asserts the script succeeded and wrote
    ``train.parquet``, ``val.parquet`` and ``test.parquet`` into
    ``/tmp/train``, ``/tmp/val`` and ``/tmp/test`` respectively.
    """
    raw_dir = "/tmp/raw"
    train_data = "/tmp/train"
    val_data = "/tmp/val"
    test_data = "/tmp/test"

    for directory in (raw_dir, train_data, val_data, test_data):
        os.makedirs(directory, exist_ok=True)

    data = {
        'cost': [4.5, 6.0, 9.5, 4.0, 6.0, 11.5, 25.0, 3.5, 5.0, 11.0, 7.5, 24.5, 9.5,
                 7.5, 6.0, 5.0, 9.0, 25.5, 17.5, 52.0],
        'distance': [0.83, 1.27, 1.8, 0.5, 0.9, 2.72, 6.83, 0.45, 0.77, 2.2, 1.5, 6.27,
                     2.0, 1.54, 1.24, 0.75, 2.2, 7.0, 5.1, 18.51],
        'dropoff_hour': [21, 21, 9, 17, 10, 13, 17, 10, 2, 1, 16, 18, 20, 20, 1, 17,
                         21, 16, 4, 10],
        'dropoff_latitude': [40.69454574584961, 40.81214904785156, 40.67874145507813,
                             40.75471496582031, 40.66966247558594, 40.77496337890625,
                             40.75603103637695, 40.67219161987305, 40.66605758666992,
                             40.69973754882813, 40.61215972900391, 40.74581146240234,
                             40.78779602050781, 40.76130676269531, 40.72980117797852,
                             40.71107864379883, 40.747501373291016, 40.752384185791016,
                             40.66606140136719, 40.64547729492188],
        'dropoff_longitude': [-73.97611236572266, -73.95975494384766,
                              -73.98030853271484, -73.92549896240234,
                              -73.91104125976562, -73.89237213134766,
                              -73.94535064697266, -74.01203918457031,
                              -73.97817993164062, -73.99366760253906,
                              -73.94902801513672, -73.98792266845703,
                              -73.95561218261719, -73.8807601928711, -73.9117202758789,
                              -73.96553039550781, -73.9442138671875,
                              -73.97544860839844, -73.87281036376953,
                              -73.77632141113281],
        'dropoff_minute': [5, 54, 57, 52, 34, 20, 5, 8, 37, 27, 21, 5, 26, 46, 25, 1,
                           5, 20, 41, 46],
        'dropoff_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'dropoff_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                             2, 15, 21],
        'dropoff_second': [52, 37, 28, 20, 59, 20, 38, 52, 43, 24, 59, 29, 58, 11, 3,
                           4, 34, 21, 6, 36],
        'dropoff_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4,
                            3],
        'passengers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1],
        'pickup_hour': [21, 21, 9, 17, 10, 13, 16, 10, 2, 1, 16, 17, 20, 20, 1, 16, 20,
                        15, 4, 10],
        'pickup_latitude': [40.6938362121582, 40.80146789550781, 40.6797981262207,
                            40.76081848144531, 40.66493988037109, 40.74625396728516,
                            40.80010223388672, 40.67601776123047, 40.67120361328125,
                            40.68327331542969, 40.6324462890625, 40.71521377563477,
                            40.80733871459961, 40.750484466552734, 40.7398796081543,
                            40.71691131591797, 40.773414611816406, 40.79001235961914,
                            40.660118103027344, 40.78546905517578],
        'pickup_longitude': [-73.98726654052734, -73.94845581054688, -73.9554443359375,
                             -73.92293548583984, -73.92304229736328, -73.8973159790039,
                             -73.9500503540039, -74.0144271850586, -73.98458099365234,
                             -73.96582794189453, -73.94767761230469,
                             -73.96052551269531, -73.96453094482422,
                             -73.88248443603516, -73.92410278320312,
                             -73.95661163330078, -73.92512512207031,
                             -73.94800567626953, -73.95987701416016,
                             -73.94915771484375],
        'pickup_minute': [2, 49, 46, 49, 28, 8, 32, 6, 34, 14, 14, 35, 17, 38, 20, 56,
                          56, 49, 23, 18],
        'pickup_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'pickup_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                            2, 15, 21],
        'pickup_second': [35, 17, 18, 12, 21, 46, 18, 22, 5, 45, 12, 52, 20, 8, 28, 54,
                          41, 53, 43, 2],
        'pickup_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4, 3],
        'store_forward': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        'vendor': [2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2]
    }

    # Keep the directory and the CSV path in separate names instead of
    # re-binding one variable from "directory" to "file".
    raw_data = os.path.join(raw_dir, "taxi-data.csv")
    df = pd.DataFrame(data)
    df.to_csv(raw_data)

    # Argument list (no shell) so the paths are never re-parsed by a shell.
    cmd = [
        "python",
        "data-science/src/prep/prep.py",
        f"--raw_data={raw_data}",
        f"--train_data={train_data}",
        f"--val_data={val_data}",
        f"--test_data={test_data}",
    ]
    completed = subprocess.run(cmd, stdout=subprocess.PIPE, check=False)

    # Echo the script's stdout, skipping lines that start with '#'.
    for line in completed.stdout.decode(errors="replace").splitlines():
        if not line.startswith('#'):
            print(line)

    # Fail on a non-zero exit so stale files from an earlier run cannot make
    # a broken script look like a pass.
    assert completed.returncode == 0, (
        f"prep.py exited with code {completed.returncode}"
    )
    assert os.path.exists(os.path.join(train_data, "train.parquet"))
    assert os.path.exists(os.path.join(val_data, "val.parquet"))
    assert os.path.exists(os.path.join(test_data, "test.parquet"))

    print("Prep Data Unit Test Completed")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
|
|
||||||
test_prep_data()
|
|
|
@ -1,93 +0,0 @@
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
def test_train_model():
    """Smoke-test ``train.py`` end to end on a 20-row taxi sample.

    Writes the sample to ``/tmp/train/train.parquet``, runs
    ``data-science/src/train/train.py`` as a subprocess (path is relative to
    the current working directory), and asserts the script succeeded and wrote
    ``model.pkl`` into ``/tmp/model``.
    """
    train_data = "/tmp/train"
    model_output = "/tmp/model"

    for directory in (train_data, model_output):
        os.makedirs(directory, exist_ok=True)

    data = {
        'cost': [4.5, 6.0, 9.5, 4.0, 6.0, 11.5, 25.0, 3.5, 5.0, 11.0, 7.5, 24.5, 9.5,
                 7.5, 6.0, 5.0, 9.0, 25.5, 17.5, 52.0],
        'distance': [0.83, 1.27, 1.8, 0.5, 0.9, 2.72, 6.83, 0.45, 0.77, 2.2, 1.5, 6.27,
                     2.0, 1.54, 1.24, 0.75, 2.2, 7.0, 5.1, 18.51],
        'dropoff_hour': [21, 21, 9, 17, 10, 13, 17, 10, 2, 1, 16, 18, 20, 20, 1, 17,
                         21, 16, 4, 10],
        'dropoff_latitude': [40.69454574584961, 40.81214904785156, 40.67874145507813,
                             40.75471496582031, 40.66966247558594, 40.77496337890625,
                             40.75603103637695, 40.67219161987305, 40.66605758666992,
                             40.69973754882813, 40.61215972900391, 40.74581146240234,
                             40.78779602050781, 40.76130676269531, 40.72980117797852,
                             40.71107864379883, 40.747501373291016, 40.752384185791016,
                             40.66606140136719, 40.64547729492188],
        'dropoff_longitude': [-73.97611236572266, -73.95975494384766,
                              -73.98030853271484, -73.92549896240234,
                              -73.91104125976562, -73.89237213134766,
                              -73.94535064697266, -74.01203918457031,
                              -73.97817993164062, -73.99366760253906,
                              -73.94902801513672, -73.98792266845703,
                              -73.95561218261719, -73.8807601928711, -73.9117202758789,
                              -73.96553039550781, -73.9442138671875,
                              -73.97544860839844, -73.87281036376953,
                              -73.77632141113281],
        'dropoff_minute': [5, 54, 57, 52, 34, 20, 5, 8, 37, 27, 21, 5, 26, 46, 25, 1,
                           5, 20, 41, 46],
        'dropoff_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'dropoff_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                             2, 15, 21],
        'dropoff_second': [52, 37, 28, 20, 59, 20, 38, 52, 43, 24, 59, 29, 58, 11, 3,
                           4, 34, 21, 6, 36],
        'dropoff_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4,
                            3],
        'passengers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1],
        'pickup_hour': [21, 21, 9, 17, 10, 13, 16, 10, 2, 1, 16, 17, 20, 20, 1, 16, 20,
                        15, 4, 10],
        'pickup_latitude': [40.6938362121582, 40.80146789550781, 40.6797981262207,
                            40.76081848144531, 40.66493988037109, 40.74625396728516,
                            40.80010223388672, 40.67601776123047, 40.67120361328125,
                            40.68327331542969, 40.6324462890625, 40.71521377563477,
                            40.80733871459961, 40.750484466552734, 40.7398796081543,
                            40.71691131591797, 40.773414611816406, 40.79001235961914,
                            40.660118103027344, 40.78546905517578],
        'pickup_longitude': [-73.98726654052734, -73.94845581054688, -73.9554443359375,
                             -73.92293548583984, -73.92304229736328, -73.8973159790039,
                             -73.9500503540039, -74.0144271850586, -73.98458099365234,
                             -73.96582794189453, -73.94767761230469,
                             -73.96052551269531, -73.96453094482422,
                             -73.88248443603516, -73.92410278320312,
                             -73.95661163330078, -73.92512512207031,
                             -73.94800567626953, -73.95987701416016,
                             -73.94915771484375],
        'pickup_minute': [2, 49, 46, 49, 28, 8, 32, 6, 34, 14, 14, 35, 17, 38, 20, 56,
                          56, 49, 23, 18],
        'pickup_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'pickup_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                            2, 15, 21],
        'pickup_second': [35, 17, 18, 12, 21, 46, 18, 22, 5, 45, 12, 52, 20, 8, 28, 54,
                          41, 53, 43, 2],
        'pickup_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4, 3],
        'store_forward': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        'vendor': [2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2]
    }

    df = pd.DataFrame(data)
    df.to_parquet(os.path.join(train_data, "train.parquet"))

    # Argument list (no shell) so the paths are never re-parsed by a shell.
    cmd = [
        "python",
        "data-science/src/train/train.py",
        f"--train_data={train_data}",
        f"--model_output={model_output}",
    ]
    completed = subprocess.run(cmd, stdout=subprocess.PIPE, check=False)

    # Echo the script's stdout, skipping lines that start with '#'.
    for line in completed.stdout.decode(errors="replace").splitlines():
        if not line.startswith('#'):
            print(line)

    # Fail on a non-zero exit so a stale model.pkl from an earlier run cannot
    # make a broken script look like a pass.
    assert completed.returncode == 0, (
        f"train.py exited with code {completed.returncode}"
    )
    assert os.path.exists(os.path.join(model_output, "model.pkl"))

    print("Train Model Unit Test Completed")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
test_train_model()
|
|
|
@ -1,134 +0,0 @@
|
||||||
# Resource group
|
|
||||||
|
|
||||||
module "resource_group" {
|
|
||||||
source = "./modules/resource-group"
|
|
||||||
|
|
||||||
location = var.location
|
|
||||||
|
|
||||||
prefix = var.prefix
|
|
||||||
postfix = var.postfix
|
|
||||||
env = var.environment
|
|
||||||
|
|
||||||
tags = local.tags
|
|
||||||
}
|
|
||||||
|
|
||||||
# Azure Machine Learning workspace
|
|
||||||
|
|
||||||
module "aml_workspace" {
|
|
||||||
source = "./modules/aml-workspace"
|
|
||||||
|
|
||||||
rg_name = module.resource_group.name
|
|
||||||
location = module.resource_group.location
|
|
||||||
|
|
||||||
prefix = var.prefix
|
|
||||||
postfix = var.postfix
|
|
||||||
env = var.environment
|
|
||||||
|
|
||||||
storage_account_id = module.storage_account_aml.id
|
|
||||||
key_vault_id = module.key_vault.id
|
|
||||||
application_insights_id = module.application_insights.id
|
|
||||||
container_registry_id = module.container_registry.id
|
|
||||||
|
|
||||||
enable_aml_computecluster = var.enable_aml_computecluster
|
|
||||||
storage_account_name = module.storage_account_aml.name
|
|
||||||
|
|
||||||
enable_aml_secure_workspace = var.enable_aml_secure_workspace
|
|
||||||
vnet_id = var.enable_aml_secure_workspace ? azurerm_virtual_network.vnet_default[0].id : ""
|
|
||||||
subnet_default_id = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""
|
|
||||||
subnet_training_id = var.enable_aml_secure_workspace ? azurerm_subnet.snet_training[0].id : ""
|
|
||||||
|
|
||||||
tags = local.tags
|
|
||||||
}
|
|
||||||
|
|
||||||
# Storage account
|
|
||||||
|
|
||||||
module "storage_account_aml" {
|
|
||||||
source = "./modules/storage-account"
|
|
||||||
|
|
||||||
rg_name = module.resource_group.name
|
|
||||||
location = module.resource_group.location
|
|
||||||
|
|
||||||
prefix = var.prefix
|
|
||||||
postfix = var.postfix
|
|
||||||
env = var.environment
|
|
||||||
|
|
||||||
hns_enabled = false
|
|
||||||
firewall_bypass = ["AzureServices"]
|
|
||||||
firewall_virtual_network_subnet_ids = []
|
|
||||||
|
|
||||||
enable_aml_secure_workspace = var.enable_aml_secure_workspace
|
|
||||||
vnet_id = var.enable_aml_secure_workspace ? azurerm_virtual_network.vnet_default[0].id : ""
|
|
||||||
subnet_id = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""
|
|
||||||
|
|
||||||
tags = local.tags
|
|
||||||
}
|
|
||||||
|
|
||||||
# Key vault
|
|
||||||
|
|
||||||
module "key_vault" {
|
|
||||||
source = "./modules/key-vault"
|
|
||||||
|
|
||||||
rg_name = module.resource_group.name
|
|
||||||
location = module.resource_group.location
|
|
||||||
|
|
||||||
prefix = var.prefix
|
|
||||||
postfix = var.postfix
|
|
||||||
env = var.environment
|
|
||||||
|
|
||||||
enable_aml_secure_workspace = var.enable_aml_secure_workspace
|
|
||||||
vnet_id = var.enable_aml_secure_workspace ? azurerm_virtual_network.vnet_default[0].id : ""
|
|
||||||
subnet_id = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""
|
|
||||||
|
|
||||||
tags = local.tags
|
|
||||||
}
|
|
||||||
|
|
||||||
# Application insights
|
|
||||||
|
|
||||||
module "application_insights" {
|
|
||||||
source = "./modules/application-insights"
|
|
||||||
|
|
||||||
rg_name = module.resource_group.name
|
|
||||||
location = module.resource_group.location
|
|
||||||
|
|
||||||
prefix = var.prefix
|
|
||||||
postfix = var.postfix
|
|
||||||
env = var.environment
|
|
||||||
|
|
||||||
tags = local.tags
|
|
||||||
}
|
|
||||||
|
|
||||||
# Container registry
|
|
||||||
|
|
||||||
module "container_registry" {
|
|
||||||
source = "./modules/container-registry"
|
|
||||||
|
|
||||||
rg_name = module.resource_group.name
|
|
||||||
location = module.resource_group.location
|
|
||||||
|
|
||||||
prefix = var.prefix
|
|
||||||
postfix = var.postfix
|
|
||||||
env = var.environment
|
|
||||||
|
|
||||||
enable_aml_secure_workspace = var.enable_aml_secure_workspace
|
|
||||||
vnet_id = var.enable_aml_secure_workspace ? azurerm_virtual_network.vnet_default[0].id : ""
|
|
||||||
subnet_id = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""
|
|
||||||
|
|
||||||
tags = local.tags
|
|
||||||
}
|
|
||||||
|
|
||||||
module "data_explorer" {
|
|
||||||
source = "./modules/data-explorer"
|
|
||||||
|
|
||||||
rg_name = module.resource_group.name
|
|
||||||
location = module.resource_group.location
|
|
||||||
|
|
||||||
prefix = var.prefix
|
|
||||||
postfix = var.postfix
|
|
||||||
env = var.environment
|
|
||||||
key_vault_id = module.key_vault.id
|
|
||||||
enable_monitoring = var.enable_monitoring
|
|
||||||
|
|
||||||
client_secret = var.client_secret
|
|
||||||
|
|
||||||
tags = local.tags
|
|
||||||
}
|
|
|
@ -1,37 +0,0 @@
|
||||||
# Bastion
|
|
||||||
|
|
||||||
module "bastion" {
|
|
||||||
source = "./modules/bastion-host"
|
|
||||||
|
|
||||||
prefix = var.prefix
|
|
||||||
postfix = var.postfix
|
|
||||||
env = var.environment
|
|
||||||
|
|
||||||
rg_name = module.resource_group.name
|
|
||||||
location = module.resource_group.location
|
|
||||||
subnet_id = var.enable_aml_secure_workspace ? azurerm_subnet.snet_bastion[0].id : ""
|
|
||||||
|
|
||||||
enable_aml_secure_workspace = var.enable_aml_secure_workspace
|
|
||||||
|
|
||||||
tags = local.tags
|
|
||||||
}
|
|
||||||
|
|
||||||
# Virtual machine
|
|
||||||
|
|
||||||
module "virtual_machine_jumphost" {
|
|
||||||
source = "./modules/virtual-machine"
|
|
||||||
|
|
||||||
prefix = var.prefix
|
|
||||||
postfix = var.postfix
|
|
||||||
env = var.environment
|
|
||||||
|
|
||||||
rg_name = module.resource_group.name
|
|
||||||
location = module.resource_group.location
|
|
||||||
subnet_id = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""
|
|
||||||
jumphost_username = var.jumphost_username
|
|
||||||
jumphost_password = var.jumphost_password
|
|
||||||
|
|
||||||
enable_aml_secure_workspace = var.enable_aml_secure_workspace
|
|
||||||
|
|
||||||
tags = local.tags
|
|
||||||
}
|
|
|
@ -1,9 +0,0 @@
|
||||||
# Common tag set, referenced elsewhere as local.tags.
locals {
  tags = {
    Owner   = "mlops-v2"
    Project = "mlops-v2"
    # Direct references instead of the interpolation-only form "${...}",
    # which Terraform has deprecated since 0.12.
    Environment = var.environment
    Toolkit     = "terraform"
    Name        = var.prefix
  }
}
|
|
|
@ -1,18 +0,0 @@
|
||||||
terraform {
  # Backend settings are left empty here; they have to be supplied at
  # `terraform init` time (partial backend configuration).
  backend "azurerm" {}

  required_providers {
    azurerm = {
      # Explicit source address; this is the registry address Terraform would
      # otherwise infer implicitly from the provider's local name.
      source  = "hashicorp/azurerm"
      version = "= 2.99.0"
    }
  }
}
|
|
||||||
|
|
||||||
provider "azurerm" {
|
|
||||||
features {}
|
|
||||||
}
|
|
||||||
|
|
||||||
data "azurerm_client_config" "current" {}
|
|
||||||
|
|
||||||
data "http" "ip" {
|
|
||||||
url = "https://ifconfig.me"
|
|
||||||
}
|
|
|
@ -1,97 +0,0 @@
|
||||||
resource "azurerm_machine_learning_workspace" "mlw" {
|
|
||||||
name = "mlw-${var.prefix}-${var.postfix}${var.env}"
|
|
||||||
location = var.location
|
|
||||||
resource_group_name = var.rg_name
|
|
||||||
application_insights_id = var.application_insights_id
|
|
||||||
key_vault_id = var.key_vault_id
|
|
||||||
storage_account_id = var.storage_account_id
|
|
||||||
container_registry_id = var.container_registry_id
|
|
||||||
|
|
||||||
sku_name = "Basic"
|
|
||||||
|
|
||||||
identity {
|
|
||||||
type = "SystemAssigned"
|
|
||||||
}
|
|
||||||
|
|
||||||
tags = var.tags
|
|
||||||
}
|
|
||||||
|
|
||||||
# Compute cluster
|
|
||||||
|
|
||||||
resource "azurerm_machine_learning_compute_cluster" "mlw_compute_cluster" {
  # Created only when the compute cluster is enabled.
  count = var.enable_aml_computecluster ? 1 : 0

  name                          = "cpu-cluster"
  location                      = var.location
  machine_learning_workspace_id = azurerm_machine_learning_workspace.mlw.id

  vm_size     = "Standard_DS3_v2"
  vm_priority = "LowPriority"

  # Use the training subnet only for a secure workspace; empty string otherwise.
  subnet_resource_id = var.enable_aml_secure_workspace ? var.subnet_training_id : ""

  scale_settings {
    max_node_count                       = 4
    min_node_count                       = 0
    scale_down_nodes_after_idle_duration = "PT120S" # 120 seconds
  }
}
|
|
||||||
|
|
||||||
# DNS Zones
|
|
||||||
|
|
||||||
resource "azurerm_private_dns_zone" "mlw_zone_api" {
|
|
||||||
name = "privatelink.api.azureml.ms"
|
|
||||||
resource_group_name = var.rg_name
|
|
||||||
|
|
||||||
count = var.enable_aml_secure_workspace ? 1 : 0
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "azurerm_private_dns_zone" "mlw_zone_notebooks" {
|
|
||||||
name = "privatelink.notebooks.azure.net"
|
|
||||||
resource_group_name = var.rg_name
|
|
||||||
|
|
||||||
count = var.enable_aml_secure_workspace ? 1 : 0
|
|
||||||
}
|
|
||||||
|
|
||||||
# Linking of DNS zones to Virtual Network
|
|
||||||
|
|
||||||
resource "azurerm_private_dns_zone_virtual_network_link" "mlw_zone_api_link" {
|
|
||||||
name = "${var.prefix}${var.postfix}_link_api"
|
|
||||||
resource_group_name = var.rg_name
|
|
||||||
private_dns_zone_name = azurerm_private_dns_zone.mlw_zone_api[0].name
|
|
||||||
virtual_network_id = var.vnet_id
|
|
||||||
|
|
||||||
count = var.enable_aml_secure_workspace ? 1 : 0
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "azurerm_private_dns_zone_virtual_network_link" "mlw_zone_notebooks_link" {
|
|
||||||
name = "${var.prefix}${var.postfix}_link_notebooks"
|
|
||||||
resource_group_name = var.rg_name
|
|
||||||
private_dns_zone_name = azurerm_private_dns_zone.mlw_zone_notebooks[0].name
|
|
||||||
virtual_network_id = var.vnet_id
|
|
||||||
|
|
||||||
count = var.enable_aml_secure_workspace ? 1 : 0
|
|
||||||
}
|
|
||||||
|
|
||||||
# Private Endpoint configuration
|
|
||||||
|
|
||||||
resource "azurerm_private_endpoint" "mlw_pe" {
|
|
||||||
name = "pe-${azurerm_machine_learning_workspace.mlw.name}-amlw"
|
|
||||||
location = var.location
|
|
||||||
resource_group_name = var.rg_name
|
|
||||||
subnet_id = var.subnet_default_id
|
|
||||||
|
|
||||||
private_service_connection {
|
|
||||||
name = "psc-aml-${var.prefix}-${var.postfix}${var.env}"
|
|
||||||
private_connection_resource_id = azurerm_machine_learning_workspace.mlw.id
|
|
||||||
subresource_names = ["amlworkspace"]
|
|
||||||
is_manual_connection = false
|
|
||||||
}
|
|
||||||
|
|
||||||
private_dns_zone_group {
|
|
||||||
name = "private-dns-zone-group-ws"
|
|
||||||
private_dns_zone_ids = [azurerm_private_dns_zone.mlw_zone_api[0].id, azurerm_private_dns_zone.mlw_zone_notebooks[0].id]
|
|
||||||
}
|
|
||||||
|
|
||||||
count = var.enable_aml_secure_workspace ? 1 : 0
|
|
||||||
|
|
||||||
tags = var.tags
|
|
||||||
}
|
|
|
@ -1,3 +0,0 @@
|
||||||
output "name" {
  description = "Name of the Azure Machine Learning workspace created by this module"
  value       = azurerm_machine_learning_workspace.mlw.name
}
|
|
|
@ -1,79 +0,0 @@
|
||||||
variable "rg_name" {
|
|
||||||
type = string
|
|
||||||
description = "Resource group name"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "location" {
|
|
||||||
type = string
|
|
||||||
description = "Location of the resource group"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "tags" {
|
|
||||||
type = map(string)
|
|
||||||
default = {}
|
|
||||||
description = "A mapping of tags which should be assigned to the deployed resource"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "prefix" {
|
|
||||||
type = string
|
|
||||||
description = "Prefix for the module name"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "postfix" {
|
|
||||||
type = string
|
|
||||||
description = "Postfix for the module name"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "env" {
|
|
||||||
type = string
|
|
||||||
description = "Environment prefix"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "storage_account_id" {
|
|
||||||
type = string
|
|
||||||
description = "The ID of the Storage Account linked to AML workspace"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "key_vault_id" {
|
|
||||||
type = string
|
|
||||||
description = "The ID of the Key Vault linked to AML workspace"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "application_insights_id" {
|
|
||||||
type = string
|
|
||||||
description = "The ID of the Application Insights linked to AML workspace"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "container_registry_id" {
|
|
||||||
type = string
|
|
||||||
description = "The ID of the Container Registry linked to AML workspace"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Used as the condition of a `count` ternary, so it is constrained to bool.
variable "enable_aml_computecluster" {
  type        = bool
  description = "Variable to enable or disable AML compute cluster"
  default     = false
}
|
|
||||||
|
|
||||||
variable "storage_account_name" {
|
|
||||||
type = string
|
|
||||||
description = "The Name of the Storage Account linked to AML workspace"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Used as the condition of `count` ternaries, so it is constrained to bool.
# No default: callers must set it explicitly.
variable "enable_aml_secure_workspace" {
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
}
|
|
||||||
|
|
||||||
variable "vnet_id" {
|
|
||||||
type = string
|
|
||||||
description = "The ID of the vnet that should be linked to the DNS zone"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "subnet_default_id" {
|
|
||||||
type = string
|
|
||||||
description = "The ID of the subnet from which private IP addresses will be allocated for this Private Endpoint"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Consumed by the compute cluster's subnet_resource_id, not by the private
# endpoint (which uses subnet_default_id).
variable "subnet_training_id" {
  type        = string
  description = "The ID of the training subnet that the AML compute cluster is attached to when the secure workspace is enabled"
}
|
|
|
@ -1,8 +0,0 @@
|
||||||
resource "azurerm_application_insights" "appi" {
|
|
||||||
name = "appi-${var.prefix}-${var.postfix}${var.env}"
|
|
||||||
location = var.location
|
|
||||||
resource_group_name = var.rg_name
|
|
||||||
application_type = "web"
|
|
||||||
|
|
||||||
tags = var.tags
|
|
||||||
}
|
|
|
@ -1,3 +0,0 @@
|
||||||
output "id" {
  description = "ID of the Application Insights resource created by this module"
  value       = azurerm_application_insights.appi.id
}
|
|
|
@ -1,30 +0,0 @@
|
||||||
variable "rg_name" {
|
|
||||||
type = string
|
|
||||||
description = "Resource group name"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "location" {
|
|
||||||
type = string
|
|
||||||
description = "Location of the resource group"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "tags" {
|
|
||||||
type = map(string)
|
|
||||||
default = {}
|
|
||||||
description = "A mapping of tags which should be assigned to the deployed resource"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "prefix" {
|
|
||||||
type = string
|
|
||||||
description = "Prefix for the module name"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "postfix" {
|
|
||||||
type = string
|
|
||||||
description = "Postfix for the module name"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "env" {
|
|
||||||
type = string
|
|
||||||
description = "Environment prefix"
|
|
||||||
}
|
|
|
@ -1,31 +0,0 @@
|
||||||
# Bastion host, created only when the secure workspace is enabled.
resource "azurerm_bastion_host" "bas" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "bas-${var.prefix}-${var.postfix}${var.env}"
  resource_group_name = var.rg_name
  location            = var.location

  sku                = "Standard"
  copy_paste_enabled = false
  file_copy_enabled  = false

  # Attaches the bastion to the given subnet and to the public IP declared
  # in this module (same count condition, hence index 0).
  ip_configuration {
    name                 = "configuration"
    subnet_id            = var.subnet_id
    public_ip_address_id = azurerm_public_ip.pip[0].id
  }

  tags = var.tags
}
|
|
||||||
|
|
||||||
# Static Standard-SKU public IP referenced by the bastion host's ip_configuration.
resource "azurerm_public_ip" "pip" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "pip-${var.prefix}-${var.postfix}${var.env}"
  resource_group_name = var.rg_name
  location            = var.location
  sku                 = "Standard"
  allocation_method   = "Static"

  tags = var.tags
}
|
|
|
@ -1,39 +0,0 @@
|
||||||
# Inputs of the bastion module.

# Resource group the bastion host and its public IP are deployed into.
variable "rg_name" {
  type        = string
  description = "Resource group name"
}

# Azure region used for both resources.
variable "location" {
  type        = string
  description = "Location of the resource group"
}

# Optional tags; defaults to no tags.
variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
}

# prefix, postfix and env are concatenated into the resource names.
variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}

# Subnet used in the bastion host's ip_configuration.
variable "subnet_id" {
  type        = string
  description = "Subnet ID for the bastion"
}

# Drives `count` on every resource of this module. Declared as bool so that a
# non-boolean value is rejected during input validation.
variable "enable_aml_secure_workspace" {
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
}
|
|
|
@ -1,59 +0,0 @@
|
||||||
# Hyphen-free copies of prefix/postfix, used to build the registry name.
locals {
  safe_prefix  = replace(var.prefix, "-", "")
  safe_postfix = replace(var.postfix, "-", "")
}
|
|
||||||
|
|
||||||
# Container registry. The SKU is Premium when the secure workspace is enabled
# and Standard otherwise.
resource "azurerm_container_registry" "cr" {
  name                = "cr${local.safe_prefix}${local.safe_postfix}${var.env}"
  location            = var.location
  resource_group_name = var.rg_name

  sku           = var.enable_aml_secure_workspace ? "Premium" : "Standard"
  admin_enabled = true

  tags = var.tags
}
|
|
||||||
|
|
||||||
# DNS Zones
|
|
||||||
|
|
||||||
# Private DNS zone for the registry private link (secure workspace only).
resource "azurerm_private_dns_zone" "cr_zone" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "privatelink.azurecr.io"
  resource_group_name = var.rg_name
}
|
|
||||||
|
|
||||||
# Linking of DNS zones to Virtual Network
|
|
||||||
|
|
||||||
# Links the registry private DNS zone to the supplied virtual network.
resource "azurerm_private_dns_zone_virtual_network_link" "cr_zone_link" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                  = "${var.prefix}${var.postfix}_link_acr"
  resource_group_name   = var.rg_name
  virtual_network_id    = var.vnet_id
  private_dns_zone_name = azurerm_private_dns_zone.cr_zone[0].name
}
|
|
||||||
|
|
||||||
# Private Endpoint configuration
|
|
||||||
|
|
||||||
# Private endpoint for the registry's "registry" sub-resource, registered in
# the registry private DNS zone (secure workspace only).
resource "azurerm_private_endpoint" "cr_pe" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "pe-${azurerm_container_registry.cr.name}-acr"
  resource_group_name = var.rg_name
  location            = var.location
  subnet_id           = var.subnet_id

  private_service_connection {
    name                           = "psc-acr-${var.prefix}-${var.postfix}${var.env}"
    is_manual_connection           = false
    private_connection_resource_id = azurerm_container_registry.cr.id
    subresource_names              = ["registry"]
  }

  private_dns_zone_group {
    name                 = "private-dns-zone-group-acr"
    private_dns_zone_ids = [azurerm_private_dns_zone.cr_zone[0].id]
  }

  tags = var.tags
}
|
|
|
@ -1,3 +0,0 @@
|
||||||
# Exposes the container registry resource ID to the calling configuration.
output "id" {
  description = "Resource ID of the container registry"
  value       = azurerm_container_registry.cr.id
}
|
|
|
@ -1,44 +0,0 @@
|
||||||
# Inputs of the container registry module.

# Resource group all resources of this module are deployed into.
variable "rg_name" {
  type        = string
  description = "Resource group name"
}

# Azure region for the registry and its private endpoint.
variable "location" {
  type        = string
  description = "Location of the resource group"
}

# Optional tags; defaults to no tags.
variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
}

# prefix and postfix have hyphens stripped before they go into the registry name.
variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}

# Selects the registry SKU and drives `count` on the private-networking
# resources. Declared as bool so that a non-boolean value is rejected during
# input validation.
variable "enable_aml_secure_workspace" {
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
}

# Virtual network linked to the registry private DNS zone.
variable "vnet_id" {
  type        = string
  description = "The ID of the vnet that should be linked to the DNS zone"
}

# Subnet hosting the registry private endpoint.
variable "subnet_id" {
  type        = string
  description = "The ID of the subnet from which private IP addresses will be allocated for this Private Endpoint"
}
|
|
|
@ -1,59 +0,0 @@
|
||||||
# Client configuration of the provider; its client_id and tenant_id are stored
# as Key Vault secrets further down in this file.
data "azurerm_client_config" "current" {}
|
|
||||||
|
|
||||||
# Azure Data Explorer (Kusto) cluster, created only when monitoring is enabled.
resource "azurerm_kusto_cluster" "cluster" {
  count = var.enable_monitoring ? 1 : 0

  # Hyphens are stripped from prefix/postfix, as the sibling registry and
  # storage modules do, because a Kusto cluster name may only contain
  # alphanumeric characters. Hyphen-free inputs yield the same name as before.
  name                = "adx${replace(var.prefix, "-", "")}${replace(var.postfix, "-", "")}${var.env}"
  location            = var.location
  resource_group_name = var.rg_name

  streaming_ingestion_enabled = true
  language_extensions         = ["PYTHON"]

  sku {
    name     = "Standard_D11_v2"
    capacity = 2
  }

  tags = var.tags
}
|
|
||||||
|
|
||||||
# Database "mlmonitoring" inside the monitoring Kusto cluster.
resource "azurerm_kusto_database" "database" {
  count = var.enable_monitoring ? 1 : 0

  name                = "mlmonitoring"
  cluster_name        = azurerm_kusto_cluster.cluster[0].name
  location            = var.location
  resource_group_name = var.rg_name
}
|
|
||||||
|
|
||||||
# Stores the client ID of the current provider identity in the Key Vault.
resource "azurerm_key_vault_secret" "SP_ID" {
  count = var.enable_monitoring ? 1 : 0

  key_vault_id = var.key_vault_id
  name         = "kvmonitoringspid"
  value        = data.azurerm_client_config.current.client_id
}
|
|
||||||
|
|
||||||
# Stores the supplied client secret in the Key Vault.
resource "azurerm_key_vault_secret" "SP_KEY" {
  count = var.enable_monitoring ? 1 : 0

  key_vault_id = var.key_vault_id
  name         = "kvmonitoringspkey"
  value        = var.client_secret
}
|
|
||||||
|
|
||||||
# Stores the tenant ID of the current provider identity in the Key Vault.
resource "azurerm_key_vault_secret" "SP_TENANT_ID" {
  count = var.enable_monitoring ? 1 : 0

  key_vault_id = var.key_vault_id
  name         = "kvmonitoringadxtenantid"
  value        = data.azurerm_client_config.current.tenant_id
}
|
|
||||||
|
|
||||||
# Stores the URI of the monitoring Kusto cluster in the Key Vault.
resource "azurerm_key_vault_secret" "ADX_URI" {
  count = var.enable_monitoring ? 1 : 0

  key_vault_id = var.key_vault_id
  name         = "kvmonitoringadxuri"
  value        = azurerm_kusto_cluster.cluster[0].uri
}
|
|
||||||
|
|
||||||
# Stores the name of the monitoring Kusto database in the Key Vault.
resource "azurerm_key_vault_secret" "ADX_DB" {
  count = var.enable_monitoring ? 1 : 0

  key_vault_id = var.key_vault_id
  name         = "kvmonitoringadxdb"
  value        = azurerm_kusto_database.database[0].name
}
|
|
|
@ -1,45 +0,0 @@
|
||||||
# Common inputs of the data explorer (monitoring) module.

# Resource group the Kusto cluster and database are deployed into.
variable "rg_name" {
  description = "Resource group name"
  type        = string
}

# Azure region for the Kusto cluster and database.
variable "location" {
  description = "Location of the resource group"
  type        = string
}

# Optional tags; defaults to no tags.
variable "tags" {
  description = "A mapping of tags which should be assigned to the deployed resource"
  type        = map(string)
  default     = {}
}

# prefix, postfix and env are concatenated into the Kusto cluster name.
variable "prefix" {
  description = "Prefix for the module name"
  type        = string
}

variable "postfix" {
  description = "Postfix for the module name"
  type        = string
}

variable "env" {
  description = "Environment prefix"
  type        = string
}

# Key Vault that receives the monitoring secrets.
variable "key_vault_id" {
  description = "The ID of the Key Vault linked to AML workspace"
  type        = string
}
|
|
||||||
|
|
||||||
# Drives `count` on the Kusto cluster, the Kusto database and the monitoring
# Key Vault secrets. The previous description ("AML compute cluster") was
# copied from another module and did not match what this flag controls.
variable "enable_monitoring" {
  type        = bool
  description = "Variable to enable or disable the monitoring resources (Azure Data Explorer cluster, database and related Key Vault secrets)"
  default     = false
}
|
|
||||||
|
|
||||||
# Secret written to the "kvmonitoringspkey" Key Vault secret when monitoring is
# enabled. Marked sensitive so the value is redacted from plan/apply output.
#
# The default stays the string form of the former untyped `false`, so callers
# that omit the variable keep working unchanged.
# NOTE(review): "false" is only a placeholder; supply a real secret whenever
# enable_monitoring is true.
variable "client_secret" {
  type        = string
  description = "client secret"
  default     = "false"
  sensitive   = true
}
|
|
|
@ -1,74 +0,0 @@
|
||||||
# Client configuration of the provider; supplies the tenant_id and object_id
# used by the Key Vault and its access policy below.
data "azurerm_client_config" "current" {}
|
|
||||||
|
|
||||||
# Standard-SKU Key Vault in the tenant of the current provider identity.
resource "azurerm_key_vault" "kv" {
  name                = "kv-${var.prefix}-${var.postfix}${var.env}"
  resource_group_name = var.rg_name
  location            = var.location
  sku_name            = "standard"
  tenant_id           = data.azurerm_client_config.current.tenant_id

  tags = var.tags

  # Grants the identity running Terraform key and secret permissions.
  access_policy {
    tenant_id = data.azurerm_client_config.current.tenant_id
    object_id = data.azurerm_client_config.current.object_id

    key_permissions = ["Create", "Get"]

    secret_permissions = ["Set", "Get", "Delete", "Purge", "Recover"]
  }
}
|
|
||||||
|
|
||||||
# DNS Zones
|
|
||||||
|
|
||||||
# Private DNS zone for the Key Vault private link (secure workspace only).
resource "azurerm_private_dns_zone" "kv_zone" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "privatelink.vaultcore.azure.net"
  resource_group_name = var.rg_name
}
|
|
||||||
|
|
||||||
# Linking of DNS zones to Virtual Network
|
|
||||||
|
|
||||||
# Links the Key Vault private DNS zone to the supplied virtual network.
resource "azurerm_private_dns_zone_virtual_network_link" "kv_zone_link" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                  = "${var.prefix}${var.postfix}_link_kv"
  resource_group_name   = var.rg_name
  virtual_network_id    = var.vnet_id
  private_dns_zone_name = azurerm_private_dns_zone.kv_zone[0].name
}
|
|
||||||
|
|
||||||
# Private Endpoint configuration
|
|
||||||
|
|
||||||
# Private endpoint for the Key Vault's "vault" sub-resource, registered in the
# Key Vault private DNS zone (secure workspace only).
resource "azurerm_private_endpoint" "kv_pe" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "pe-${azurerm_key_vault.kv.name}-vault"
  resource_group_name = var.rg_name
  location            = var.location
  subnet_id           = var.subnet_id

  private_service_connection {
    name                           = "psc-kv-${var.prefix}-${var.postfix}${var.env}"
    is_manual_connection           = false
    private_connection_resource_id = azurerm_key_vault.kv.id
    subresource_names              = ["vault"]
  }

  private_dns_zone_group {
    name                 = "private-dns-zone-group-kv"
    private_dns_zone_ids = [azurerm_private_dns_zone.kv_zone[0].id]
  }

  tags = var.tags
}
|
|
|
@ -1,3 +0,0 @@
|
||||||
# Exposes the Key Vault resource ID to the calling configuration.
output "id" {
  description = "Resource ID of the Key Vault"
  value       = azurerm_key_vault.kv.id
}
|
|
|
@ -1,44 +0,0 @@
|
||||||
# Inputs of the Key Vault module.

# Resource group all resources of this module are deployed into.
variable "rg_name" {
  type        = string
  description = "Resource group name"
}

# Azure region for the Key Vault and its private endpoint.
variable "location" {
  type        = string
  description = "Location of the resource group"
}

# Optional tags; defaults to no tags.
variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
}

# prefix, postfix and env are concatenated into the resource names.
variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}

# Drives `count` on the private-networking resources. Declared as bool so that
# a non-boolean value is rejected during input validation.
variable "enable_aml_secure_workspace" {
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
}

# Virtual network linked to the Key Vault private DNS zone.
variable "vnet_id" {
  type        = string
  description = "The ID of the vnet that should be linked to the DNS zone"
}

# Subnet hosting the Key Vault private endpoint.
variable "subnet_id" {
  type        = string
  description = "The ID of the subnet from which private IP addresses will be allocated for this Private Endpoint"
}
|
|
|
@ -1,5 +0,0 @@
|
||||||
# Resource group; the name is built from prefix, postfix and env.
resource "azurerm_resource_group" "adl_rg" {
  name     = "rg-${var.prefix}-${var.postfix}${var.env}"
  location = var.location

  tags = var.tags
}
|
|
|
@ -1,7 +0,0 @@
|
||||||
# Values of the created resource group, exported to the calling configuration.
output "name" {
  description = "Name of the resource group"
  value       = azurerm_resource_group.adl_rg.name
}

output "location" {
  description = "Location of the resource group"
  value       = azurerm_resource_group.adl_rg.location
}
|
|
|
@ -1,26 +0,0 @@
|
||||||
# Inputs of the resource group module.

# Azure region of the resource group; falls back to "North Europe".
variable "location" {
  description = "Location of the Resource Group"
  type        = string
  default     = "North Europe"
}

# Optional tags; defaults to no tags.
variable "tags" {
  description = "A mapping of tags which should be assigned to the Resource Group"
  type        = map(string)
  default     = {}
}

# prefix, postfix and env are concatenated into the resource group name.
variable "prefix" {
  description = "Prefix for the module name"
  type        = string
}

variable "postfix" {
  description = "Postfix for the module name"
  type        = string
}

variable "env" {
  description = "Environment prefix"
  type        = string
}
|
|
|
@ -1,118 +0,0 @@
|
||||||
# Client configuration of the provider.
# NOTE(review): no resource visible in this file references this data source --
# confirm whether it is still needed.
data "azurerm_client_config" "current" {}
|
|
||||||
|
|
||||||
# NOTE: the former `data "http" "ip"` lookup against https://ifconfig.me was
# removed. Nothing in this module consumed its result (its only reference, in
# the storage firewall `ip_rules`, is commented out), yet it made every
# plan/apply depend on an external HTTP service being reachable.
|
|
||||||
|
|
||||||
# Hyphen-free copies of prefix/postfix, used to build the storage account name.
locals {
  safe_prefix  = replace(var.prefix, "-", "")
  safe_postfix = replace(var.postfix, "-", "")
}
|
|
||||||
|
|
||||||
# StorageV2 account (Standard tier, LRS). Hierarchical namespace is controlled
# by var.hns_enabled.
resource "azurerm_storage_account" "st" {
  name                = "st${local.safe_prefix}${local.safe_postfix}${var.env}"
  location            = var.location
  resource_group_name = var.rg_name

  account_kind             = "StorageV2"
  account_tier             = "Standard"
  account_replication_type = "LRS"
  is_hns_enabled           = var.hns_enabled

  tags = var.tags
}
|
|
||||||
|
|
||||||
# Virtual Network & Firewall configuration
|
|
||||||
|
|
||||||
# Network rules of the storage account. The default action is "Allow" and no
# IP rules are set; subnet IDs and bypass come from the caller.
resource "azurerm_storage_account_network_rules" "firewall_rules" {
  storage_account_name = azurerm_storage_account.st.name
  resource_group_name  = var.rg_name

  default_action             = "Allow"
  bypass                     = var.firewall_bypass
  virtual_network_subnet_ids = var.firewall_virtual_network_subnet_ids
  ip_rules                   = [] # [data.http.ip.body]
}
|
|
||||||
|
|
||||||
# DNS Zones
|
|
||||||
|
|
||||||
# Private DNS zone for the blob private link (secure workspace only).
resource "azurerm_private_dns_zone" "st_zone_blob" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "privatelink.blob.core.windows.net"
  resource_group_name = var.rg_name
}
|
|
||||||
|
|
||||||
# Private DNS zone for the file private link (secure workspace only).
resource "azurerm_private_dns_zone" "st_zone_file" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "privatelink.file.core.windows.net"
  resource_group_name = var.rg_name
}
|
|
||||||
|
|
||||||
# Linking of DNS zones to Virtual Network
|
|
||||||
|
|
||||||
# Links the blob private DNS zone to the supplied virtual network.
resource "azurerm_private_dns_zone_virtual_network_link" "st_zone_link_blob" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                  = "${var.prefix}${var.postfix}_link_st_blob"
  resource_group_name   = var.rg_name
  virtual_network_id    = var.vnet_id
  private_dns_zone_name = azurerm_private_dns_zone.st_zone_blob[0].name
}
|
|
||||||
|
|
||||||
# Links the file private DNS zone to the supplied virtual network.
resource "azurerm_private_dns_zone_virtual_network_link" "st_zone_link_file" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                  = "${var.prefix}${var.postfix}_link_st_file"
  resource_group_name   = var.rg_name
  virtual_network_id    = var.vnet_id
  private_dns_zone_name = azurerm_private_dns_zone.st_zone_file[0].name
}
|
|
||||||
|
|
||||||
# Private Endpoint configuration
|
|
||||||
|
|
||||||
# Private endpoint for the storage account's "blob" sub-resource, registered in
# the blob private DNS zone (secure workspace only).
resource "azurerm_private_endpoint" "st_pe_blob" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "pe-${azurerm_storage_account.st.name}-blob"
  resource_group_name = var.rg_name
  location            = var.location
  subnet_id           = var.subnet_id

  private_service_connection {
    name                           = "psc-blob-${var.prefix}-${var.postfix}${var.env}"
    is_manual_connection           = false
    private_connection_resource_id = azurerm_storage_account.st.id
    subresource_names              = ["blob"]
  }

  private_dns_zone_group {
    name                 = "private-dns-zone-group-blob"
    private_dns_zone_ids = [azurerm_private_dns_zone.st_zone_blob[0].id]
  }

  tags = var.tags
}
|
|
||||||
|
|
||||||
# Private endpoint for the storage account's "file" sub-resource, registered in
# the file private DNS zone (secure workspace only).
resource "azurerm_private_endpoint" "st_pe_file" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "pe-${azurerm_storage_account.st.name}-file"
  resource_group_name = var.rg_name
  location            = var.location
  subnet_id           = var.subnet_id

  private_service_connection {
    name                           = "psc-file-${var.prefix}-${var.postfix}${var.env}"
    is_manual_connection           = false
    private_connection_resource_id = azurerm_storage_account.st.id
    subresource_names              = ["file"]
  }

  private_dns_zone_group {
    name                 = "private-dns-zone-group-file"
    private_dns_zone_ids = [azurerm_private_dns_zone.st_zone_file[0].id]
  }

  tags = var.tags
}
|
|
|
@ -1,7 +0,0 @@
|
||||||
# Values of the created storage account, exported to the calling configuration.
output "id" {
  description = "Resource ID of the storage account"
  value       = azurerm_storage_account.st.id
}

output "name" {
  description = "Name of the storage account"
  value       = azurerm_storage_account.st.name
}
|
|
|
@ -1,58 +0,0 @@
|
||||||
# Inputs of the storage account module.

# Resource group all resources of this module are deployed into.
variable "rg_name" {
  type        = string
  description = "Resource group name"
}

# Azure region for the storage account and its private endpoints.
variable "location" {
  type        = string
  description = "Location of the resource group"
}

# Optional tags; defaults to no tags. The description previously named the
# Resource Group, but this module applies the tags to the storage account and
# its private endpoints.
variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
}

# prefix and postfix have hyphens stripped before they go into the account name.
variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}

# Passed to is_hns_enabled on the storage account.
variable "hns_enabled" {
  type        = bool
  description = "Hierarchical namespaces enabled/disabled"
  default     = true
}

# Passed to virtual_network_subnet_ids of the storage network rules.
variable "firewall_virtual_network_subnet_ids" {
  type        = list(string)
  description = "Subnet IDs allowed through the storage account network rules"
  default     = []
}

# Passed to bypass of the storage network rules.
variable "firewall_bypass" {
  type        = list(string)
  description = "Traffic that bypasses the storage account network rules"
  default     = ["None"]
}

# Drives `count` on the private-networking resources. Declared as bool so that
# a non-boolean value is rejected during input validation.
variable "enable_aml_secure_workspace" {
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
}

# Virtual network linked to the storage private DNS zones.
variable "vnet_id" {
  type        = string
  description = "The ID of the vnet that should be linked to the DNS zone"
}

# Subnet hosting the storage private endpoints.
variable "subnet_id" {
  type        = string
  description = "The ID of the subnet from which private IP addresses will be allocated for this Private Endpoint"
}
|
|
|
@ -1,104 +0,0 @@
|
||||||
# Windows jump host VM, created only when the secure workspace is enabled.
resource "azurerm_virtual_machine" "vm" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                  = "wvm-jumphost"
  resource_group_name   = var.rg_name
  location              = var.location
  vm_size               = "Standard_DS3_v2"
  network_interface_ids = [azurerm_network_interface.vm_nic[0].id]

  # Disks are removed together with the VM.
  delete_os_disk_on_termination    = true
  delete_data_disks_on_termination = true

  storage_image_reference {
    publisher = "microsoft-dsvm"
    offer     = "dsvm-win-2019"
    sku       = "server-2019"
    version   = "latest"
  }

  storage_os_disk {
    name              = "disk-${var.prefix}-${var.postfix}${var.env}"
    create_option     = "FromImage"
    caching           = "ReadWrite"
    managed_disk_type = "StandardSSD_LRS"
  }

  # NOTE(review): the computer name is set to the admin username -- confirm
  # this is intended.
  os_profile {
    computer_name  = var.jumphost_username
    admin_username = var.jumphost_username
    admin_password = var.jumphost_password
  }

  os_profile_windows_config {
    provision_vm_agent        = true
    enable_automatic_upgrades = true
  }

  identity {
    type = "SystemAssigned"
  }

  tags = var.tags
}
|
|
||||||
|
|
||||||
# Network interface of the jump host; it gets a dynamic private IP from the
# given subnet and no public IP.
resource "azurerm_network_interface" "vm_nic" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "nic-${var.prefix}-${var.postfix}${var.env}"
  resource_group_name = var.rg_name
  location            = var.location

  ip_configuration {
    name                          = "configuration"
    subnet_id                     = var.subnet_id
    private_ip_address_allocation = "Dynamic"
    # public_ip_address_id = azurerm_public_ip.vm_public_ip.id
  }

  tags = var.tags
}
|
|
||||||
|
|
||||||
# Network security group for the jump host NIC (secure workspace only).
resource "azurerm_network_security_group" "vm_nsg" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "nsg-${var.prefix}-${var.postfix}${var.env}"
  resource_group_name = var.rg_name
  location            = var.location

  # Inbound RDP. The source used to be "*" (any address); it is narrowed to the
  # VirtualNetwork service tag because the NIC in this module has no public IP.
  # NOTE(review): this assumes RDP only ever arrives from inside the VNet
  # (e.g. from the Bastion subnet) -- confirm before applying.
  security_rule {
    name                       = "RDP"
    priority                   = 1010
    direction                  = "Inbound"
    access                     = "Allow"
    protocol                   = "Tcp"
    source_port_range          = "*"
    destination_port_range     = "3389"
    source_address_prefix      = "VirtualNetwork"
    destination_address_prefix = "*"
  }

  tags = var.tags
}
|
|
||||||
|
|
||||||
# Attaches the jump host NSG to the jump host NIC.
resource "azurerm_network_interface_security_group_association" "vm_nsg_association" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  network_security_group_id = azurerm_network_security_group.vm_nsg[0].id
  network_interface_id      = azurerm_network_interface.vm_nic[0].id
}
|
|
||||||
|
|
||||||
# Daily shutdown schedule for the jump host ("2000", W. Europe Standard Time),
# with notifications disabled.
resource "azurerm_dev_test_global_vm_shutdown_schedule" "vm_schedule" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  virtual_machine_id = azurerm_virtual_machine.vm[0].id
  location           = var.location
  enabled            = true

  timezone              = "W. Europe Standard Time"
  daily_recurrence_time = "2000"

  notification_settings {
    enabled = false
  }
}
|
|
|
@ -1,49 +0,0 @@
|
||||||
# Inputs of the jump host (virtual machine) module.

# Resource group all resources of this module are deployed into.
variable "rg_name" {
  type        = string
  description = "Resource group name"
}

# Azure region for the VM, NIC, NSG and shutdown schedule.
variable "location" {
  type        = string
  description = "Location of the resource group"
}

# Optional tags; defaults to no tags.
variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
}

# prefix, postfix and env are concatenated into the disk, NIC and NSG names.
variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}

# Used as both admin username and computer name of the VM.
variable "jumphost_username" {
  type        = string
  description = "VM username"
}

# Admin password of the VM. Marked sensitive so the value is redacted from
# plan/apply output.
variable "jumphost_password" {
  type        = string
  description = "VM password"
  sensitive   = true
}

# Subnet the VM's network interface is placed in.
variable "subnet_id" {
  type        = string
  description = "Subnet ID for the virtual machine"
}

# Drives `count` on every resource of this module. Declared as bool so that a
# non-boolean value is rejected during input validation.
variable "enable_aml_secure_workspace" {
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
}
|
|
|
@ -1,131 +0,0 @@
|
||||||
# Virtual network
|
|
||||||
|
|
||||||
# Virtual network (10.0.0.0/16) for the secure workspace.
resource "azurerm_virtual_network" "vnet_default" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "vnet-${var.prefix}-${var.postfix}${var.environment}"
  location            = module.resource_group.location
  resource_group_name = module.resource_group.name
  address_space       = ["10.0.0.0/16"]

  tags = local.tags
}
|
|
||||||
|
|
||||||
# Subnets
|
|
||||||
|
|
||||||
# Default subnet (10.0.1.0/24) of the secure-workspace virtual network.
resource "azurerm_subnet" "snet_default" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                 = "snet-${var.prefix}-${var.postfix}${var.environment}-default"
  virtual_network_name = azurerm_virtual_network.vnet_default[0].name
  resource_group_name  = module.resource_group.name
  address_prefixes     = ["10.0.1.0/24"]

  enforce_private_link_endpoint_network_policies = true
}
|
|
||||||
|
|
||||||
# Subnet named "AzureBastionSubnet" (10.0.10.0/27).
resource "azurerm_subnet" "snet_bastion" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                 = "AzureBastionSubnet"
  virtual_network_name = azurerm_virtual_network.vnet_default[0].name
  resource_group_name  = module.resource_group.name
  address_prefixes     = ["10.0.10.0/27"]
}
|
|
||||||
|
|
||||||
# Training subnet (10.0.2.0/24); the training NSG and route table below are
# associated with it.
resource "azurerm_subnet" "snet_training" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                 = "snet-${var.prefix}-${var.postfix}${var.environment}-training"
  virtual_network_name = azurerm_virtual_network.vnet_default[0].name
  resource_group_name  = module.resource_group.name
  address_prefixes     = ["10.0.2.0/24"]

  enforce_private_link_endpoint_network_policies = true
}
|
|
||||||
|
|
||||||
# Network security groups
|
|
||||||
|
|
||||||
# NSG for the training subnet. It allows inbound TCP from the
# BatchNodeManagement and AzureMachineLearning service tags.
resource "azurerm_network_security_group" "nsg_training" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "nsg-${var.prefix}-${var.postfix}${var.environment}-training"
  resource_group_name = module.resource_group.name
  location            = module.resource_group.location

  security_rule {
    name                       = "BatchNodeManagement"
    priority                   = 100
    access                     = "Allow"
    direction                  = "Inbound"
    protocol                   = "Tcp"
    source_address_prefix      = "BatchNodeManagement"
    source_port_range          = "*"
    destination_address_prefix = "*"
    destination_port_range     = "29876-29877"
  }

  security_rule {
    name                       = "AzureMachineLearning"
    priority                   = 110
    access                     = "Allow"
    direction                  = "Inbound"
    protocol                   = "Tcp"
    source_address_prefix      = "AzureMachineLearning"
    source_port_range          = "*"
    destination_address_prefix = "*"
    destination_port_range     = "44224"
  }
}
|
|
||||||
|
|
||||||
# Associates the training NSG with the training subnet.
resource "azurerm_subnet_network_security_group_association" "nsg-training-link" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  network_security_group_id = azurerm_network_security_group.nsg_training[0].id
  subnet_id                 = azurerm_subnet.snet_training[0].id
}
|
|
||||||
|
|
||||||
# User Defined Routes
|
|
||||||
|
|
||||||
# Route table for the training subnet; its routes are declared separately.
resource "azurerm_route_table" "rt_training" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "rt-${var.prefix}-${var.postfix}${var.environment}-training"
  resource_group_name = module.resource_group.name
  location            = module.resource_group.location
}
|
|
||||||
|
|
||||||
# Route for 0.0.0.0/0 with next hop "Internet".
resource "azurerm_route" "route_training_internet" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "Internet"
  route_table_name    = azurerm_route_table.rt_training[0].name
  resource_group_name = module.resource_group.name
  next_hop_type       = "Internet"
  address_prefix      = "0.0.0.0/0"
}
|
|
||||||
|
|
||||||
# Route for the AzureMachineLearning prefix with next hop "Internet".
resource "azurerm_route" "route_training_aml" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "AzureMLRoute"
  route_table_name    = azurerm_route_table.rt_training[0].name
  resource_group_name = module.resource_group.name
  next_hop_type       = "Internet"
  address_prefix      = "AzureMachineLearning"
}
|
|
||||||
|
|
||||||
# Route for the BatchNodeManagement prefix with next hop "Internet".
resource "azurerm_route" "route_training_batch" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  name                = "BatchRoute"
  route_table_name    = azurerm_route_table.rt_training[0].name
  resource_group_name = module.resource_group.name
  next_hop_type       = "Internet"
  address_prefix      = "BatchNodeManagement"
}
|
|
||||||
|
|
||||||
# Associates the training route table with the training subnet.
resource "azurerm_subnet_route_table_association" "rt_training_link" {
  count = var.enable_aml_secure_workspace ? 1 : 0

  route_table_id = azurerm_route_table.rt_training[0].id
  subnet_id      = azurerm_subnet.snet_training[0].id
}
|
|
|
@ -1,68 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
||||||
# Licensed under the MIT License.
|
|
||||||
|
|
||||||
# Pick the infra config by branch: 'main' uses the PRD config, every other
# branch uses the DEV config.
variables:
  - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
      # 'main' branch: PRD environment
      - template: ../../config-infra-prod.yml
  - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
      # 'develop' or feature branches: DEV environment
      - template: ../../config-infra-dev.yml

# Jump-host credentials forwarded to the Terraform plan/apply templates below.
# NOTE(review): the password default is a literal committed to source control;
# override it at queue time (ideally from a secret variable) for real deployments.
parameters:
  - name: jumphost_username
    type: string
    default: "azureuser"
  - name: jumphost_password
    type: string
    default: "ThisIsNotVerySecure!"

# Manual runs only. `trigger: none` is the documented way to disable CI
# triggers; the list form (`- none`) is a branch filter for a branch named "none".
trigger: none

pool:
  vmImage: $(ap_vm_image)

# Step templates are checked out from the Azure/mlops-templates GitHub repo.
resources:
  repositories:
    - repository: mlops-templates
      name: Azure/mlops-templates
      endpoint: github-connection
      type: github
      ref: main  # branch name

stages:
  # Stage 1: resource group, storage account and storage container
  # (the storage that holds the Terraform state, per the stage name).
  - stage: CreateStorageAccountForTerraformState
    displayName: Create Storage for Terraform
    jobs:
      - job: CreateStorageForTerraform
        displayName: Create Storage for Terraform
        steps:
          - checkout: self
            path: s/
          - checkout: mlops-templates
            path: s/templates/
          - template: templates/infra/create-resource-group.yml@mlops-templates
          - template: templates/infra/create-storage-account.yml@mlops-templates
          - template: templates/infra/create-storage-container.yml@mlops-templates

  # Stage 2: install Terraform, then init -> validate -> plan -> apply.
  - stage: DeployAzureMachineLearningRG
    displayName: Deploy AML Workspace
    jobs:
      - job: DeployAMLWorkspace
        displayName: Deploy Terraform
        steps:
          - checkout: self
            path: s/
          - checkout: mlops-templates
            path: s/templates/
          - template: templates/infra/install-terraform.yml@mlops-templates
          - template: templates/infra/run-terraform-init.yml@mlops-templates
          - template: templates/infra/run-terraform-validate.yml@mlops-templates
          - template: templates/infra/run-terraform-plan.yml@mlops-templates
            parameters:
              jumphost_username: ${{parameters.jumphost_username}}
              jumphost_password: ${{parameters.jumphost_password}}
          - template: templates/infra/run-terraform-apply.yml@mlops-templates
            parameters:
              jumphost_username: ${{parameters.jumphost_username}}
              jumphost_password: ${{parameters.jumphost_password}}
|
|
|
@ -1,47 +0,0 @@
|
||||||
# Input variables for the infrastructure deployment.

variable "location" {
  type        = string
  description = "Location of the resource group and modules"
}

variable "prefix" {
  type        = string
  description = "Prefix for module names"
}

variable "environment" {
  type        = string
  description = "Environment information"
}

variable "postfix" {
  type        = string
  description = "Postfix for module names"
}

variable "enable_aml_computecluster" {
  # NOTE(review): no type constraint is declared; presumably a bool like
  # enable_aml_secure_workspace - confirm against its usage before constraining.
  description = "Variable to enable or disable AML compute cluster"
}

variable "enable_aml_secure_workspace" {
  # Used as the condition in `count = var.enable_aml_secure_workspace ? 1 : 0`,
  # so the value has to be (convertible to) a bool; declare that explicitly.
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
}

variable "jumphost_username" {
  type        = string
  description = "VM username"
  default     = "azureuser"
}

variable "jumphost_password" {
  # NOTE(review): the default is a literal committed to source control; override
  # it for real deployments and consider marking the variable `sensitive = true`.
  type        = string
  description = "VM password"
  default     = "ThisIsNotVerySecure!"
}

variable "enable_monitoring" {
  # NOTE(review): no type constraint is declared; presumably a bool - confirm.
  description = "Variable to enable or disable Monitoring"
}

variable "client_secret" {
  # NOTE(review): holds a credential per its description; consider
  # `type = string` and `sensitive = true` once callers are confirmed.
  description = "Service Principal Secret"
}
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,11 @@
|
||||||
|
# DEPLOY: batch endpoint

# Optional: create the compute cluster used for batch scoring.
#az ml compute create -n batch-cluster --type amlcompute --min-instances 0 --max-instances 3

# Create the batch endpoint from its YAML definition.
az ml batch-endpoint create \
    --file deploy/batch/batch-endpoint.yml

# Create the batch deployment and make it the endpoint's default.
az ml batch-deployment create \
    --file deploy/batch/batch-deployment.yml \
    --set-default

# Optional: invoke the endpoint with the sample CSV to test it.
#az ml batch-endpoint invoke --name taxi-batch-endpoint --input ../../data/taxi-batch.csv
|
|
@ -0,0 +1,12 @@
|
||||||
|
# DEPLOY: online endpoint

# Set the default resource group, workspace and location for the az commands below.
az configure --defaults group=mldemorg workspace=mldemo location=eastus

# Create the online endpoint from its YAML definition.
az ml online-endpoint create \
    --file deploy/online/online-endpoint.yml

# Create the online deployment from its YAML definition.
az ml online-deployment create \
    --file deploy/online/online-deployment.yml

# Route 100% of the endpoint's traffic to the "blue" deployment.
az ml online-endpoint update \
    --name taxi-online-endpoint \
    --traffic blue=100

# Optional: invoke the endpoint with the sample request to test it.
#az ml online-endpoint invoke --name taxi-online-endpoint --request-file ../../data/taxi-request.json
|
|
@ -0,0 +1,12 @@
|
||||||
|
# TRAIN

# Optional: create the compute cluster for the training job.
#az ml compute create --file train/compute.yml

# Register the data asset.
az ml data create \
    --file train/data.yml

# Register the training environment.
az ml environment create \
    --file train/environment.yml

# Submit the training pipeline job.
az ml job create \
    --file train/pipeline.yml

# Submit the pipeline job that uses an AutoML training step.
az ml job create \
    --file train/pipeline_automl.yml
|
|
@ -1,5 +1,5 @@
|
||||||
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
|
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
|
||||||
name: taxi-train-env
|
name: taxi-train-env
|
||||||
image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04
|
image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04
|
||||||
conda_file: ../../../data-science/environment/train-conda.yml
|
conda_file: ../../../environment/train-conda.yml
|
||||||
description: Environment created from a Docker image plus Conda environment to train taxi model.
|
description: Environment created from a Docker image plus Conda environment to train taxi model.
|
|
@ -30,7 +30,7 @@ jobs:
|
||||||
prep_data:
|
prep_data:
|
||||||
name: prep_data
|
name: prep_data
|
||||||
display_name: prep-data
|
display_name: prep-data
|
||||||
code: ../../../data-science/src/prep
|
code: ../../../components/prep
|
||||||
command: >-
|
command: >-
|
||||||
python prep.py
|
python prep.py
|
||||||
--raw_data ${{inputs.raw_data}}
|
--raw_data ${{inputs.raw_data}}
|
||||||
|
@ -52,7 +52,7 @@ jobs:
|
||||||
train_model:
|
train_model:
|
||||||
name: train_model
|
name: train_model
|
||||||
display_name: train-model
|
display_name: train-model
|
||||||
code: ../../../data-science/src/train
|
code: ../../../components/train
|
||||||
command: >-
|
command: >-
|
||||||
python train.py
|
python train.py
|
||||||
--train_data ${{inputs.train_data}}
|
--train_data ${{inputs.train_data}}
|
||||||
|
@ -66,7 +66,7 @@ jobs:
|
||||||
evaluate_model:
|
evaluate_model:
|
||||||
name: evaluate_model
|
name: evaluate_model
|
||||||
display_name: evaluate-model
|
display_name: evaluate-model
|
||||||
code: ../../../data-science/src/evaluate
|
code: ../../../components/evaluate
|
||||||
command: >-
|
command: >-
|
||||||
python evaluate.py
|
python evaluate.py
|
||||||
--model_name ${{inputs.model_name}}
|
--model_name ${{inputs.model_name}}
|
||||||
|
@ -84,7 +84,7 @@ jobs:
|
||||||
register_model:
|
register_model:
|
||||||
name: register_model
|
name: register_model
|
||||||
display_name: register-model
|
display_name: register-model
|
||||||
code: ../../../data-science/src/register
|
code: ../../../components/register
|
||||||
command: >-
|
command: >-
|
||||||
python register.py
|
python register.py
|
||||||
--model_name ${{inputs.model_name}}
|
--model_name ${{inputs.model_name}}
|
|
@ -30,7 +30,7 @@ jobs:
|
||||||
prep_data:
|
prep_data:
|
||||||
name: prep_data
|
name: prep_data
|
||||||
display_name: prep-data
|
display_name: prep-data
|
||||||
code: ../../../data-science/src/prep
|
code: ../../../components/prep
|
||||||
command: >-
|
command: >-
|
||||||
python prep.py
|
python prep.py
|
||||||
--raw_data ${{inputs.raw_data}}
|
--raw_data ${{inputs.raw_data}}
|
||||||
|
@ -74,7 +74,7 @@ jobs:
|
||||||
register_model:
|
register_model:
|
||||||
name: register_model
|
name: register_model
|
||||||
display_name: register-model
|
display_name: register-model
|
||||||
code: ../../../data-science/src/register
|
code: ../../../components/register
|
||||||
command: >-
|
command: >-
|
||||||
python register_automl.py
|
python register_automl.py
|
||||||
--model_name ${{inputs.model_name}}
|
--model_name ${{inputs.model_name}}
|
|
@ -0,0 +1,390 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# import required libraries\n",
|
||||||
|
"from azure.ai.ml import MLClient, command, Input, Output, load_component\n",
|
||||||
|
"from azure.identity import DefaultAzureCredential\n",
|
||||||
|
"from azure.ai.ml.entities import Data, Environment"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Enter details of your AML workspace\n",
|
||||||
|
"subscription_id = \"<subscription_id>\"\n",
|
||||||
|
"resource_group = \"<resource_group_name>\"\n",
|
||||||
|
"workspace = \"<workspace_name>\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"gather": {
|
||||||
|
"logged": 1670200031039
|
||||||
|
},
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false,
|
||||||
|
"source_hidden": false
|
||||||
|
},
|
||||||
|
"nteract": {
|
||||||
|
"transient": {
|
||||||
|
"deleting": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# get a handle to the workspace\n",
|
||||||
|
"ml_client = MLClient(\n",
|
||||||
|
" DefaultAzureCredential(), subscription_id, resource_group, workspace\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Batch Endpoint\n",
|
||||||
|
"\n",
|
||||||
|
"**Batch endpoints** are endpoints that are used to do batch inferencing on large volumes of data over a period of time. \n",
|
||||||
|
"\n",
|
||||||
|
"**Batch endpoints** receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis.\n",
|
||||||
|
"\n",
|
||||||
|
"<center>\n",
|
||||||
|
"<img src=\"../../imgs/endpoint_batch_concept.png\" width = \"700px\" alt=\"Concept batch endpoint\">\n",
|
||||||
|
"</center>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Create Batch Compute Cluster (Optional)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"gather": {
|
||||||
|
"logged": 1668247613855
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"``` python\n",
|
||||||
|
"# create compute cluster to be used by batch cluster\n",
|
||||||
|
"from azure.ai.ml.entities import AmlCompute\n",
|
||||||
|
"\n",
|
||||||
|
"my_cluster = AmlCompute(\n",
|
||||||
|
" name=\"batch-cluster\",\n",
|
||||||
|
" type=\"amlcompute\", \n",
|
||||||
|
" size=\"STANDARD_DS3_V2\", \n",
|
||||||
|
" min_instances=0, \n",
|
||||||
|
" max_instances=3,\n",
|
||||||
|
" location=\"westeurope\", \t\n",
|
||||||
|
")\n",
|
||||||
|
"ml_client.compute.begin_create_or_update(my_cluster)\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
 "cell_type": "code",
 "execution_count": 4,
 "metadata": {},
 "outputs": [
  {
   "data": {
    "text/plain": [
     "<azure.core.polling._poller.LROPoller at 0x7f085c2d3550>"
    ]
   },
   "execution_count": 4,
   "metadata": {},
   "output_type": "execute_result"
  }
 ],
 "source": [
  "from azure.ai.ml.entities import AmlCompute\n",
  "\n",
  "# Check for the same cluster name that is created below; previously the\n",
  "# lookup used a different name, so the existence check never matched.\n",
  "compute_name = \"batch-cluster\"\n",
  "\n",
  "try:\n",
  "    ml_client.compute.get(name=compute_name)\n",
  "    print(\"Compute already exists\")\n",
  "\n",
  "except Exception:\n",
  "    # Lookup failed: treat the cluster as missing and submit its creation.\n",
  "    print(\"Compute not found; Proceding to create\")\n",
  "\n",
  "    my_cluster = AmlCompute(\n",
  "        name=compute_name,\n",
  "        type=\"amlcompute\",\n",
  "        size=\"STANDARD_DS3_V2\",\n",
  "        min_instances=0,\n",
  "        max_instances=3,\n",
  "    )\n",
  "    ml_client.compute.begin_create_or_update(my_cluster)"
 ]
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Create Batch Endpoint\n",
|
||||||
|
"\n",
|
||||||
|
"We can create the **batch endpoint** with cli v2 or sdk v2 using the following syntax:\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"<center>\n",
|
||||||
|
"<img src=\"../../imgs/create_batch_endpoint.png\" width = \"700px\" alt=\"Create batch endpoint cli vs sdk\">\n",
|
||||||
|
"</center>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {
|
||||||
|
"gather": {
|
||||||
|
"logged": 1668247623872
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# create batch endpoint\n",
|
||||||
|
"from azure.ai.ml.entities import BatchEndpoint\n",
|
||||||
|
"import random\n",
|
||||||
|
"\n",
|
||||||
|
"rand = random.randint(0, 10000)\n",
|
||||||
|
"\n",
|
||||||
|
"endpoint_name = f\"taxi-batch-endpoint-{rand}\"\n",
|
||||||
|
"batch_endpoint = BatchEndpoint(\n",
|
||||||
|
" name=endpoint_name,\n",
|
||||||
|
" description=\"Taxi batch endpoint\",\n",
|
||||||
|
" tags={\"model\": \"taxi-model@latest\"},\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"poller = ml_client.begin_create_or_update(batch_endpoint)\n",
|
||||||
|
"poller.wait()\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Endpoint creation succeeded\n",
|
||||||
|
"{'additional_properties': {}, 'id': '/subscriptions/14585b9f-5c83-4a76-8055-42149123f99f/resourceGroups/mldemorg/providers/Microsoft.MachineLearningServices/workspaces/mldemo/batchEndpoints/taxi-batch-endpoint-6853', 'name': 'taxi-batch-endpoint-6853', 'type': 'Microsoft.MachineLearningServices/workspaces/batchEndpoints', 'system_data': <azure.ai.ml._restclient.v2022_05_01.models._models_py3.SystemData object at 0x7f085c30ceb0>, 'tags': {'model': 'taxi-model@latest'}, 'location': 'eastus2', 'identity': <azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity object at 0x7f085c30e620>, 'kind': None, 'properties': <azure.ai.ml._restclient.v2022_05_01.models._models_py3.BatchEndpointDetails object at 0x7f085c30e200>, 'sku': None}\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from azure.ai.ml.exceptions import DeploymentException\n",
|
||||||
|
"\n",
|
||||||
|
"status = poller.status()\n",
|
||||||
|
"if status != \"Succeeded\":\n",
|
||||||
|
" raise DeploymentException(status)\n",
|
||||||
|
"else:\n",
|
||||||
|
" print(\"Endpoint creation succeeded\")\n",
|
||||||
|
" endpoint = poller.result()\n",
|
||||||
|
" print(endpoint)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 3. Create Batch Deployment\n",
|
||||||
|
"\n",
|
||||||
|
"We can create the **batch deployment** with cli v2 or sdk v2 using the following syntax:\n",
|
||||||
|
"\n",
|
||||||
|
"<center>\n",
|
||||||
|
"<img src=\"../../imgs/create_batch_deployment.png\" width = \"700px\" alt=\"Create batch deployment cli vs sdk\">\n",
|
||||||
|
"</center>\n",
|
||||||
|
"\n",
|
||||||
|
"Note that if you're deploying **MLFlow models**, there's no need to provide **a scoring script** and execution **environment**, as both are autogenerated."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {
|
||||||
|
"gather": {
|
||||||
|
"logged": 1668247892781
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# create batch deployment\n",
|
||||||
|
"from azure.ai.ml.entities import BatchDeployment, Model, Environment\n",
|
||||||
|
"from azure.ai.ml.constants import BatchDeploymentOutputAction\n",
|
||||||
|
"\n",
|
||||||
|
"model = \"taxi-model@latest\"\n",
|
||||||
|
"\n",
|
||||||
|
"batch_deployment = BatchDeployment(\n",
|
||||||
|
" name=\"taxi-batch-dp\",\n",
|
||||||
|
" description=\"this is a sample batch deployment\",\n",
|
||||||
|
" endpoint_name=endpoint_name,\n",
|
||||||
|
" model=model,\n",
|
||||||
|
" compute=\"batch-cluster\",\n",
|
||||||
|
" instance_count=2,\n",
|
||||||
|
" max_concurrency_per_instance=2,\n",
|
||||||
|
" mini_batch_size=10,\n",
|
||||||
|
" output_action=BatchDeploymentOutputAction.APPEND_ROW,\n",
|
||||||
|
" output_file_name=\"predictions.csv\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"poller = ml_client.begin_create_or_update(batch_deployment)\n",
|
||||||
|
"poller.wait()\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"nteract": {
|
||||||
|
"transient": {
|
||||||
|
"deleting": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Set deployment as the default deployment in the endpoint:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"gather": {
|
||||||
|
"logged": 1668249096086
|
||||||
|
},
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"nteract": {
|
||||||
|
"transient": {
|
||||||
|
"deleting": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"batch_endpoint = ml_client.batch_endpoints.get(endpoint_name)\n",
|
||||||
|
"batch_endpoint.defaults.deployment_name = batch_deployment.name\n",
|
||||||
|
"poller = ml_client.batch_endpoints.begin_create_or_update(batch_endpoint)\n",
|
||||||
|
"poller.wait()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 4. Invoke and Test Endpoint\n",
|
||||||
|
"\n",
|
||||||
|
"We can invoke the **batch deployment** with cli v2 or sdk v2 using the following syntax:\n",
|
||||||
|
"\n",
|
||||||
|
"<center>\n",
|
||||||
|
"<img src=\"../../imgs/invoke_batch_deployment.png\" width = \"700px\" alt=\"Invoke batch deployment cli vs sdk\">\n",
|
||||||
|
"</center>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {
|
||||||
|
"gather": {
|
||||||
|
"logged": 1668689480461
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"\u001b[32mUploading taxi-batch.csv\u001b[32m (< 1 MB): 100%|██████████| 133k/133k [00:00<00:00, 7.89MB/s]\n",
|
||||||
|
"\u001b[39m\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"<azure.ai.ml._restclient.v2020_09_01_dataplanepreview.models._models_py3.BatchJobResource at 0x7f085c160460>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 9,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# invoke and test endpoint\n",
|
||||||
|
"from azure.ai.ml import Input\n",
|
||||||
|
"from azure.ai.ml.constants import AssetTypes, InputOutputModes\n",
|
||||||
|
"\n",
|
||||||
|
"input = Input(path=\"../../data/taxi-batch.csv\", \n",
|
||||||
|
" type=AssetTypes.URI_FILE, \n",
|
||||||
|
" mode=InputOutputModes.DOWNLOAD)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"# invoke the endpoint for batch scoring job\n",
|
||||||
|
"ml_client.batch_endpoints.invoke(\n",
|
||||||
|
" endpoint_name=endpoint_name,\n",
|
||||||
|
" input=input,\n",
|
||||||
|
" deployment_name=\"taxi-batch-dp\"\n",
|
||||||
|
")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernel_info": {
|
||||||
|
"name": "python310-sdkv2"
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.10 - SDK V2",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python310-sdkv2"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.6"
|
||||||
|
},
|
||||||
|
"microsoft": {
|
||||||
|
"host": {
|
||||||
|
"AzureML": {
|
||||||
|
"notebookHasBeenCompleted": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nteract": {
|
||||||
|
"version": "nteract-front-end@1.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
|
@ -0,0 +1,356 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# import required libraries\n",
|
||||||
|
"from azure.ai.ml import MLClient, command, Input, Output, load_component\n",
|
||||||
|
"from azure.identity import DefaultAzureCredential\n",
|
||||||
|
"from azure.ai.ml.entities import Data, Environment, ManagedOnlineEndpoint\n",
|
||||||
|
"from azure.ai.ml.constants import AssetTypes, InputOutputModes\n",
|
||||||
|
"from azure.ai.ml.dsl import pipeline"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Enter details of your AML workspace\n",
|
||||||
|
"subscription_id = \"<subscription_id>\"\n",
|
||||||
|
"resource_group = \"<resource_group_name>\"\n",
|
||||||
|
"workspace = \"<workspace_name>\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"gather": {
|
||||||
|
"logged": 1670200031039
|
||||||
|
},
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false,
|
||||||
|
"source_hidden": false
|
||||||
|
},
|
||||||
|
"nteract": {
|
||||||
|
"transient": {
|
||||||
|
"deleting": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# get a handle to the workspace\n",
|
||||||
|
"ml_client = MLClient(\n",
|
||||||
|
" DefaultAzureCredential(), subscription_id, resource_group, workspace\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"nteract": {
|
||||||
|
"transient": {
|
||||||
|
"deleting": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"# Online Endpoint\n",
|
||||||
|
"\n",
|
||||||
|
"Online endpoints are endpoints that are used for online (real-time) inferencing. They receive data from clients and can send responses back in real time.\n",
|
||||||
|
"\n",
|
||||||
|
"An **endpoint** is an HTTPS endpoint that clients can call to receive the inferencing (scoring) output of a trained model. It provides:\n",
|
||||||
|
"* Authentication using \"key & token\" based auth\n",
|
||||||
|
"* SSL termination\n",
|
||||||
|
"* A stable scoring URI (endpoint-name.region.inference.ml.azure.com)\n",
|
||||||
|
"\n",
|
||||||
|
"A **deployment** is a set of resources required for hosting the model that does the actual inferencing.\n",
|
||||||
|
"A single endpoint can contain multiple deployments.\n",
|
||||||
|
"\n",
|
||||||
|
"Features of the managed online endpoint:\n",
|
||||||
|
"\n",
|
||||||
|
"* **Test and deploy locally** for faster debugging\n",
|
||||||
|
"* Traffic to one deployment can also be **mirrored** (copied) to another deployment.\n",
|
||||||
|
"* **Application Insights integration**\n",
|
||||||
|
"* Security\n",
|
||||||
|
"* Authentication: Key and Azure ML Tokens\n",
|
||||||
|
"* Automatic Autoscaling\n",
|
||||||
|
"* Visual Studio Code debugging\n",
|
||||||
|
"\n",
|
||||||
|
"**blue-green deployment**: An approach where a new version of a web service is introduced to production by deploying it to a small subset of users/requests before deploying it fully.\n",
|
||||||
|
"\n",
|
||||||
|
"<center>\n",
|
||||||
|
"<img src=\"../../imgs/endpoint_concept.png\" width = \"500px\" alt=\"Online Endpoint Concept cli vs sdk\">\n",
|
||||||
|
"</center>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"nteract": {
|
||||||
|
"transient": {
|
||||||
|
"deleting": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"## 1. Create Online Endpoint\n",
|
||||||
|
"\n",
|
||||||
|
"We can create an **online endpoint** with cli v2 or sdk v2 using the following syntax:\n",
|
||||||
|
"\n",
|
||||||
|
"<center>\n",
|
||||||
|
"<img src=\"../../imgs/create_online_endpoint.png\" width = \"700px\" alt=\"Create Online Endpoint cli vs sdk\">\n",
|
||||||
|
"</center>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"gather": {
|
||||||
|
"logged": 1669584576485
|
||||||
|
},
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false,
|
||||||
|
"source_hidden": false
|
||||||
|
},
|
||||||
|
"nteract": {
|
||||||
|
"transient": {
|
||||||
|
"deleting": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azure.ai.ml.entities import ManagedOnlineEndpoint\n",
|
||||||
|
"import random\n",
|
||||||
|
"\n",
|
||||||
|
"rand = random.randint(0, 10000)\n",
|
||||||
|
"\n",
|
||||||
|
"endpoint_name = f\"taxi-online-endpoint-{rand}\"\n",
|
||||||
|
"# create an online endpoint\n",
|
||||||
|
"online_endpoint = ManagedOnlineEndpoint(\n",
|
||||||
|
" name=endpoint_name, \n",
|
||||||
|
" description=\"Taxi online endpoint\",\n",
|
||||||
|
" auth_mode=\"aml_token\",\n",
|
||||||
|
")\n",
|
||||||
|
"poller = ml_client.online_endpoints.begin_create_or_update(\n",
|
||||||
|
" online_endpoint, \n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"poller.wait()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Endpoint creation succeeded\n",
|
||||||
|
"ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://taxi-online-endpoint-5807.eastus2.inference.ml.azure.com/score', 'openapi_uri': 'https://taxi-online-endpoint-5807.eastus2.inference.ml.azure.com/swagger.json', 'name': 'taxi-online-endpoint-5807', 'description': 'Taxi online endpoint', 'tags': {}, 'properties': {'azureml.onlineendpointid': '/subscriptions/14585b9f-5c83-4a76-8055-42149123f99f/resourcegroups/mldemorg/providers/microsoft.machinelearningservices/workspaces/mldemo/onlineendpoints/taxi-online-endpoint-5807', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/14585b9f-5c83-4a76-8055-42149123f99f/providers/Microsoft.MachineLearningServices/locations/eastus2/mfeOperationsStatus/oe:00187fbf-e9d1-40fe-becd-8d9bd1713ab3:a7f35f02-2493-40f0-8452-35cd3a20cb73?api-version=2022-02-01-preview'}, 'id': '/subscriptions/14585b9f-5c83-4a76-8055-42149123f99f/resourceGroups/mldemorg/providers/Microsoft.MachineLearningServices/workspaces/mldemo/onlineEndpoints/taxi-online-endpoint-5807', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/jomedin2/code/Users/jomedin/mlops-v2/ml-pipelines/sdk', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7f06d308df30>, 'auth_mode': 'aml_token', 'location': 'eastus2', 'identity': <azure.ai.ml.entities._credentials.IdentityConfiguration object at 0x7f06c93f9690>, 'traffic': {}, 'mirror_traffic': {}, 'kind': 'Managed'})\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from azure.ai.ml.exceptions import DeploymentException\n",
|
||||||
|
"\n",
|
||||||
|
"status = poller.status()\n",
|
||||||
|
"if status != \"Succeeded\":\n",
|
||||||
|
" raise DeploymentException(status)\n",
|
||||||
|
"else:\n",
|
||||||
|
" print(\"Endpoint creation succeeded\")\n",
|
||||||
|
" endpoint = poller.result()\n",
|
||||||
|
" print(endpoint)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"## 2. Create Online Deployment\n",
|
||||||
|
"\n",
|
||||||
|
"To create a deployment to online endpoint, you need to specify the following elements:\n",
|
||||||
|
"\n",
|
||||||
|
"* Model files (or specify a registered model in your workspace)\n",
|
||||||
|
"* Scoring script - code needed to do scoring/inferencing\n",
|
||||||
|
"* Environment - a Docker image with Conda dependencies, or a dockerfile\n",
|
||||||
|
"* Compute instance & scale settings\n",
|
||||||
|
"\n",
|
||||||
|
"Note that if you're deploying **MLFlow models**, there's no need to provide **a scoring script** and execution **environment**, as both are autogenerated.\n",
|
||||||
|
"\n",
|
||||||
|
"We can create an **online deployment** with cli v2 or sdk v2 using the following syntax:\n",
|
||||||
|
"\n",
|
||||||
|
"<center>\n",
|
||||||
|
"<img src=\"../../imgs/create_online_deployment.png\" width = \"700px\" alt=\"Create Online Deployment cli vs sdk\">\n",
|
||||||
|
"</center>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false,
|
||||||
|
"gather": {
|
||||||
|
"logged": 1669584886619
|
||||||
|
},
|
||||||
|
"jupyter": {
|
||||||
|
"outputs_hidden": false
|
||||||
|
},
|
||||||
|
"nteract": {
|
||||||
|
"transient": {
|
||||||
|
"deleting": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Check: endpoint taxi-online-endpoint-5807 exists\n",
|
||||||
|
"data_collector is not a known attribute of class <class 'azure.ai.ml._restclient.v2022_02_01_preview.models._models_py3.ManagedOnlineDeployment'> and will be ignored\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"....................................................................................."
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# create online deployment\n",
|
||||||
|
"from azure.ai.ml.entities import ManagedOnlineDeployment, Model, Environment\n",
|
||||||
|
"\n",
|
||||||
|
"blue_deployment = ManagedOnlineDeployment(\n",
|
||||||
|
" name=\"blue\",\n",
|
||||||
|
" endpoint_name=endpoint_name,\n",
|
||||||
|
" model=\"taxi-model@latest\",\n",
|
||||||
|
" instance_type=\"Standard_DS2_v2\",\n",
|
||||||
|
" instance_count=1,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"poller = ml_client.online_deployments.begin_create_or_update(\n",
|
||||||
|
" deployment=blue_deployment\n",
|
||||||
|
")\n",
|
||||||
|
"poller.wait()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 3. Allocate Traffic"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {
|
||||||
|
"gather": {
|
||||||
|
"logged": 1670199946158
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# allocate traffic\n",
|
||||||
|
"# blue deployment takes 100% of the traffic\n",
|
||||||
|
"online_endpoint.traffic = {\"blue\": 100}\n",
|
||||||
|
"poller = ml_client.begin_create_or_update(online_endpoint)\n",
|
||||||
|
"poller.wait()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 4. Invoke and Test Endpoint\n",
|
||||||
|
"\n",
|
||||||
|
"We can invoke the **online deployment** with cli v2 or sdk v2 using the following syntax:\n",
|
||||||
|
"\n",
|
||||||
|
"<center>\n",
|
||||||
|
"<img src=\"../../imgs/invoke_online_endpoint.png\" width = \"700px\" alt=\"Invoke online endpoint cli vs sdk\">\n",
|
||||||
|
"</center>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {
|
||||||
|
"gather": {
|
||||||
|
"logged": 1668246829854
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'[11.928738280516184, 15.403240743572406]'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# invoke and test endpoint\n",
|
||||||
|
"ml_client.online_endpoints.invoke(\n",
|
||||||
|
" endpoint_name=endpoint_name,\n",
|
||||||
|
" request_file=\"../../data/taxi-request.json\",\n",
|
||||||
|
")\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernel_info": {
|
||||||
|
"name": "python310-sdkv2"
|
||||||
|
},
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.10 - SDK V2",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python310-sdkv2"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.6"
|
||||||
|
},
|
||||||
|
"microsoft": {
|
||||||
|
"host": {
|
||||||
|
"AzureML": {
|
||||||
|
"notebookHasBeenCompleted": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nteract": {
|
||||||
|
"version": "nteract-front-end@1.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -1,605 +0,0 @@
|
||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"For this workshop, you need:\n",
|
|
||||||
"\n",
|
|
||||||
"* An Azure Machine Learning workspace. \n",
|
|
||||||
"* The Azure Machine Learning CLI v2 installed.\n",
|
|
||||||
"\n",
|
|
||||||
"To install the CLI you can either,\n",
|
|
||||||
"\n",
|
|
||||||
"Create a compute instance, which already has installed the latest AzureML CLI and is pre-configured for ML workflows.\n",
|
|
||||||
"\n",
|
|
||||||
"Alternatively, use the following command to install the Azure ML CLI v2:\n",
|
|
||||||
"\n",
|
|
||||||
"```bash\n",
|
|
||||||
"az extension add --name ml\n",
|
|
||||||
"```"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"!az extension add --name ml"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"!az login"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"source": [
|
|
||||||
"# Model Training"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"source": [
|
|
||||||
"## 1. Create Managed Compute\n",
|
|
||||||
"\n",
|
|
||||||
"A compute is a designated compute resource where you run your job or host your endpoint. Azure Machine Learning supports the following types of compute:\n",
|
|
||||||
"\n",
|
|
||||||
"- **Compute instance** - a fully configured and managed development environment in the cloud. You can use the instance as a training or inference compute for development and testing. It's similar to a virtual machine on the cloud.\n",
|
|
||||||
"\n",
|
|
||||||
"- **Compute cluster** - a managed-compute infrastructure that allows you to easily create a cluster of CPU or GPU compute nodes in the cloud.\n",
|
|
||||||
"\n",
|
|
||||||
"- **Inference cluster** - used to deploy trained machine learning models to Azure Kubernetes Service. You can create an Azure Kubernetes Service (AKS) cluster from your Azure ML workspace, or attach an existing AKS cluster.\n",
|
|
||||||
"\n",
|
|
||||||
"- **Attached compute** - You can attach your own compute resources to your workspace and use them for training and inference.\n",
|
|
||||||
"\n",
|
|
||||||
"You can create a compute using the Studio, the cli and the sdk.\n",
|
|
||||||
"\n",
|
|
||||||
"<hr>\n",
|
|
||||||
"\n",
|
|
||||||
"We can create a **compute instance** with cli v2 or sdk v2 using the following syntax:\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/create_compute_instance.png\" width = \"700px\" alt=\"Create Compute Instance cli vs sdk\">\n",
|
|
||||||
"</center>\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"<hr>\n",
|
|
||||||
"\n",
|
|
||||||
"We can create a **compute cluster** with cli v2 or sdk v2 using the following syntax:\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/create_compute_cluster.png\" width = \"700px\" alt=\"Create Compute Cluster cli vs sdk\">\n",
|
|
||||||
"</center>\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"Let's create a managed compute cluster for the training workload."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Create train job compute cluster\n",
|
|
||||||
"!az ml compute create --file train/compute.yml"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"source": [
|
|
||||||
"## 2. Register Data Asset\n",
|
|
||||||
"\n",
|
|
||||||
"**Datastore** - Azure Machine Learning Datastores securely keep the connection information to your data storage on Azure, so you don't have to code it in your scripts.\n",
|
|
||||||
"\n",
|
|
||||||
"An Azure Machine Learning datastore is a **reference** to an **existing** storage account on Azure. The benefits of creating and using a datastore are:\n",
|
|
||||||
"* A common and easy-to-use API to interact with different storage types. \n",
|
|
||||||
"* Easier to discover useful datastores when working as a team.\n",
|
|
||||||
"* When using credential-based access (service principal/SAS/key), the connection information is secured so you don't have to code it in your scripts.\n",
|
|
||||||
"\n",
|
|
||||||
"Supported Data Resources: \n",
|
|
||||||
"\n",
|
|
||||||
"* Azure Storage blob container\n",
|
|
||||||
"* Azure Storage file share\n",
|
|
||||||
"* Azure Data Lake Gen 1\n",
|
|
||||||
"* Azure Data Lake Gen 2\n",
|
|
||||||
"* Azure SQL Database \n",
|
|
||||||
"* Azure PostgreSQL Database\n",
|
|
||||||
"* Azure MySQL Database\n",
|
|
||||||
"\n",
|
|
||||||
"It is not a requirement to use Azure Machine Learning datastores - you can use storage URIs directly assuming you have access to the underlying data.\n",
|
|
||||||
"\n",
|
|
||||||
"You can create a datastore using the Studio, the cli and the sdk.\n",
|
|
||||||
"\n",
|
|
||||||
"<hr>\n",
|
|
||||||
"\n",
|
|
||||||
"We can create a **datastore** with cli v2 or sdk v2 using the following syntax:\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/create_datastore.png\" width = \"700px\" alt=\"Create Datastore cli vs sdk\">\n",
|
|
||||||
"</center>\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"**Data asset** - Create data assets in your workspace to share with team members, version, and track data lineage.\n",
|
|
||||||
"\n",
|
|
||||||
"By creating a data asset, you create a reference to the data source location, along with a copy of its metadata. \n",
|
|
||||||
"\n",
|
|
||||||
"The benefits of creating data assets are:\n",
|
|
||||||
"\n",
|
|
||||||
"* You can **share and reuse data** with other members of the team such that they do not need to remember file locations.\n",
|
|
||||||
"* You can **seamlessly access data** during model training (on any supported compute type) without worrying about connection strings or data paths.\n",
|
|
||||||
"* You can **version** the data.\n",
|
|
||||||
"\n",
|
|
||||||
"<hr>\n",
|
|
||||||
"\n",
|
|
||||||
"We can create a **data asset** with cli v2 or sdk v2 using the following syntax:\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/create_data_asset.png\" width = \"700px\" alt=\"Create Data Asset cli vs sdk\">\n",
|
|
||||||
"</center>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false,
|
|
||||||
"jupyter": {
|
|
||||||
"outputs_hidden": false,
|
|
||||||
"source_hidden": false
|
|
||||||
},
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Register data asset \n",
|
|
||||||
"!az ml data create --file train/data.yml"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"source": [
|
|
||||||
"## 3. Register Train Environment\n",
|
|
||||||
"\n",
|
|
||||||
"Azure Machine Learning environments define the execution environments for your **jobs** or **deployments** and encapsulate the dependencies for your code. \n",
|
|
||||||
"\n",
|
|
||||||
"Azure ML uses the environment specification to create the Docker container that your **training** or **scoring code** runs in on the specified compute target.\n",
|
|
||||||
"\n",
|
|
||||||
"Create an environment from a\n",
|
|
||||||
"* conda specification\n",
|
|
||||||
"* Docker image\n",
|
|
||||||
"* Docker build context\n",
|
|
||||||
"\n",
|
|
||||||
"There are two types of environments in Azure ML: **curated** and **custom environments**. Curated environments are predefined environments containing popular ML frameworks and tooling. Custom environments are user-defined.\n",
|
|
||||||
"\n",
|
|
||||||
"<hr>\n",
|
|
||||||
"\n",
|
|
||||||
"We can register an **environment** with cli v2 or sdk v2 using the following syntax:\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/create_environment.png\" width = \"700px\" alt=\"Create Environment cli vs sdk\">\n",
|
|
||||||
"</center>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false,
|
|
||||||
"jupyter": {
|
|
||||||
"outputs_hidden": false,
|
|
||||||
"source_hidden": false
|
|
||||||
},
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Register train environment \n",
|
|
||||||
"!az ml environment create --file train/environment.yml"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"source": [
|
|
||||||
"## 4. Create Pipeline Job\n",
|
|
||||||
"\n",
|
|
||||||
"**AML Job**:\n",
|
|
||||||
"\n",
|
|
||||||
"Azure ML provides several ways to train your models, from code-first solutions to low-code solutions:\n",
|
|
||||||
"\n",
|
|
||||||
"* Azure ML supports script files in Python, R, Java, Julia or C#. All you need to learn is YAML format and command lines to use Azure ML.\n",
|
|
||||||
"\n",
|
|
||||||
"* Distributed Training: AML supports integrations with popular frameworks, PyTorch and TensorFlow. Both frameworks employ data parallelism & model parallelism for distributed training.\n",
|
|
||||||
"\n",
|
|
||||||
"* Automated ML - Train models without extensive data science or programming knowledge.\n",
|
|
||||||
"\n",
|
|
||||||
"* Designer - drag and drop web-based UI.\n",
|
|
||||||
"\n",
|
|
||||||
"<hr>\n",
|
|
||||||
"\n",
|
|
||||||
"We can submit a **job** with cli v2 or sdk v2 using the following syntax:\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/create_job.png\" width = \"700px\" alt=\"Create Job cli vs sdk\">\n",
|
|
||||||
"</center>\n",
|
|
||||||
"\n",
|
|
||||||
"<br>\n",
|
|
||||||
" \n",
|
|
||||||
"**AML Pipelines**:\n",
|
|
||||||
"\n",
|
|
||||||
"An AML pipeline is an independently executable workflow of a complete machine learning task. It helps standardize the best practices of producing a machine learning model. The core of a machine learning pipeline is to split a complete machine learning task into a multistep workflow. Each step is a manageable component that can be developed, optimized, configured, and automated individually. \n",
|
|
||||||
"\n",
|
|
||||||
"<hr>\n",
|
|
||||||
"\n",
|
|
||||||
"We can submit a **pipeline job** with cli v2 or sdk v2 using the following syntax:\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/create_pipeline.png\" width = \"700px\" alt=\"Create Pipeline cli vs sdk\">\n",
|
|
||||||
"</center>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false,
|
|
||||||
"jupyter": {
|
|
||||||
"outputs_hidden": false,
|
|
||||||
"source_hidden": false
|
|
||||||
},
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Create pipeline job\n",
|
|
||||||
"!az ml job create --file train/pipeline.yml"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"source": [
|
|
||||||
"# Online Endpoint\n",
|
|
||||||
"\n",
|
|
||||||
"Online endpoints are endpoints that are used for online (real-time) inferencing. They receive data from clients and can send responses back in real time.\n",
|
|
||||||
"\n",
|
|
||||||
"An **endpoint** is an HTTPS endpoint that clients can call to receive the inferencing (scoring) output of a trained model. It provides:\n",
|
|
||||||
"* Authentication using \"key & token\" based auth\n",
|
|
||||||
"* SSL termination\n",
|
|
||||||
"* A stable scoring URI (endpoint-name.region.inference.ml.azure.com)\n",
|
|
||||||
"\n",
|
|
||||||
"A **deployment** is a set of resources required for hosting the model that does the actual inferencing.\n",
|
|
||||||
"A single endpoint can contain multiple deployments.\n",
|
|
||||||
"\n",
|
|
||||||
"Features of the managed online endpoint:\n",
|
|
||||||
"\n",
|
|
||||||
"* **Test and deploy locally** for faster debugging\n",
|
|
||||||
"* Traffic to one deployment can also be **mirrored** (copied) to another deployment.\n",
|
|
||||||
"* **Application Insights integration**\n",
|
|
||||||
"* Security\n",
|
|
||||||
"* Authentication: Key and Azure ML Tokens\n",
|
|
||||||
"* Automatic Autoscaling\n",
|
|
||||||
"* Visual Studio Code debugging\n",
|
|
||||||
"\n",
|
|
||||||
"**blue-green deployment**: An approach where a new version of a web service is introduced to production by deploying it to a small subset of users/requests before deploying it fully.\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/endpoint_concept.png\" width = \"500px\" alt=\"Online Endpoint Concept cli vs sdk\">\n",
|
|
||||||
"</center>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"source": [
|
|
||||||
"## 1. Create Online Endpoint\n",
|
|
||||||
"\n",
|
|
||||||
"We can create an **online endpoint** with cli v2 or sdk v2 using the following syntax:\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/create_online_endpoint.png\" width = \"700px\" alt=\"Create Online Endpoint cli vs sdk\">\n",
|
|
||||||
"</center>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false,
|
|
||||||
"jupyter": {
|
|
||||||
"outputs_hidden": false,
|
|
||||||
"source_hidden": false
|
|
||||||
},
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# create online endpoint\n",
|
|
||||||
"!az ml online-endpoint create --file deploy/online/online-endpoint.yml"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 2. Create Online Deployment\n",
|
|
||||||
"\n",
|
|
||||||
"To create a deployment to an online endpoint, you need to specify the following elements:\n",
|
|
||||||
"\n",
|
|
||||||
"* Model files (or specify a registered model in your workspace)\n",
|
|
||||||
"* Scoring script - code needed to do scoring/inferencing\n",
|
|
||||||
"* Environment - a Docker image with Conda dependencies, or a dockerfile\n",
|
|
||||||
"* Compute instance & scale settings\n",
|
|
||||||
"\n",
|
|
||||||
"Note that if you're deploying **MLflow models**, there's no need to provide **a scoring script** and execution **environment**, as both are autogenerated.\n",
|
|
||||||
"\n",
|
|
||||||
"We can create an **online deployment** with cli v2 or sdk v2 using the following syntax:\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/create_online_deployment.png\" width = \"700px\" alt=\"Create Online Deployment cli vs sdk\">\n",
|
|
||||||
"</center>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false,
|
|
||||||
"jupyter": {
|
|
||||||
"outputs_hidden": false,
|
|
||||||
"source_hidden": false
|
|
||||||
},
|
|
||||||
"nteract": {
|
|
||||||
"transient": {
|
|
||||||
"deleting": false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# create online deployment\n",
|
|
||||||
"!az ml online-deployment create --file deploy/online/online-deployment.yml "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 3. Allocate Traffic"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# allocate traffic\n",
|
|
||||||
"!az ml online-endpoint update --name taxi-online-endpoint --traffic blue=100"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 4. Invoke and Test Endpoint\n",
|
|
||||||
"\n",
|
|
||||||
"We can invoke the **online deployment** with cli v2 or sdk v2 using the following syntax:\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/invoke_online_endpoint.png\" width = \"700px\" alt=\"Invoke online endpoint cli vs sdk\">\n",
|
|
||||||
"</center>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# invoke and test endpoint\n",
|
|
||||||
"!az ml online-endpoint invoke --name taxi-online-endpoint --request-file ../../data/taxi-request.json"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Model Batch Endpoint"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 1. Create Batch Compute Cluster"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# create compute cluster to be used by batch cluster\n",
|
|
||||||
"!az ml compute create -n batch-cluster --type amlcompute --min-instances 0 --max-instances 3"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 2. Create Batch Endpoint\n",
|
|
||||||
"\n",
|
|
||||||
"We can create the **batch endpoint** with cli v2 or sdk v2 using the following syntax:\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/create_batch_endpoint.png\" width = \"700px\" alt=\"Create batch endpoint cli vs sdk\">\n",
|
|
||||||
"</center>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# create batch endpoint\n",
|
|
||||||
"!az ml batch-endpoint create --file deploy/batch/batch-endpoint.yml"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 3. Create Batch Deployment\n",
|
|
||||||
"\n",
|
|
||||||
"We can create the **batch deployment** with cli v2 or sdk v2 using the following syntax:\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/create_batch_deployment.png\" width = \"700px\" alt=\"Create batch deployment cli vs sdk\">\n",
|
|
||||||
"</center>\n",
|
|
||||||
"\n",
|
|
||||||
"Note that if you're deploying **MLflow models**, there's no need to provide **a scoring script** and execution **environment**, as both are autogenerated."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# create batch deployment\n",
|
|
||||||
"!az ml batch-deployment create --file deploy/batch/batch-deployment.yml --set-default"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 4. Invoke and Test Endpoint\n",
|
|
||||||
"\n",
|
|
||||||
"We can invoke the **batch deployment** with cli v2 or sdk v2 using the following syntax:\n",
|
|
||||||
"\n",
|
|
||||||
"<center>\n",
|
|
||||||
"<img src=\"../../imgs/invoke_batch_deployment.png\" width = \"700px\" alt=\"Invoke batch deployment cli vs sdk\">\n",
|
|
||||||
"</center>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# invoke and test endpoint\n",
|
|
||||||
"!az ml batch-endpoint invoke --name taxi-batch-endpoint --input ../../data/taxi-batch.csv"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernel_info": {
|
|
||||||
"name": "python38-azureml"
|
|
||||||
},
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.8 - AzureML",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python38-azureml"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.8.5"
|
|
||||||
},
|
|
||||||
"nteract": {
|
|
||||||
"version": "nteract-front-end@1.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 4
|
|
||||||
}
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,66 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
||||||
# Licensed under the MIT License.
|
|
||||||
|
|
||||||
variables:
|
|
||||||
- ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
|
|
||||||
# 'main' branch: PRD environment
|
|
||||||
- template: ../../config-infra-prod.yml
|
|
||||||
- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
|
|
||||||
# 'develop' or feature branches: DEV environment
|
|
||||||
- template: ../../config-infra-dev.yml
|
|
||||||
- name: version
|
|
||||||
value: aml-cli-v2
|
|
||||||
- name: endpoint_name
|
|
||||||
value: taxi-batch-$(namespace)$(postfix)$(environment)
|
|
||||||
- name: endpoint_type
|
|
||||||
value: batch
|
|
||||||
|
|
||||||
trigger:
|
|
||||||
- none
|
|
||||||
|
|
||||||
pool:
|
|
||||||
vmImage: ubuntu-20.04
|
|
||||||
|
|
||||||
|
|
||||||
resources:
|
|
||||||
repositories:
|
|
||||||
- repository: mlops-templates # Template Repo
|
|
||||||
name: Azure/mlops-templates # need to change org name from "Azure" to your own org
|
|
||||||
endpoint: github-connection # need to set up and hardcode
|
|
||||||
type: github
|
|
||||||
ref: main
|
|
||||||
|
|
||||||
|
|
||||||
stages:
|
|
||||||
- stage: CreateBatchEndpoint
|
|
||||||
displayName: Create/Update Batch Endpoint
|
|
||||||
jobs:
|
|
||||||
- job: DeployBatchEndpoint
|
|
||||||
steps:
|
|
||||||
- checkout: self
|
|
||||||
path: s/
|
|
||||||
- checkout: mlops-templates
|
|
||||||
path: s/templates/
|
|
||||||
- template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
|
|
||||||
- template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
|
|
||||||
- template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
|
|
||||||
- template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
|
|
||||||
parameters:
|
|
||||||
cluster_name: batch-cluster # name must match cluster name in deployment file below
|
|
||||||
size: STANDARD_DS3_V2
|
|
||||||
min_instances: 0
|
|
||||||
max_instances: 5
|
|
||||||
cluster_tier: dedicated
|
|
||||||
- template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
|
|
||||||
parameters:
|
|
||||||
endpoint_file: mlops/azureml/deploy/batch/batch-endpoint.yml
|
|
||||||
- template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
|
|
||||||
parameters:
|
|
||||||
deployment_name: taxi-batch-dp
|
|
||||||
deployment_file: mlops/azureml/deploy/batch/batch-deployment.yml
|
|
||||||
- template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
|
|
||||||
parameters:
|
|
||||||
deployment_name: taxi-batch-dp
|
|
||||||
sample_request: data/taxi-batch.csv
|
|
||||||
request_type: uri_file #either uri_folder or uri_file
|
|
||||||
|
|
|
@ -1,59 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
||||||
# Licensed under the MIT License.
|
|
||||||
|
|
||||||
variables:
|
|
||||||
- ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
|
|
||||||
# 'main' branch: PRD environment
|
|
||||||
- template: ../../config-infra-prod.yml
|
|
||||||
- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
|
|
||||||
# 'develop' or feature branches: DEV environment
|
|
||||||
- template: ../../config-infra-dev.yml
|
|
||||||
- name: version
|
|
||||||
value: aml-cli-v2
|
|
||||||
|
|
||||||
resources:
|
|
||||||
repositories:
|
|
||||||
- repository: mlops-templates # Template Repo
|
|
||||||
name: Azure/mlops-templates # need to change org name from "Azure" to your own org
|
|
||||||
endpoint: github-connection # need to set up and hardcode
|
|
||||||
type: github
|
|
||||||
ref: main
|
|
||||||
|
|
||||||
trigger:
|
|
||||||
- none
|
|
||||||
|
|
||||||
pool:
|
|
||||||
vmImage: ubuntu-20.04
|
|
||||||
|
|
||||||
stages:
|
|
||||||
- stage: DeployTrainingPipeline
|
|
||||||
displayName: Deploy Training Pipeline
|
|
||||||
jobs:
|
|
||||||
- job: DeployTrainingPipeline
|
|
||||||
timeoutInMinutes: 120 # how long to run the job before automatically cancelling
|
|
||||||
steps:
|
|
||||||
- checkout: self
|
|
||||||
path: s/
|
|
||||||
- checkout: mlops-templates
|
|
||||||
path: s/templates/
|
|
||||||
- template: templates/tests/unit-tests.yml@mlops-templates
|
|
||||||
- template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
|
|
||||||
- template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
|
|
||||||
- template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
|
|
||||||
- template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
|
|
||||||
parameters:
|
|
||||||
build_type: conda
|
|
||||||
environment_name: taxi-train-env
|
|
||||||
environment_file: mlops/azureml/train/environment.yml
|
|
||||||
enable_monitoring: $(enable_monitoring)
|
|
||||||
- template: templates/${{ variables.version }}/register-data.yml@mlops-templates
|
|
||||||
parameters:
|
|
||||||
data_type: uri_file
|
|
||||||
data_name: taxi-data
|
|
||||||
data_file: mlops/azureml/train/data.yml
|
|
||||||
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
|
|
||||||
parameters:
|
|
||||||
pipeline_file: mlops/azureml/train/pipeline.yml
|
|
||||||
experiment_name: $(environment)_taxi_fare_train_$(Build.SourceBranchName)
|
|
||||||
display_name: $(environment)_taxi_fare_run_$(Build.BuildID)
|
|
||||||
enable_monitoring: $(enable_monitoring)
|
|
|
@ -1,61 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
||||||
# Licensed under the MIT License.
|
|
||||||
|
|
||||||
# Pipeline variables. Exactly one infrastructure config template is included,
# chosen at template-expansion time from the name of the source branch.
variables:
  - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
      # 'main' branch: PRD environment
      - template: ../../config-infra-prod.yml
  - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
      # 'develop' or feature branches: DEV environment
      # Same relative depth as the PRD config above; the two files are siblings.
      - template: ../../config-infra-dev.yml
  # Selects the templates/<version>/ folder used by the step templates.
  - name: version
    value: aml-cli-v2
  # namespace, postfix and environment are not defined in this file;
  # presumably they come from the included config-infra template (verify).
  - name: endpoint_name
    value: taxi-online-$(namespace)$(postfix)$(environment)
  - name: endpoint_type
    value: online
|
|
||||||
|
|
||||||
|
|
||||||
# Disable CI triggers so pushes do not start this pipeline.
# The scalar form is required: a list item (`- none`) is read as a branch
# filter matching a branch literally named "none".
trigger: none

# Microsoft-hosted agent image used by every job in this pipeline.
# NOTE(review): confirm this image is still offered; hosted images are retired over time.
pool:
  vmImage: ubuntu-20.04
|
|
||||||
|
|
||||||
|
|
||||||
# External repository that provides the step templates referenced as
# `...@mlops-templates` in the stages of this pipeline.
resources:
  repositories:
    # Template Repo
    - repository: mlops-templates
      # need to change org name from "Azure" to your own org
      name: Azure/mlops-templates
      # need to set up and hardcode
      endpoint: github-connection
      type: github
      ref: main
|
|
||||||
|
|
||||||
# One stage with one job: check out the sources, then run the shared step
# templates for the endpoint, its deployment, traffic allocation and a test request.
stages:
  - stage: CreateOnlineEndpoint
    displayName: Create/Update Online Endpoint
    jobs:
      - job: DeployOnlineEndpoint
        steps:
          # This repository goes to s/, the template repository to s/templates/.
          - checkout: self
            path: s/
          - checkout: mlops-templates
            path: s/templates/

          # Templates below are selected by the `version` variable.
          - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
          - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
          - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates

          # Endpoint definition.
          - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
            parameters:
              endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml

          # Deployment definition; the same deployment name is reused by the
          # traffic and test steps below.
          - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
            parameters:
              deployment_name: taxi-online-dp
              deployment_file: mlops/azureml/deploy/online/online-deployment.yml

          # Traffic allocation string: taxi-online-dp=100.
          - template: templates/${{ variables.version }}/allocate-traffic.yml@mlops-templates
            parameters:
              traffic_allocation: taxi-online-dp=100

          # Test step using the sample JSON request file.
          - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
            parameters:
              deployment_name: taxi-online-dp
              sample_request: data/taxi-request.json
              request_type: json
|
|
|
@ -1,4 +0,0 @@
|
||||||
black==22.3.0
|
|
||||||
flake8==4.0.1
|
|
||||||
isort==5.10.1
|
|
||||||
pre-commit==2.19.0
|
|
Загрузка…
Ссылка в новой задаче