|
@ -0,0 +1,96 @@
|
|||
# This code is autogenerated.
|
||||
# Code is generated by running custom script: python3 readme.py
|
||||
# Any manual changes to this file may cause incorrect behavior.
|
||||
# Any manual changes will be overwritten if the code is regenerated.
|
||||
|
||||
name: tutorials-get-started-notebooks-cloud-workstation
|
||||
# This file is created by tutorials/readme.py.
|
||||
# Please do not edit directly.
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "0 */8 * * *"
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- tutorials/get-started-notebooks/**
|
||||
- .github/workflows/tutorials-get-started-notebooks-cloud-workstation.yml
|
||||
- sdk/python/dev-requirements.txt
|
||||
- infra/**
|
||||
- sdk/python/setup.sh
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: check out repo
|
||||
uses: actions/checkout@v2
|
||||
- name: setup python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: "3.8"
|
||||
- name: pip install notebook reqs
|
||||
run: pip install -r sdk/python/dev-requirements.txt
|
||||
- name: pip install mlflow reqs
|
||||
run: pip install -r sdk/python/mlflow-requirements.txt
|
||||
- name: azure login
|
||||
uses: azure/login@v1
|
||||
with:
|
||||
creds: ${{secrets.AZUREML_CREDENTIALS}}
|
||||
- name: bootstrap resources
|
||||
run: |
|
||||
echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
|
||||
bash bootstrap.sh
|
||||
working-directory: infra
|
||||
continue-on-error: false
|
||||
- name: setup SDK
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: sdk/python
|
||||
continue-on-error: true
|
||||
- name: setup-cli
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: cli
|
||||
continue-on-error: true
|
||||
- name: run get-started-notebooks/cloud-workstation.ipynb
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
|
||||
bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "cloud-workstation.ipynb";
|
||||
[ -f "../../.azureml/config" ] && cat "../../.azureml/config";
|
||||
papermill -k python cloud-workstation.ipynb cloud-workstation.output.ipynb
|
||||
working-directory: tutorials/get-started-notebooks
|
||||
- name: upload notebook's working folder as an artifact
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: cloud-workstation
|
||||
path: tutorials/get-started-notebooks
|
||||
|
||||
- name: Send IcM on failure
|
||||
if: ${{ failure() && github.ref_type == 'branch' && (github.ref_name == 'main' || contains(github.ref_name, 'release')) }}
|
||||
uses: ./.github/actions/generate-icm
|
||||
with:
|
||||
host: ${{ secrets.AZUREML_ICM_CONNECTOR_HOST_NAME }}
|
||||
connector_id: ${{ secrets.AZUREML_ICM_CONNECTOR_CONNECTOR_ID }}
|
||||
certificate: ${{ secrets.AZUREML_ICM_CONNECTOR_CERTIFICATE }}
|
||||
private_key: ${{ secrets.AZUREML_ICM_CONNECTOR_PRIVATE_KEY }}
|
||||
args: |
|
||||
incident:
|
||||
Title: "[azureml-examples] Notebook validation failed on branch '${{ github.ref_name }}' for notebook 'get-started-notebooks/cloud-workstation.ipynb'"
|
||||
Summary: |
|
||||
Notebook 'get-started-notebooks/cloud-workstation.ipynb' is failing on branch '${{ github.ref_name }}': ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
Severity: 4
|
||||
RoutingId: "github://azureml-examples"
|
||||
Status: Active
|
||||
Source:
|
||||
IncidentId: "get-started-notebooks/cloud-workstation.ipynb[${{ github.ref_name }}]"
|
|
@ -0,0 +1,94 @@
|
|||
# This code is autogenerated.
|
||||
# Code is generated by running custom script: python3 readme.py
|
||||
# Any manual changes to this file may cause incorrect behavior.
|
||||
# Any manual changes will be overwritten if the code is regenerated.
|
||||
|
||||
name: tutorials-get-started-notebooks-deploy-model
|
||||
# This file is created by tutorials/readme.py.
|
||||
# Please do not edit directly.
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "0 */8 * * *"
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- tutorials/get-started-notebooks/**
|
||||
- .github/workflows/tutorials-get-started-notebooks-deploy-model.yml
|
||||
- sdk/python/dev-requirements.txt
|
||||
- infra/**
|
||||
- sdk/python/setup.sh
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: check out repo
|
||||
uses: actions/checkout@v2
|
||||
- name: setup python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: "3.8"
|
||||
- name: pip install notebook reqs
|
||||
run: pip install -r sdk/python/dev-requirements.txt
|
||||
- name: azure login
|
||||
uses: azure/login@v1
|
||||
with:
|
||||
creds: ${{secrets.AZUREML_CREDENTIALS}}
|
||||
- name: bootstrap resources
|
||||
run: |
|
||||
echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
|
||||
bash bootstrap.sh
|
||||
working-directory: infra
|
||||
continue-on-error: false
|
||||
- name: setup SDK
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: sdk/python
|
||||
continue-on-error: true
|
||||
- name: setup-cli
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: cli
|
||||
continue-on-error: true
|
||||
- name: run get-started-notebooks/deploy-model.ipynb
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
|
||||
bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "deploy-model.ipynb";
|
||||
[ -f "../../.azureml/config" ] && cat "../../.azureml/config";
|
||||
papermill -k python deploy-model.ipynb deploy-model.output.ipynb
|
||||
working-directory: tutorials/get-started-notebooks
|
||||
- name: upload notebook's working folder as an artifact
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: deploy-model
|
||||
path: tutorials/get-started-notebooks
|
||||
|
||||
- name: Send IcM on failure
|
||||
if: ${{ failure() && github.ref_type == 'branch' && (github.ref_name == 'main' || contains(github.ref_name, 'release')) }}
|
||||
uses: ./.github/actions/generate-icm
|
||||
with:
|
||||
host: ${{ secrets.AZUREML_ICM_CONNECTOR_HOST_NAME }}
|
||||
connector_id: ${{ secrets.AZUREML_ICM_CONNECTOR_CONNECTOR_ID }}
|
||||
certificate: ${{ secrets.AZUREML_ICM_CONNECTOR_CERTIFICATE }}
|
||||
private_key: ${{ secrets.AZUREML_ICM_CONNECTOR_PRIVATE_KEY }}
|
||||
args: |
|
||||
incident:
|
||||
Title: "[azureml-examples] Notebook validation failed on branch '${{ github.ref_name }}' for notebook 'get-started-notebooks/deploy-model.ipynb'"
|
||||
Summary: |
|
||||
Notebook 'get-started-notebooks/deploy-model.ipynb' is failing on branch '${{ github.ref_name }}': ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
Severity: 4
|
||||
RoutingId: "github://azureml-examples"
|
||||
Status: Active
|
||||
Source:
|
||||
IncidentId: "get-started-notebooks/deploy-model.ipynb[${{ github.ref_name }}]"
|
|
@ -0,0 +1,100 @@
|
|||
# This code is autogenerated.
|
||||
# Code is generated by running custom script: python3 readme.py
|
||||
# Any manual changes to this file may cause incorrect behavior.
|
||||
# Any manual changes will be overwritten if the code is regenerated.
|
||||
|
||||
name: tutorials-get-started-notebooks-explore-data
|
||||
# This file is created by tutorials/readme.py.
|
||||
# Please do not edit directly.
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "0 */12 * * *"
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- tutorials/get-started-notebooks/**
|
||||
- .github/workflows/tutorials-get-started-notebooks-explore-data.yml
|
||||
- sdk/python/dev-requirements.txt
|
||||
- infra/**
|
||||
- sdk/python/setup.sh
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: check out repo
|
||||
uses: actions/checkout@v2
|
||||
- name: setup python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: "3.8"
|
||||
- name: pip install notebook reqs
|
||||
run: pip install -r sdk/python/dev-requirements.txt
|
||||
- name: azure login
|
||||
uses: azure/login@v1
|
||||
with:
|
||||
creds: ${{secrets.AZUREML_CREDENTIALS}}
|
||||
- name: bootstrap resources
|
||||
run: |
|
||||
echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
|
||||
bash bootstrap.sh
|
||||
working-directory: infra
|
||||
continue-on-error: false
|
||||
- name: setup SDK
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: sdk/python
|
||||
continue-on-error: true
|
||||
- name: setup-cli
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: cli
|
||||
continue-on-error: true
|
||||
- name: run get-started-notebooks/explore-data.ipynb
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
|
||||
bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "explore-data.ipynb";
|
||||
[ -f "../../.azureml/config" ] && cat "../../.azureml/config";
|
||||
|
||||
# load data into 'data' subdirectory
|
||||
mkdir data
|
||||
cd data
|
||||
wget https://azuremlexamples.blob.core.windows.net/datasets/credit_card/default_of_credit_card_clients.csv
|
||||
cd ..
|
||||
papermill -k python explore-data.ipynb explore-data.output.ipynb
|
||||
working-directory: tutorials/get-started-notebooks
|
||||
- name: upload notebook's working folder as an artifact
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: explore-data
|
||||
path: tutorials/get-started-notebooks
|
||||
|
||||
- name: Send IcM on failure
|
||||
if: ${{ failure() && github.ref_type == 'branch' && (github.ref_name == 'main' || contains(github.ref_name, 'release')) }}
|
||||
uses: ./.github/actions/generate-icm
|
||||
with:
|
||||
host: ${{ secrets.AZUREML_ICM_CONNECTOR_HOST_NAME }}
|
||||
connector_id: ${{ secrets.AZUREML_ICM_CONNECTOR_CONNECTOR_ID }}
|
||||
certificate: ${{ secrets.AZUREML_ICM_CONNECTOR_CERTIFICATE }}
|
||||
private_key: ${{ secrets.AZUREML_ICM_CONNECTOR_PRIVATE_KEY }}
|
||||
args: |
|
||||
incident:
|
||||
Title: "[azureml-examples] Notebook validation failed on branch '${{ github.ref_name }}' for notebook 'get-started-notebooks/explore-data.ipynb'"
|
||||
Summary: |
|
||||
Notebook 'get-started-notebooks/explore-data.ipynb' is failing on branch '${{ github.ref_name }}': ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
Severity: 4
|
||||
RoutingId: "github://azureml-examples"
|
||||
Status: Active
|
||||
Source:
|
||||
IncidentId: "get-started-notebooks/explore-data.ipynb[${{ github.ref_name }}]"
|
|
@ -0,0 +1,96 @@
|
|||
# This code is autogenerated.
|
||||
# Code is generated by running custom script: python3 readme.py
|
||||
# Any manual changes to this file may cause incorrect behavior.
|
||||
# Any manual changes will be overwritten if the code is regenerated.
|
||||
|
||||
name: tutorials-get-started-notebooks-pipeline
|
||||
# This file is created by tutorials/readme.py.
|
||||
# Please do not edit directly.
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "0 */8 * * *"
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- tutorials/get-started-notebooks/**
|
||||
- .github/workflows/tutorials-get-started-notebooks-pipeline.yml
|
||||
- sdk/python/dev-requirements.txt
|
||||
- infra/**
|
||||
- sdk/python/setup.sh
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: check out repo
|
||||
uses: actions/checkout@v2
|
||||
- name: setup python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: "3.8"
|
||||
- name: pip install notebook reqs
|
||||
run: pip install -r sdk/python/dev-requirements.txt
|
||||
- name: pip install mlflow reqs
|
||||
run: pip install -r sdk/python/mlflow-requirements.txt
|
||||
- name: azure login
|
||||
uses: azure/login@v1
|
||||
with:
|
||||
creds: ${{secrets.AZUREML_CREDENTIALS}}
|
||||
- name: bootstrap resources
|
||||
run: |
|
||||
echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
|
||||
bash bootstrap.sh
|
||||
working-directory: infra
|
||||
continue-on-error: false
|
||||
- name: setup SDK
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: sdk/python
|
||||
continue-on-error: true
|
||||
- name: setup-cli
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: cli
|
||||
continue-on-error: true
|
||||
- name: run get-started-notebooks/pipeline.ipynb
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
|
||||
bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "pipeline.ipynb";
|
||||
[ -f "../../.azureml/config" ] && cat "../../.azureml/config";
|
||||
papermill -k python pipeline.ipynb pipeline.output.ipynb
|
||||
working-directory: tutorials/get-started-notebooks
|
||||
- name: upload notebook's working folder as an artifact
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: pipeline
|
||||
path: tutorials/get-started-notebooks
|
||||
|
||||
- name: Send IcM on failure
|
||||
if: ${{ failure() && github.ref_type == 'branch' && (github.ref_name == 'main' || contains(github.ref_name, 'release')) }}
|
||||
uses: ./.github/actions/generate-icm
|
||||
with:
|
||||
host: ${{ secrets.AZUREML_ICM_CONNECTOR_HOST_NAME }}
|
||||
connector_id: ${{ secrets.AZUREML_ICM_CONNECTOR_CONNECTOR_ID }}
|
||||
certificate: ${{ secrets.AZUREML_ICM_CONNECTOR_CERTIFICATE }}
|
||||
private_key: ${{ secrets.AZUREML_ICM_CONNECTOR_PRIVATE_KEY }}
|
||||
args: |
|
||||
incident:
|
||||
Title: "[azureml-examples] Notebook validation failed on branch '${{ github.ref_name }}' for notebook 'get-started-notebooks/pipeline.ipynb'"
|
||||
Summary: |
|
||||
Notebook 'get-started-notebooks/pipeline.ipynb' is failing on branch '${{ github.ref_name }}': ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
Severity: 4
|
||||
RoutingId: "github://azureml-examples"
|
||||
Status: Active
|
||||
Source:
|
||||
IncidentId: "get-started-notebooks/pipeline.ipynb[${{ github.ref_name }}]"
|
|
@ -0,0 +1,96 @@
|
|||
# This code is autogenerated.
|
||||
# Code is generated by running custom script: python3 readme.py
|
||||
# Any manual changes to this file may cause incorrect behavior.
|
||||
# Any manual changes will be overwritten if the code is regenerated.
|
||||
|
||||
name: tutorials-get-started-notebooks-quickstart
|
||||
# This file is created by tutorials/readme.py.
|
||||
# Please do not edit directly.
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "0 */8 * * *"
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- tutorials/get-started-notebooks/**
|
||||
- .github/workflows/tutorials-get-started-notebooks-quickstart.yml
|
||||
- sdk/python/dev-requirements.txt
|
||||
- infra/**
|
||||
- sdk/python/setup.sh
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: check out repo
|
||||
uses: actions/checkout@v2
|
||||
- name: setup python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: "3.8"
|
||||
- name: pip install notebook reqs
|
||||
run: pip install -r sdk/python/dev-requirements.txt
|
||||
- name: pip install mlflow reqs
|
||||
run: pip install -r sdk/python/mlflow-requirements.txt
|
||||
- name: azure login
|
||||
uses: azure/login@v1
|
||||
with:
|
||||
creds: ${{secrets.AZUREML_CREDENTIALS}}
|
||||
- name: bootstrap resources
|
||||
run: |
|
||||
echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
|
||||
bash bootstrap.sh
|
||||
working-directory: infra
|
||||
continue-on-error: false
|
||||
- name: setup SDK
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: sdk/python
|
||||
continue-on-error: true
|
||||
- name: setup-cli
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: cli
|
||||
continue-on-error: true
|
||||
- name: run get-started-notebooks/quickstart.ipynb
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
|
||||
bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "quickstart.ipynb";
|
||||
[ -f "../../.azureml/config" ] && cat "../../.azureml/config";
|
||||
papermill -k python quickstart.ipynb quickstart.output.ipynb
|
||||
working-directory: tutorials/get-started-notebooks
|
||||
- name: upload notebook's working folder as an artifact
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: quickstart
|
||||
path: tutorials/get-started-notebooks
|
||||
|
||||
- name: Send IcM on failure
|
||||
if: ${{ failure() && github.ref_type == 'branch' && (github.ref_name == 'main' || contains(github.ref_name, 'release')) }}
|
||||
uses: ./.github/actions/generate-icm
|
||||
with:
|
||||
host: ${{ secrets.AZUREML_ICM_CONNECTOR_HOST_NAME }}
|
||||
connector_id: ${{ secrets.AZUREML_ICM_CONNECTOR_CONNECTOR_ID }}
|
||||
certificate: ${{ secrets.AZUREML_ICM_CONNECTOR_CERTIFICATE }}
|
||||
private_key: ${{ secrets.AZUREML_ICM_CONNECTOR_PRIVATE_KEY }}
|
||||
args: |
|
||||
incident:
|
||||
Title: "[azureml-examples] Notebook validation failed on branch '${{ github.ref_name }}' for notebook 'get-started-notebooks/quickstart.ipynb'"
|
||||
Summary: |
|
||||
Notebook 'get-started-notebooks/quickstart.ipynb' is failing on branch '${{ github.ref_name }}': ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
Severity: 4
|
||||
RoutingId: "github://azureml-examples"
|
||||
Status: Active
|
||||
Source:
|
||||
IncidentId: "get-started-notebooks/quickstart.ipynb[${{ github.ref_name }}]"
|
|
@ -0,0 +1,96 @@
|
|||
# This code is autogenerated.
|
||||
# Code is generated by running custom script: python3 readme.py
|
||||
# Any manual changes to this file may cause incorrect behavior.
|
||||
# Any manual changes will be overwritten if the code is regenerated.
|
||||
|
||||
name: tutorials-get-started-notebooks-train-model
|
||||
# This file is created by tutorials/readme.py.
|
||||
# Please do not edit directly.
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "0 */8 * * *"
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- tutorials/get-started-notebooks/**
|
||||
- .github/workflows/tutorials-get-started-notebooks-train-model.yml
|
||||
- sdk/python/dev-requirements.txt
|
||||
- infra/**
|
||||
- sdk/python/setup.sh
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: check out repo
|
||||
uses: actions/checkout@v2
|
||||
- name: setup python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: "3.8"
|
||||
- name: pip install notebook reqs
|
||||
run: pip install -r sdk/python/dev-requirements.txt
|
||||
- name: pip install mlflow reqs
|
||||
run: pip install -r sdk/python/mlflow-requirements.txt
|
||||
- name: azure login
|
||||
uses: azure/login@v1
|
||||
with:
|
||||
creds: ${{secrets.AZUREML_CREDENTIALS}}
|
||||
- name: bootstrap resources
|
||||
run: |
|
||||
echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
|
||||
bash bootstrap.sh
|
||||
working-directory: infra
|
||||
continue-on-error: false
|
||||
- name: setup SDK
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: sdk/python
|
||||
continue-on-error: true
|
||||
- name: setup-cli
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash setup.sh
|
||||
working-directory: cli
|
||||
continue-on-error: true
|
||||
- name: run get-started-notebooks/train-model.ipynb
|
||||
run: |
|
||||
source "${{ github.workspace }}/infra/sdk_helpers.sh";
|
||||
source "${{ github.workspace }}/infra/init_environment.sh";
|
||||
bash "${{ github.workspace }}/infra/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
|
||||
bash "${{ github.workspace }}/infra/sdk_helpers.sh" replace_template_values "train-model.ipynb";
|
||||
[ -f "../../.azureml/config" ] && cat "../../.azureml/config";
|
||||
papermill -k python train-model.ipynb train-model.output.ipynb
|
||||
working-directory: tutorials/get-started-notebooks
|
||||
- name: upload notebook's working folder as an artifact
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: train-model
|
||||
path: tutorials/get-started-notebooks
|
||||
|
||||
- name: Send IcM on failure
|
||||
if: ${{ failure() && github.ref_type == 'branch' && (github.ref_name == 'main' || contains(github.ref_name, 'release')) }}
|
||||
uses: ./.github/actions/generate-icm
|
||||
with:
|
||||
host: ${{ secrets.AZUREML_ICM_CONNECTOR_HOST_NAME }}
|
||||
connector_id: ${{ secrets.AZUREML_ICM_CONNECTOR_CONNECTOR_ID }}
|
||||
certificate: ${{ secrets.AZUREML_ICM_CONNECTOR_CERTIFICATE }}
|
||||
private_key: ${{ secrets.AZUREML_ICM_CONNECTOR_PRIVATE_KEY }}
|
||||
args: |
|
||||
incident:
|
||||
Title: "[azureml-examples] Notebook validation failed on branch '${{ github.ref_name }}' for notebook 'get-started-notebooks/train-model.ipynb'"
|
||||
Summary: |
|
||||
Notebook 'get-started-notebooks/train-model.ipynb' is failing on branch '${{ github.ref_name }}': ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
Severity: 4
|
||||
RoutingId: "github://azureml-examples"
|
||||
Status: Active
|
||||
Source:
|
||||
IncidentId: "get-started-notebooks/train-model.ipynb[${{ github.ref_name }}]"
|
|
@ -40,6 +40,12 @@ Test Status is for branch - **_main_**
|
|||
|azureml-in-a-day|[azureml-in-a-day](azureml-in-a-day/azureml-in-a-day.ipynb)|Learn how a data scientist uses Azure Machine Learning (Azure ML) to train a model, then use the model for prediction. This tutorial will help you become familiar with the core concepts of Azure ML and their most common usage.|[![azureml-in-a-day](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-azureml-in-a-day-azureml-in-a-day.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-azureml-in-a-day-azureml-in-a-day.yml)|
|
||||
|e2e-distributed-pytorch-image|[e2e-object-classification-distributed-pytorch](e2e-distributed-pytorch-image/e2e-object-classification-distributed-pytorch.ipynb)|Prepare data, test and run a multi-node multi-gpu pytorch job. Use mlflow to analyze your metrics|[![e2e-object-classification-distributed-pytorch](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-e2e-distributed-pytorch-image-e2e-object-classification-distributed-pytorch.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-e2e-distributed-pytorch-image-e2e-object-classification-distributed-pytorch.yml)|
|
||||
|e2e-ds-experience|[e2e-ml-workflow](e2e-ds-experience/e2e-ml-workflow.ipynb)|Create production ML pipelines with Python SDK v2 in a Jupyter notebook|[![e2e-ml-workflow](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-e2e-ds-experience-e2e-ml-workflow.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-e2e-ds-experience-e2e-ml-workflow.yml)|
|
||||
|get-started-notebooks|[cloud-workstation](get-started-notebooks/cloud-workstation.ipynb)|Model development on a cloud workstation|[![cloud-workstation](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-get-started-notebooks-cloud-workstation.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-get-started-notebooks-cloud-workstation.yml)|
|
||||
|get-started-notebooks|[deploy-model](get-started-notebooks/deploy-model.ipynb)|Deploy a model as an online endpoint using Azure Machine Learning Python SDK v2|[![deploy-model](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-get-started-notebooks-deploy-model.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-get-started-notebooks-deploy-model.yml)|
|
||||
|get-started-notebooks|[explore-data](get-started-notebooks/explore-data.ipynb)|*no description*|[![explore-data](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-get-started-notebooks-explore-data.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-get-started-notebooks-explore-data.yml)|
|
||||
|get-started-notebooks|[pipeline](get-started-notebooks/pipeline.ipynb)|Create production ML pipelines with Python SDK v2 in a Jupyter notebook|[![pipeline](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-get-started-notebooks-pipeline.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-get-started-notebooks-pipeline.yml)|
|
||||
|get-started-notebooks|[quickstart](get-started-notebooks/quickstart.ipynb)|*no description*|[![quickstart](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-get-started-notebooks-quickstart.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-get-started-notebooks-quickstart.yml)|
|
||||
|get-started-notebooks|[train-model](get-started-notebooks/train-model.ipynb)|*no description*|[![train-model](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-get-started-notebooks-train-model.yml/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/tutorials-get-started-notebooks-train-model.yml)|
|
||||
|
||||
## Contributing
|
||||
|
||||
|
|
|
@ -0,0 +1,244 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# Model development on a cloud workstation\n",
|
||||
"\n",
|
||||
"This notebook contains just the code cells used in [Tutorial: Model development on a cloud workstation](https://learn.microsoft.com/azure/machine-learning/tutorial-cloud-workstation). See the article for more details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"name": "import",
|
||||
"gather": {
|
||||
"logged": 1678743642146
|
||||
},
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import argparse\n",
|
||||
"import pandas as pd\n",
|
||||
"import mlflow\n",
|
||||
"import mlflow.sklearn\n",
|
||||
"from sklearn.ensemble import GradientBoostingClassifier\n",
|
||||
"from sklearn.metrics import classification_report\n",
|
||||
"from sklearn.model_selection import train_test_split"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"name": "load",
|
||||
"gather": {
|
||||
"logged": 1678743643238
|
||||
},
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# load the data\n",
|
||||
"credit_df = pd.read_csv(\n",
|
||||
" \"https://azuremlexamples.blob.core.windows.net/datasets/credit_card/default_of_credit_card_clients.csv\",\n",
|
||||
" header=1,\n",
|
||||
" index_col=0,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"train_df, test_df = train_test_split(\n",
|
||||
" credit_df,\n",
|
||||
" test_size=0.25,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"name": "extract",
|
||||
"gather": {
|
||||
"logged": 1678743643367
|
||||
},
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Extracting the label column\n",
|
||||
"y_train = train_df.pop(\"default payment next month\")\n",
|
||||
"\n",
|
||||
"# convert the dataframe values to array\n",
|
||||
"X_train = train_df.values\n",
|
||||
"\n",
|
||||
"# Extracting the label column\n",
|
||||
"y_test = test_df.pop(\"default payment next month\")\n",
|
||||
"\n",
|
||||
"# convert the dataframe values to array\n",
|
||||
"X_test = test_df.values"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"name": "mlflow",
|
||||
"gather": {
|
||||
"logged": 1678743647526
|
||||
},
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# set name for logging\n",
|
||||
"mlflow.set_experiment(\"Develop on cloud tutorial\")\n",
|
||||
"# enable autologging with MLflow\n",
|
||||
"mlflow.sklearn.autolog()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"name": "gbt",
|
||||
"gather": {
|
||||
"logged": 1678743686335
|
||||
},
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Train Gradient Boosting Classifier\n",
|
||||
"print(f\"Training with data of shape {X_train.shape}\")\n",
|
||||
"\n",
|
||||
"mlflow.start_run()\n",
|
||||
"clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1)\n",
|
||||
"clf.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
"y_pred = clf.predict(X_test)\n",
|
||||
"\n",
|
||||
"print(classification_report(y_test, y_pred))\n",
|
||||
"# Stop logging for this model\n",
|
||||
"mlflow.end_run()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"name": "ada",
|
||||
"gather": {
|
||||
"logged": 1678743694416
|
||||
},
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Train AdaBoost Classifier\n",
|
||||
"from sklearn.ensemble import AdaBoostClassifier\n",
|
||||
"\n",
|
||||
"print(f\"Training with data of shape {X_train.shape}\")\n",
|
||||
"\n",
|
||||
"mlflow.start_run()\n",
|
||||
"ada = AdaBoostClassifier()\n",
|
||||
"\n",
|
||||
"ada.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
"y_pred = ada.predict(X_test)\n",
|
||||
"\n",
|
||||
"print(classification_report(y_test, y_pred))\n",
|
||||
"# Stop logging for this model\n",
|
||||
"mlflow.end_run()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernel_info": {
|
||||
"name": "python310-sdkv2"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.10 - SDK v2",
|
||||
"language": "python",
|
||||
"name": "python310-sdkv2"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
},
|
||||
"microsoft": {
|
||||
"ms_spell_check": {
|
||||
"ms_spell_check_language": "en"
|
||||
}
|
||||
},
|
||||
"nteract": {
|
||||
"version": "nteract-front-end@1.0.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,837 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# Deploy a model as an online endpoint\n",
|
||||
"\n",
|
||||
"Learn to deploy a model to an online endpoint, using Azure Machine Learning Python SDK v2.\n",
|
||||
"\n",
|
||||
"In this tutorial, we use a model trained to predict the likelihood of defaulting on a credit card payment. The goal is to deploy this model and show its use.\n",
|
||||
"\n",
|
||||
"The steps you'll take are:\n",
|
||||
"\n",
|
||||
"> * Register your model\n",
|
||||
"> * Create an endpoint and a first deployment\n",
|
||||
"> * Deploy a trial run\n",
|
||||
"> * Manually send test data to the deployment\n",
|
||||
"> * Get details of the deployment\n",
|
||||
"> * Create a second deployment\n",
|
||||
"> * Manually scale the second deployment\n",
|
||||
"> * Update allocation of production traffic between both deployments\n",
|
||||
"> * Get details of the second deployment\n",
|
||||
"> * Roll out the new deployment and delete the first one"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"1. Open in studio and select a compute instance.\n",
|
||||
" * If you opened this notebook from Azure Machine Learning studio, you need a compute instance to run the code. If you don't have a compute instance, select **Create compute** on the toolbar to first create one. You can use all the default settings. \n",
|
||||
" \n",
|
||||
" ![Create compute](./media/create-compute.png)\n",
|
||||
" \n",
|
||||
" * If you're seeing this notebook elsewhere, complete [Create resources you need to get started](https://docs.microsoft.com/azure/machine-learning/quickstart-create-resources) to create an Azure Machine Learning workspace and a compute instance.\n",
|
||||
" \n",
|
||||
"1. If you already completed the earlier training tutorial, [Train a model](https://learn.microsoft.com/en-us/azure/machine-learning/tutorial-train-model), you can skip to the next prerequisite.\n",
|
||||
"\n",
|
||||
"1. View your VM quota and ensure you have enough quota available to create online deployments. In this tutorial, you will need at least 8 cores of `STANDARD_DS3_v2` and 12 cores of `STANDARD_F4s_v2`. To view your VM quota usage and request quota increases, see [Manage resource quotas](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-quotas#view-your-usage-and-quotas-in-the-azure-portal).\n",
|
||||
"\n",
|
||||
"## Set your kernel\n",
|
||||
"\n",
|
||||
"* If your compute instance is stopped, start it now. \n",
|
||||
" \n",
|
||||
" ![Start compute](./media/start-compute.png)\n",
|
||||
"\n",
|
||||
"* Once your compute instance is running, make sure that the kernel, found on the top right, is `Python 3.10 - SDK v2`. If not, use the dropdown to select this kernel.\n",
|
||||
"\n",
|
||||
" ![Set the kernel](./media/set-kernel.png)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Create handle to workspace\n",
|
||||
"\n",
|
||||
"Before we dive into the code, you need a way to reference your workspace. You'll create `ml_client` for a handle to the workspace. You'll then use `ml_client` to manage resources and jobs.\n",
|
||||
"\n",
|
||||
"In the next cell, enter your Subscription ID, Resource Group name and Workspace name. To find these values:\n",
|
||||
"\n",
|
||||
"1. In the upper right Azure Machine Learning studio toolbar, select your workspace name.\n",
|
||||
"1. Copy the value for workspace, resource group and subscription ID into the code.\n",
|
||||
"1. You'll need to copy one value, close the area and paste, then come back for the next one."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1671511884101
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure.ai.ml import MLClient\n",
|
||||
"from azure.identity import DefaultAzureCredential\n",
|
||||
"\n",
|
||||
"# authenticate\n",
|
||||
"credential = DefaultAzureCredential()\n",
|
||||
"\n",
|
||||
"# Get a handle to the workspace\n",
|
||||
"ml_client = MLClient(\n",
|
||||
" credential=credential,\n",
|
||||
" subscription_id=\"<SUBSCRIPTION_ID>\",\n",
|
||||
" resource_group_name=\"<RESOURCE_GROUP>\",\n",
|
||||
" workspace_name=\"<AML_WORKSPACE_NAME>\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> [!NOTE]\n",
|
||||
"> Creating `MLClient` will not connect to the workspace. The client initialization is lazy and will wait for the first time it needs to make a call (in this notebook, that will happen when you register or look up the model).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Register the model\n",
|
||||
"\n",
|
||||
"If you already completed the earlier training tutorial, [Train a model](https://learn.microsoft.com/en-us/azure/machine-learning/tutorial-train-model), you've registered an MLflow model as part of the training script and can skip to the next section. \n",
|
||||
"\n",
|
||||
"If you didn't complete the training tutorial, you'll need to register the model. Registering your model before deployment is a recommended best practice.\n",
|
||||
"\n",
|
||||
"In this example, we specify the `path` (where to upload files from) inline. If you [cloned the tutorials folder](https://learn.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources#learn-from-sample-notebooks), then run the following code as-is. Otherwise, [download the files and metadata for the model to deploy](https://azuremlexampledata.blob.core.windows.net/datasets/credit_defaults_model.zip). Update the path to the location on your local computer where you've unzipped the model's files. \n",
|
||||
"\n",
|
||||
"The SDK automatically uploads the files and registers the model. \n",
|
||||
"\n",
|
||||
"For more information on registering your model as an asset, see [Register your model as an asset in Machine Learning by using the SDK](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-models?tabs=use-local%2Ccli#register-your-model-as-an-asset-in-machine-learning-by-using-the-sdk)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1671511925700
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Import the necessary libraries\n",
|
||||
"from azure.ai.ml.entities import Model\n",
|
||||
"from azure.ai.ml.constants import AssetTypes\n",
|
||||
"\n",
|
||||
"# Provide the model details, including the\n",
|
||||
"# path to the model files, if you've stored them locally.\n",
|
||||
"mlflow_model = Model(\n",
|
||||
" path=\"./deploy/credit_defaults_model/\",\n",
|
||||
" type=AssetTypes.MLFLOW_MODEL,\n",
|
||||
" name=\"credit_defaults_model\",\n",
|
||||
" description=\"MLflow Model created from local files.\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Register the model\n",
|
||||
"ml_client.models.create_or_update(mlflow_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Confirm that the model is registered\n",
|
||||
"\n",
|
||||
"You can check the **Models** page in [Azure Machine Learning studio](https://ml.azure.com/) to identify the latest version of your registered model.\n",
|
||||
"\n",
|
||||
"![View model](./media/registered-model-in-studio.png)\n",
|
||||
"\n",
|
||||
"Alternatively, the code below will retrieve the latest version number for you to use."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"registered_model_name = \"credit_defaults_model\"\n",
|
||||
"\n",
|
||||
"# Let's pick the latest version of the model\n",
|
||||
"latest_model_version = max(\n",
|
||||
" [int(m.version) for m in ml_client.models.list(name=registered_model_name)]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(latest_model_version)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now that you have a registered model, you can create an endpoint and deployment. The next section will briefly cover some key details about these topics."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Endpoints and deployments\n",
|
||||
"\n",
|
||||
"After you train a machine learning model, you need to deploy it so that others can use it for inferencing. For this purpose, Azure Machine Learning allows you to create **endpoints** and add **deployments** to them.\n",
|
||||
"\n",
|
||||
"An **endpoint**, in this context, is an HTTPS path that provides an interface for clients to send requests (input data) to a trained model and receive the inferencing (scoring) results back from the model. An endpoint provides:\n",
|
||||
"\n",
|
||||
"- Authentication using \"key or token\" based auth \n",
|
||||
"- [TLS(SSL)](https://simple.wikipedia.org/wiki/Transport_Layer_Security) termination\n",
|
||||
"- A stable scoring URI (endpoint-name.region.inference.ml.azure.com)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"A **deployment** is a set of resources required for hosting the model that does the actual inferencing. \n",
|
||||
"\n",
|
||||
"A single endpoint can contain multiple deployments. Endpoints and deployments are independent Azure Resource Manager resources that appear in the Azure portal.\n",
|
||||
"\n",
|
||||
"Azure Machine Learning allows you to implement [online endpoints](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints#what-are-online-endpoints) for real-time inferencing on client data, and [batch endpoints](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints#what-are-batch-endpoints) for inferencing on large volumes of data over a period of time. \n",
|
||||
"\n",
|
||||
"In this tutorial, we'll walk you through the steps of implementing a _managed online endpoint_. Managed online endpoints work with powerful CPU and GPU machines in Azure in a scalable, fully managed way that frees you from the overhead of setting up and managing the underlying deployment infrastructure."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Create an online endpoint\n",
|
||||
"\n",
|
||||
"Now that you have a registered model, it's time to create your online endpoint. The endpoint name needs to be unique in the entire Azure region. For this tutorial, you'll create a unique name using a universally unique identifier [`UUID`](https://en.wikipedia.org/wiki/Universally_unique_identifier). For more information on the endpoint naming rules, see [managed online endpoint limits](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-quotas#azure-machine-learning-managed-online-endpoints)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import uuid\n",
|
||||
"\n",
|
||||
"# Create a unique name for the endpoint\n",
|
||||
"online_endpoint_name = \"credit-endpoint-\" + str(uuid.uuid4())[:8]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, we'll define the endpoint, using the `ManagedOnlineEndpoint` class.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"> [!TIP]\n",
|
||||
"> * `auth_mode` : Use `key` for key-based authentication. Use `aml_token` for Azure Machine Learning token-based authentication. A `key` doesn't expire, but `aml_token` does expire. For more information on authenticating, see [Authenticate to an online endpoint](https://learn.microsoft.com/azure/machine-learning/how-to-authenticate-online-endpoint).\n",
|
||||
"> * Optionally, you can add a description and tags to your endpoint."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure.ai.ml.entities import ManagedOnlineEndpoint\n",
|
||||
"\n",
|
||||
"# define an online endpoint\n",
|
||||
"endpoint = ManagedOnlineEndpoint(\n",
|
||||
" name=online_endpoint_name,\n",
|
||||
" description=\"this is an online endpoint\",\n",
|
||||
" auth_mode=\"key\",\n",
|
||||
" tags={\n",
|
||||
" \"training_dataset\": \"credit_defaults\",\n",
|
||||
" },\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Using the `MLClient` created earlier, we'll now create the endpoint in the workspace. This command will start the endpoint creation and return a confirmation response while the endpoint creation continues.\n",
|
||||
"\n",
|
||||
"> [!NOTE]\n",
|
||||
"> Expect the endpoint creation to take approximately 2 minutes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# create the online endpoint\n",
|
||||
"# expect the endpoint to take approximately 2 minutes.\n",
|
||||
"\n",
|
||||
"endpoint = ml_client.online_endpoints.begin_create_or_update(endpoint).result()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Once you've created the endpoint, you can retrieve it as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)\n",
|
||||
"\n",
|
||||
"print(\n",
|
||||
" f'Endpoint \"{endpoint.name}\" with provisioning state \"{endpoint.provisioning_state}\" is retrieved'\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Understanding online deployments\n",
|
||||
"\n",
|
||||
"The key aspects of a deployment include:\n",
|
||||
"\n",
|
||||
"- `name` - Name of the deployment.\n",
|
||||
"- `endpoint_name` - Name of the endpoint that will contain the deployment.\n",
|
||||
"- `model` - The model to use for the deployment. This value can be either a reference to an existing versioned model in the workspace or an inline model specification.\n",
|
||||
"- `environment` - The environment to use for the deployment (or to run the model). This value can be either a reference to an existing versioned environment in the workspace or an inline environment specification. The environment can be a Docker image with Conda dependencies or a Dockerfile.\n",
|
||||
"- `code_configuration` - the configuration for the source code and scoring script.\n",
|
||||
" - `path` - Path to the source code directory for scoring the model.\n",
|
||||
" - `scoring_script` - Relative path to the scoring file in the source code directory. This script executes the model on a given input request. For an example of a scoring script, see [Understand the scoring script](https://learn.microsoft.com/azure/machine-learning/how-to-deploy-online-endpoints#understand-the-scoring-script) in the \"Deploy an ML model with an online endpoint\" article.\n",
|
||||
"- `instance_type` - The VM size to use for the deployment. For the list of supported sizes, see [Managed online endpoints SKU list](https://learn.microsoft.com/azure/machine-learning/reference-managed-online-endpoints-vm-sku-list).\n",
|
||||
"- `instance_count` - The number of instances to use for the deployment.\n",
|
||||
" \n",
|
||||
"### Deployment using an MLflow model\n",
|
||||
"\n",
|
||||
"Azure Machine Learning supports no-code deployment of a model created and logged with MLflow. This means that you don't have to provide a scoring script or an environment during model deployment, as the scoring script and environment are automatically generated when training an MLflow model. If you were using a custom model, though, you'd have to specify the environment and scoring script during deployment.\n",
|
||||
"\n",
|
||||
"> [!IMPORTANT]\n",
|
||||
"> If you typically deploy models using scoring scripts and custom environments and want to achieve the same functionality using MLflow models, we recommend reading [Using MLflow models for no-code deployment](https://learn.microsoft.com/azure/machine-learning/how-to-deploy-mlflow-models)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Deploy the model to the endpoint\n",
|
||||
"\n",
|
||||
"You'll begin by creating a single deployment that handles 100% of the incoming traffic. We've chosen an arbitrary color name (*blue*) for the deployment. To create the deployment for our endpoint, we'll use the `ManagedOnlineDeployment` class.\n",
|
||||
"\n",
|
||||
"> [!NOTE]\n",
|
||||
"> No need to specify an environment or scoring script as the model to deploy is an MLflow model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure.ai.ml.entities import ManagedOnlineDeployment\n",
|
||||
"\n",
|
||||
"# Choose the latest version of our registered model for deployment\n",
|
||||
"model = ml_client.models.get(name=registered_model_name, version=latest_model_version)\n",
|
||||
"\n",
|
||||
"# define an online deployment\n",
|
||||
"blue_deployment = ManagedOnlineDeployment(\n",
|
||||
" name=\"blue\",\n",
|
||||
" endpoint_name=online_endpoint_name,\n",
|
||||
" model=model,\n",
|
||||
" instance_type=\"Standard_DS3_v2\",\n",
|
||||
" instance_count=1,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Using the `MLClient` created earlier, we'll now create the deployment in the workspace. This command will start the deployment creation and return a confirmation response while the deployment creation continues."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# create the online deployment\n",
|
||||
"blue_deployment = ml_client.online_deployments.begin_create_or_update(\n",
|
||||
" blue_deployment\n",
|
||||
").result()\n",
|
||||
"\n",
|
||||
"# blue deployment takes 100% traffic\n",
|
||||
"# expect the deployment to take approximately 8 to 10 minutes.\n",
|
||||
"endpoint.traffic = {\"blue\": 100}\n",
|
||||
"ml_client.online_endpoints.begin_create_or_update(endpoint).result()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Check the status of the endpoint\n",
|
||||
"You can check the status of the endpoint to see whether the model was deployed without error:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# return an object that contains metadata for the endpoint\n",
|
||||
"endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)\n",
|
||||
"\n",
|
||||
"# print a selection of the endpoint's metadata\n",
|
||||
"print(\n",
|
||||
" f\"Name: {endpoint.name}\\nStatus: {endpoint.provisioning_state}\\nDescription: {endpoint.description}\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# existing traffic details\n",
|
||||
"print(endpoint.traffic)\n",
|
||||
"\n",
|
||||
"# Get the scoring URI\n",
|
||||
"print(endpoint.scoring_uri)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test the endpoint with sample data\n",
|
||||
"\n",
|
||||
"Now that the model is deployed to the endpoint, you can run inference with it. Let's create a sample request file following the design expected in the run method in the scoring script."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# Create a directory to store the sample request file.\n",
|
||||
"deploy_dir = \"./deploy\"\n",
|
||||
"os.makedirs(deploy_dir, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, create the file in the deploy directory. The cell below uses IPython magic to write the file into the directory you just created."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile {deploy_dir}/sample-request.json\n",
|
||||
"{\n",
|
||||
" \"input_data\": {\n",
|
||||
" \"columns\": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],\n",
|
||||
" \"index\": [0, 1],\n",
|
||||
" \"data\": [\n",
|
||||
" [20000,2,2,1,24,2,2,-1,-1,-2,-2,3913,3102,689,0,0,0,0,689,0,0,0,0],\n",
|
||||
" [10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 10, 9, 8]\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Using the `MLClient` created earlier, we'll get a handle to the endpoint. The endpoint can be invoked using the `invoke` command with the following parameters:\n",
|
||||
"\n",
|
||||
"* `endpoint_name` - Name of the endpoint\n",
|
||||
"* `request_file` - File with request data\n",
|
||||
"* `deployment_name` - Name of the specific deployment to test in an endpoint\n",
|
||||
"\n",
|
||||
"We'll test the blue deployment with the sample data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# test the blue deployment with the sample data\n",
|
||||
"ml_client.online_endpoints.invoke(\n",
|
||||
" endpoint_name=online_endpoint_name,\n",
|
||||
" deployment_name=\"blue\",\n",
|
||||
" request_file=\"./deploy/sample-request.json\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get logs of the deployment\n",
|
||||
"Check the logs to see whether the endpoint/deployment were invoked successfully.\n",
|
||||
"If you face errors, see [Troubleshooting online endpoints deployment](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-troubleshoot-online-endpoints?tabs=cli)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"logs = ml_client.online_deployments.get_logs(\n",
|
||||
" name=\"blue\", endpoint_name=online_endpoint_name, lines=50\n",
|
||||
")\n",
|
||||
"print(logs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a second deployment \n",
|
||||
"Deploy the model as a second deployment called `green`. In practice, you can create several deployments and compare their performance. These deployments could use a different version of the same model, a completely different model, or a more powerful compute instance. In our example, you'll deploy the same model version using a more powerful compute instance that could potentially improve performance."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# picking the model to deploy. Here we use the latest version of our registered model\n",
|
||||
"model = ml_client.models.get(name=registered_model_name, version=latest_model_version)\n",
|
||||
"\n",
|
||||
"# define an online deployment using a more powerful instance type\n",
|
||||
"green_deployment = ManagedOnlineDeployment(\n",
|
||||
" name=\"green\",\n",
|
||||
" endpoint_name=online_endpoint_name,\n",
|
||||
" model=model,\n",
|
||||
" instance_type=\"Standard_F4s_v2\",\n",
|
||||
" instance_count=1,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# create the online deployment\n",
|
||||
"# expect the deployment to take approximately 8 to 10 minutes\n",
|
||||
"green_deployment = ml_client.online_deployments.begin_create_or_update(\n",
|
||||
" green_deployment\n",
|
||||
").result()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Scale deployment to handle more traffic\n",
|
||||
"\n",
|
||||
"Using the `MLClient` created earlier, we'll get a handle to the `green` deployment. The deployment can be scaled by increasing or decreasing the `instance_count`.\n",
|
||||
"\n",
|
||||
"In the following code, you'll increase the VM instance count manually. However, note that it is also possible to autoscale online endpoints. Autoscale automatically runs the right amount of resources to handle the load on your application. Managed online endpoints support autoscaling through integration with the Azure Monitor autoscale feature. To configure autoscaling, see [autoscale online endpoints](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-autoscale-endpoints?tabs=python)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# update definition of the deployment\n",
|
||||
"green_deployment.instance_count = 2\n",
|
||||
"\n",
|
||||
"# update the deployment\n",
|
||||
"# expect the deployment to take approximately 8 to 10 minutes\n",
|
||||
"ml_client.online_deployments.begin_create_or_update(green_deployment).result()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Update traffic allocation for deployments\n",
|
||||
"You can split production traffic between deployments. You may first want to test the `green` deployment with sample data, just like you did for the `blue` deployment. Once you've tested your green deployment, allocate a small percentage of traffic to it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"endpoint.traffic = {\"blue\": 80, \"green\": 20}\n",
|
||||
"ml_client.online_endpoints.begin_create_or_update(endpoint).result()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can test traffic allocation by invoking the endpoint several times:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You can invoke the endpoint several times\n",
|
||||
"for i in range(30):\n",
|
||||
" ml_client.online_endpoints.invoke(\n",
|
||||
" endpoint_name=online_endpoint_name,\n",
|
||||
" request_file=\"./deploy/sample-request.json\",\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Show logs from the `green` deployment to check that there were incoming requests and the model was scored successfully. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"logs = ml_client.online_deployments.get_logs(\n",
|
||||
" name=\"green\", endpoint_name=online_endpoint_name, lines=50\n",
|
||||
")\n",
|
||||
"print(logs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## View metrics using Azure Monitor\n",
|
||||
"You can view various metrics (request numbers, request latency, network bytes, CPU/GPU/Disk/Memory utilization, and more) for an online endpoint and its deployments by following links from the endpoint's **Details** page in the studio. Following these links will take you to the exact metrics page in the Azure portal for the endpoint or deployment.\n",
|
||||
"\n",
|
||||
"![metrics page 1](./media/deployment-metrics-from-endpoint-details-page.png)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"If you open the metrics for the online endpoint, you can set up the page to see metrics such as the average request latency as shown in the following figure.\n",
|
||||
"\n",
|
||||
"![metrics page 2](./media/view-endpoint-metrics-in-azure-portal.png)\n",
|
||||
"\n",
|
||||
"For more information on how to view online endpoint metrics, see [Monitor online endpoints](https://learn.microsoft.com/en-us/azure/machine-learning/how-to-monitor-online-endpoints#metrics)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Send all traffic to the new deployment\n",
|
||||
"Once you're fully satisfied with your `green` deployment, switch all traffic to it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"endpoint.traffic = {\"blue\": 0, \"green\": 100}\n",
|
||||
"ml_client.begin_create_or_update(endpoint).result()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Delete the old deployment\n",
|
||||
"Remove the old (blue) deployment:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ml_client.online_deployments.begin_delete(\n",
|
||||
" name=\"blue\", endpoint_name=online_endpoint_name\n",
|
||||
").result()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Clean up resources\n",
|
||||
"\n",
|
||||
"If you aren't going to use the endpoint and deployment after completing this tutorial, you should delete them.\n",
|
||||
"\n",
|
||||
"> [!NOTE]\n",
|
||||
"> Expect the complete deletion to take approximately 20 minutes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ml_client.online_endpoints.begin_delete(name=online_endpoint_name).result()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernel_info": {
|
||||
"name": "python310-sdkv2"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.10 - SDK v2",
|
||||
"language": "python",
|
||||
"name": "python310-sdkv2"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
},
|
||||
"nteract": {
|
||||
"version": "nteract-front-end@1.0.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
artifact_path: credit_defaults_model
|
||||
flavors:
|
||||
python_function:
|
||||
env: conda.yaml
|
||||
loader_module: mlflow.sklearn
|
||||
model_path: model.pkl
|
||||
python_version: 3.8.15
|
||||
sklearn:
|
||||
code: null
|
||||
pickled_model: model.pkl
|
||||
serialization_format: cloudpickle
|
||||
sklearn_version: 0.24.2
|
||||
mlflow_version: 1.26.1
|
||||
model_uuid: 3d4b3022e6ca4afeaa4a6b1616c46486
|
||||
run_id: coral_bell_shm9lpzkyl
|
||||
utc_time_created: '2022-12-15 05:52:49.308371'
|
|
@ -0,0 +1,11 @@
|
|||
channels:
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- python=3.8.15
|
||||
- pip<=21.2.4
|
||||
- pip:
|
||||
- mlflow
|
||||
- cloudpickle==2.2.0
|
||||
- psutil==5.8.0
|
||||
- scikit-learn==0.24.2
|
||||
name: mlflow-env
|
|
@ -0,0 +1,7 @@
|
|||
python: 3.8.15
|
||||
build_dependencies:
|
||||
- pip==21.2.4
|
||||
- setuptools==59.8.0
|
||||
- wheel==0.38.4
|
||||
dependencies:
|
||||
- -r requirements.txt
|
|
@ -0,0 +1,4 @@
|
|||
mlflow
|
||||
cloudpickle==2.2.0
|
||||
psutil==5.8.0
|
||||
scikit-learn==0.24.2
|
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"input_data": {
|
||||
"columns": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],
|
||||
"index": [0, 1],
|
||||
"data": [
|
||||
[20000,2,2,1,24,2,2,-1,-1,-2,-2,3913,3102,689,0,0,0,0,689,0,0,0,0],
|
||||
[10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 10, 9, 8]
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,561 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# Tutorial: Upload, access and explore your data in Azure Machine Learning\n",
|
||||
"\n",
|
||||
"In this tutorial you learn how to:\n",
|
||||
"\n",
|
||||
"> * Upload your data to cloud storage\n",
|
||||
"> * Create an Azure Machine Learning data asset\n",
|
||||
"> * Access your data in a notebook for interactive development\n",
|
||||
"> * Create new versions of data assets\n",
|
||||
"\n",
|
||||
"The start of a machine learning project typically involves exploratory data analysis (EDA), data-preprocessing (cleaning, feature engineering), and the building of Machine Learning model prototypes to validate hypotheses. This _prototyping_ project phase is highly interactive. It lends itself to development in an IDE or a Jupyter notebook, with a _Python interactive console_. This tutorial describes these ideas.\n",
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"* If you opened this notebook from Azure Machine Learning studio, you need a compute instance to run the code. If you don't have a compute instance, select **Create compute** on the toolbar to first create one. You can use all the default settings. \n",
|
||||
"\n",
|
||||
" ![Create compute](./media/create-compute.png)\n",
|
||||
"\n",
|
||||
"* If you're seeing this notebook elsewhere, complete [Create resources you need to get started](https://docs.microsoft.com/azure/machine-learning/quickstart-create-resources) to create an Azure Machine Learning workspace and a compute instance.\n",
|
||||
"\n",
|
||||
"## Set your kernel\n",
|
||||
"\n",
|
||||
"* If your compute instance is stopped, start it now. \n",
|
||||
" \n",
|
||||
" ![Start compute](./media/start-compute.png)\n",
|
||||
"\n",
|
||||
"* Once your compute instance is running, make sure that the kernel, found on the top right, is `Python 3.10 - SDK v2`. If not, use the dropdown to select this kernel.\n",
|
||||
"\n",
|
||||
" ![Set the kernel](./media/set-kernel.png)\n",
|
||||
"\n",
|
||||
"### Download the data used in this tutorial\n",
|
||||
"\n",
|
||||
"For data ingestion, the Azure Data Explorer handles raw data in [these formats](https://learn.microsoft.com/azure/data-explorer/ingestion-supported-formats). This tutorial uses this [CSV-format credit card client data sample](https://azuremlexamples.blob.core.windows.net/datasets/credit_card/default_of_credit_card_clients.csv). We see the steps proceed in an Azure Machine Learning resource. In that resource, we'll create a local folder with the suggested name of **data** directly under the folder where this notebook is located.\n",
|
||||
"\n",
|
||||
"> [!NOTE]\n",
|
||||
"> This tutorial depends on data placed in an Azure Machine Learning resource folder location. For this tutorial, 'local' means a folder location in that Azure Machine Learning resource. \n",
|
||||
"\n",
|
||||
"1. Select **Open terminal** below the three dots, as shown in this image:\n",
|
||||
"\n",
|
||||
" ![Open terminal](./media/open-terminal.png)\n",
|
||||
"\n",
|
||||
"1. The terminal window opens in a new tab. \n",
|
||||
"1. Make sure you `cd` to the same folder where this notebook is located. For example, if the notebook is in a folder named **get-started-notebooks**:\n",
|
||||
"\n",
|
||||
" ```\n",
|
||||
" cd get-started-notebooks # modify this to the path where your notebook is located\n",
|
||||
" ```\n",
|
||||
"\n",
|
||||
"1. Enter these commands in the terminal window to copy the data to your compute instance:\n",
|
||||
"\n",
|
||||
" ```\n",
|
||||
" mkdir data\n",
|
||||
" cd data # the sub-folder where you'll store the data\n",
|
||||
" wget https://azuremlexamples.blob.core.windows.net/datasets/credit_card/default_of_credit_card_clients.csv\n",
|
||||
" ```\n",
|
||||
"1. You can now close the terminal window.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"[Learn more about this data on the UCI Machine Learning Repository.](https://archive.ics.uci.edu/ml/datasets/default+of+credit+card+clients)\n",
|
||||
"\n",
|
||||
"## Create handle to workspace\n",
|
||||
"\n",
|
||||
"Before we dive into the code, you need a way to reference your workspace. You'll create `ml_client` for a handle to the workspace. You'll then use `ml_client` to manage resources and jobs.\n",
|
||||
"\n",
|
||||
"In the next cell, enter your Subscription ID, Resource Group name and Workspace name. To find these values:\n",
|
||||
"\n",
|
||||
"1. In the upper right Azure Machine Learning studio toolbar, select your workspace name.\n",
|
||||
"1. Copy the value for workspace, resource group and subscription ID into the code.\n",
|
||||
"1. You'll need to copy one value, close the area and paste, then come back for the next one."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1675966726847
|
||||
},
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure.ai.ml import MLClient\n",
|
||||
"from azure.identity import DefaultAzureCredential\n",
|
||||
"from azure.ai.ml.entities import Data\n",
|
||||
"from azure.ai.ml.constants import AssetTypes\n",
|
||||
"\n",
|
||||
"# authenticate\n",
|
||||
"credential = DefaultAzureCredential()\n",
|
||||
"\n",
|
||||
"# Get a handle to the workspace\n",
|
||||
"ml_client = MLClient(\n",
|
||||
" credential=credential,\n",
|
||||
" subscription_id=\"<SUBSCRIPTION_ID>\",\n",
|
||||
" resource_group_name=\"<RESOURCE_GROUP>\",\n",
|
||||
" workspace_name=\"<AML_WORKSPACE_NAME>\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> [!NOTE]\n",
|
||||
"> Creating MLClient will not connect to the workspace. The client initialization is lazy; it will wait for the first time it needs to make a call (in this notebook, that will happen when the first data asset is created)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"\n",
|
||||
"## Upload data to cloud storage\n",
|
||||
"\n",
|
||||
"Azure Machine Learning uses Uniform Resource Identifiers (URIs), which point to storage locations in the cloud. A URI makes it easy to access data in notebooks and jobs. Data URI formats look similar to the web URLs that you use in your web browser to access web pages. For example:\n",
|
||||
"\n",
|
||||
"* Access data from public https server: `https://<account_name>.blob.core.windows.net/<container_name>/<folder>/<file>`\n",
|
||||
"* Access data from Azure Data Lake Gen 2: `abfss://<file_system>@<account_name>.dfs.core.windows.net/<folder>/<file>`\n",
|
||||
"\n",
|
||||
"An Azure Machine Learning data asset is similar to web browser bookmarks (favorites). Instead of remembering long storage paths (URIs) that point to your most frequently used data, you can create a data asset, and then access that asset with a friendly name.\n",
|
||||
"\n",
|
||||
"Data asset creation also creates a *reference* to the data source location, along with a copy of its metadata. Because the data remains in its existing location, you incur no extra storage cost, and don't risk data source integrity. You can create Data assets from Azure Machine Learning datastores, Azure Storage, public URLs, and local files.\n",
|
||||
"\n",
|
||||
"> [!TIP]\n",
|
||||
"> For smaller-size data uploads, Azure Machine Learning data asset creation works well for data uploads from local machine resources to cloud storage. This approach avoids the need for extra tools or utilities. However, a larger-size data upload might require a dedicated tool or utility - for example, **azcopy**. The azcopy command-line tool moves data to and from Azure Storage. Learn more about [azcopy](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10).\n",
|
||||
"\n",
|
||||
"The next notebook cell creates the data asset. The code sample uploads the raw data file to the designated cloud storage resource. \n",
|
||||
"\n",
|
||||
"Each time you create a data asset, you need a unique version for it. If the version already exists, you'll get an error. In this code, we're using time to generate a unique version, which will mostly work. But if you happen to run this same cell on a different day at the exact same time, you'll get an error. If this occurs, chances are good that it will be successful if you re-run the cell.\n",
|
||||
"\n",
|
||||
"You can also omit the **version** parameter, and a version number is generated for you, starting with 1 and then incrementing from there. In this tutorial, we want to refer to specific version numbers, so we create a version number instead."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1675461156382
|
||||
},
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure.ai.ml.entities import Data\n",
|
||||
"from azure.ai.ml.constants import AssetTypes\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"# update the 'my_path' variable to match the location of where you downloaded the data on your\n",
|
||||
"# local filesystem\n",
|
||||
"\n",
|
||||
"my_path = \"./data/default_of_credit_card_clients.csv\"\n",
|
||||
"v1 = str(time.strftime(\"%H.%M.%S\", time.gmtime()))\n",
|
||||
"\n",
|
||||
"my_data = Data(\n",
|
||||
" name=\"credit-card\",\n",
|
||||
" version=v1,\n",
|
||||
" description=\"Credit card data\",\n",
|
||||
" path=my_path,\n",
|
||||
" type=AssetTypes.URI_FILE,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# create data asset\n",
|
||||
"ml_client.data.create_or_update(my_data)\n",
|
||||
"\n",
|
||||
"print(f\"Data asset created. Name: {my_data.name}, version: {my_data.version}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"You can see the uploaded data by selecting **Data** on the left. You'll see the data is uploaded and a data asset is created:\n",
|
||||
"\n",
|
||||
"![Image of data section of studio shows uploaded data](./media/access-and-explore-data.png)\n",
|
||||
"\n",
|
||||
"This data is named **credit-card**, and in the **Data assets** tab, we can see it in the **Name** column. This data uploaded to your workspace's default datastore named **workspaceblobstore**, seen in the **Data source** column. \n",
|
||||
"\n",
|
||||
"An Azure Machine Learning datastore is a *reference* to an *existing* storage account on Azure. A datastore offers these benefits:\n",
|
||||
"\n",
|
||||
"1. A common and easy-to-use API, to interact with different storage types (Blob/Files/Azure Data Lake Storage) and authentication methods.\n",
|
||||
"1. An easier way to discover useful datastores, when working as a team.\n",
|
||||
"1. In your scripts, a way to hide connection information for credential-based data access (service principal/SAS/key).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Access your data in a notebook\n",
|
||||
"\n",
|
||||
"Pandas directly supports URIs - this example shows how to read a CSV file from an Azure Machine Learning Datastore:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"df = pd.read_csv(\"azureml://subscriptions/<subid>/resourcegroups/<rgname>/workspaces/<workspace_name>/datastores/<datastore_name>/paths/<folder>/<filename>.csv\")\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"However, as mentioned previously, it can become hard to remember these URIs. Additionally, you must manually substitute all **<_substring_>** values in the **pd.read_csv** command with the real values for your resources. \n",
|
||||
"\n",
|
||||
"You'll want to create data assets for frequently accessed data. Here's an easier way to access the CSV file in Pandas:\n",
|
||||
"\n",
|
||||
"> [!IMPORTANT]\n",
|
||||
"> In a notebook cell, execute this code to install the `azureml-fsspec` Python library in your Jupyter kernel:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -U azureml-fsspec"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1675445030495
|
||||
},
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"# get a handle of the data asset and print the URI\n",
|
||||
"data_asset = ml_client.data.get(name=\"credit-card\", version=v1)\n",
|
||||
"print(f\"Data asset URI: {data_asset.path}\")\n",
|
||||
"\n",
|
||||
"# read into pandas - note that you will see 2 headers in your data frame - that is ok, for now\n",
|
||||
"\n",
|
||||
"df = pd.read_csv(data_asset.path)\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Read [Access data from Azure cloud storage during interactive development](https://learn.microsoft.com/azure/machine-learning/how-to-access-data-interactive) to learn more about data access in a notebook.\n",
|
||||
"\n",
|
||||
"## Create a new version of the data asset\n",
|
||||
"\n",
|
||||
"You might have noticed that the data needs a little light cleaning, to make it fit to train a machine learning model. It has:\n",
|
||||
"\n",
|
||||
"* two headers\n",
|
||||
"* a client ID column; we wouldn't use this feature in Machine Learning\n",
|
||||
"* spaces in the response variable name\n",
|
||||
"\n",
|
||||
"Also, compared to the CSV format, the Parquet file format becomes a better way to store this data. Parquet offers compression, and it maintains schema. Therefore, to clean the data and store it in Parquet, use:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1675445038545
|
||||
},
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# read in data again, this time using the 2nd row as the header\n",
|
||||
"df = pd.read_csv(data_asset.path, header=1)\n",
|
||||
"# rename column\n",
|
||||
"df.rename(columns={\"default payment next month\": \"default\"}, inplace=True)\n",
|
||||
"# remove ID column\n",
|
||||
"df.drop(\"ID\", axis=1, inplace=True)\n",
|
||||
"\n",
|
||||
"# write file to filesystem\n",
|
||||
"df.to_parquet(\"./data/cleaned-credit-card.parquet\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"This table shows the structure of the data in the original **default_of_credit_card_clients.csv** file downloaded in an earlier step. The uploaded data contains 23 explanatory variables and 1 response variable, as shown here:\n",
|
||||
"\n",
|
||||
"|Column Name(s) | Variable Type |Description |\n",
|
||||
"|---------|---------|---------|\n",
|
||||
"|X1 | Explanatory | Amount of the given credit (NT dollar): it includes both the individual consumer credit and their family (supplementary) credit. |\n",
|
||||
"|X2 | Explanatory | Gender (1 = male; 2 = female). |\n",
|
||||
"|X3 | Explanatory | Education (1 = graduate school; 2 = university; 3 = high school; 4 = others). |\n",
|
||||
"|X4 | Explanatory | Marital status (1 = married; 2 = single; 3 = others). |\n",
|
||||
"|X5 | Explanatory | Age (years). |\n",
|
||||
"|X6-X11 | Explanatory | History of past payment. We tracked the past monthly payment records (from April to September 2005). -1 = pay duly; 1 = payment delay for one month; 2 = payment delay for two months; . . .; 8 = payment delay for eight months; 9 = payment delay for nine months and above. |\n",
|
||||
"|X12-X17 | Explanatory | Amount of bill statement (NT dollar) from April to September 2005. |\n",
|
||||
"|X18-X23 | Explanatory | Amount of previous payment (NT dollar) from April to September 2005. |\n",
|
||||
"|Y | Response | Default payment (Yes = 1, No = 0) |\n",
|
||||
"\n",
|
||||
"Next, create a new _version_ of the data asset (the data automatically uploads to cloud storage):\n",
|
||||
"\n",
|
||||
"> [!NOTE]\n",
|
||||
">\n",
|
||||
"> This Python code cell sets **name** and **version** values for the data asset it creates. As a result, the code in this cell will fail if executed more than once, without a change to these values. Fixed **name** and **version** values offer a way to pass values that work for specific situations, without concern for auto-generated or randomly-generated values.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1675382989789
|
||||
},
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure.ai.ml.entities import Data\n",
|
||||
"from azure.ai.ml.constants import AssetTypes\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"# Next, create a new *version* of the data asset (the data is automatically uploaded to cloud storage):\n",
|
||||
"v2 = v1 + \"_cleaned\"\n",
|
||||
"my_path = \"./data/cleaned-credit-card.parquet\"\n",
|
||||
"\n",
|
||||
"# Define the data asset, and use tags to make it clear the asset can be used in training\n",
|
||||
"\n",
|
||||
"my_data = Data(\n",
|
||||
" name=\"credit-card\",\n",
|
||||
" version=v2,\n",
|
||||
" description=\"Default of credit card clients data.\",\n",
|
||||
" tags={\"training_data\": \"true\", \"format\": \"parquet\"},\n",
|
||||
" path=my_path,\n",
|
||||
" type=AssetTypes.URI_FILE,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"## create the data asset\n",
|
||||
"\n",
|
||||
"my_data = ml_client.data.create_or_update(my_data)\n",
|
||||
"\n",
|
||||
"print(f\"Data asset created. Name: {my_data.name}, version: {my_data.version}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"The cleaned parquet file is the latest version data source. This code shows the CSV version result set first, then the Parquet version:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1675383001940
|
||||
},
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"# get a handle of the data asset and print the URI\n",
|
||||
"data_asset_v1 = ml_client.data.get(name=\"credit-card\", version=v1)\n",
|
||||
"data_asset_v2 = ml_client.data.get(name=\"credit-card\", version=v2)\n",
|
||||
"\n",
|
||||
"# print the v1 data\n",
|
||||
"print(f\"V1 Data asset URI: {data_asset_v1.path}\")\n",
|
||||
"v1df = pd.read_csv(data_asset_v1.path)\n",
|
||||
"print(v1df.head(5))\n",
|
||||
"\n",
|
||||
"# print the v2 data\n",
|
||||
"print(\n",
|
||||
" \"_____________________________________________________________________________________________________________\\n\"\n",
|
||||
")\n",
|
||||
"print(f\"V2 Data asset URI: {data_asset_v2.path}\")\n",
|
||||
"v2df = pd.read_parquet(data_asset_v2.path)\n",
|
||||
"print(v2df.head(5))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"Read [Create data assets](https://learn.microsoft.com/azure/machine-learning/how-to-create-data-assets) for more information about data assets.\n",
|
||||
"\n",
|
||||
"Read [Create datastores](https://learn.microsoft.com/azure/machine-learning/how-to-datastore) to learn more about datastores.\n",
|
||||
"\n",
|
||||
"Continue with tutorials to learn how to develop a training script.\n",
|
||||
"\n",
|
||||
"> [Model development on a cloud workstation](https://learn.microsoft.com/azure/machine-learning/tutorial-cloud-workstation)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernel_info": {
|
||||
"name": "python310-sdkv2"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.10 - SDK v2",
|
||||
"language": "python",
|
||||
"name": "python310-sdkv2"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
},
|
||||
"microsoft": {
|
||||
"host": {
|
||||
"AzureML": {
|
||||
"notebookHasBeenCompleted": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"nteract": {
|
||||
"version": "nteract-front-end@1.0.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
После Ширина: | Высота: | Размер: 117 KiB |
После Ширина: | Высота: | Размер: 24 KiB |
Двоичные данные
tutorials/get-started-notebooks/media/deployment-metrics-from-endpoint-details-page.png
Normal file
После Ширина: | Высота: | Размер: 140 KiB |
После Ширина: | Высота: | Размер: 153 KiB |
После Ширина: | Высота: | Размер: 136 KiB |
После Ширина: | Высота: | Размер: 17 KiB |
После Ширина: | Высота: | Размер: 23 KiB |
После Ширина: | Высота: | Размер: 9.2 KiB |
После Ширина: | Высота: | Размер: 60 KiB |
После Ширина: | Высота: | Размер: 22 KiB |
После Ширина: | Высота: | Размер: 24 KiB |
После Ширина: | Высота: | Размер: 152 KiB |
Двоичные данные
tutorials/get-started-notebooks/media/view-endpoint-metrics-in-azure-portal.png
Normal file
После Ширина: | Высота: | Размер: 118 KiB |
После Ширина: | Высота: | Размер: 7.7 MiB |
|
@ -0,0 +1,739 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# First look at Azure Machine Learning\n",
|
||||
"\n",
|
||||
"This tutorial is an introduction to some of the most used features of the Azure Machine Learning service. In it, you will create, register and deploy a model. This tutorial will help you become familiar with the core concepts of Azure Machine Learning and their most common usage. \n",
|
||||
"\n",
|
||||
"You'll learn how to run a training job on a scalable compute resource, then deploy it, and finally test the deployment.\n",
|
||||
"\n",
|
||||
"You'll create a training script to handle the data preparation, train and register a model. Once you train the model, you'll *deploy* it as an *endpoint*, then call the endpoint for *inferencing*.\n",
|
||||
"\n",
|
||||
"The steps you'll take are:\n",
|
||||
"\n",
|
||||
"> * Set up a handle to your Azure Machine Learning workspace\n",
|
||||
"> * Create your training script\n",
|
||||
"> * Create a scalable compute resource, a compute cluster \n",
|
||||
"> * Create and run a command job that will run the training script on the compute cluster, configured with the appropriate job environment\n",
|
||||
"> * View the output of your training script\n",
|
||||
"> * Deploy the newly-trained model as an endpoint\n",
|
||||
"> * Call the Azure Machine Learning endpoint for inferencing"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"* If you opened this notebook from Azure Machine Learning studio, you need a compute instance to run the code. If you don't have a compute instance, select **Create compute** on the toolbar to first create one. You can use all the default settings. \n",
|
||||
"\n",
|
||||
" ![Create compute](./media/create-compute.png)\n",
|
||||
"\n",
|
||||
"* If you're seeing this notebook elsewhere, complete [Create resources you need to get started](https://docs.microsoft.com/azure/machine-learning/quickstart-create-resources) to create an Azure Machine Learning workspace and a compute instance.\n",
|
||||
"\n",
|
||||
"## Set your kernel\n",
|
||||
"\n",
|
||||
"* If your compute instance is stopped, start it now. \n",
|
||||
" \n",
|
||||
" ![Start compute](./media/start-compute.png)\n",
|
||||
"\n",
|
||||
"* Once your compute instance is running, make sure that the kernel, found on the top right, is `Python 3.10 - SDK v2`. If not, use the dropdown to select this kernel.\n",
|
||||
"\n",
|
||||
" ![Set the kernel](./media/set-kernel.png)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create handle to workspace\n",
|
||||
"\n",
|
||||
"Before we dive into the code, you need a way to reference your workspace. The workspace is the top-level resource for Azure Machine Learning, providing a centralized place to work with all the artifacts you create when you use Azure Machine Learning.\n",
|
||||
"\n",
|
||||
"You'll create `ml_client` for a handle to the workspace. You'll then use `ml_client` to manage resources and jobs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In the next cell, enter your Subscription ID, Resource Group name and Workspace name. To find these values:\n",
|
||||
"\n",
|
||||
"1. In the upper right Azure Machine Learning studio toolbar, select your workspace name.\n",
|
||||
"1. Copy the value for workspace, resource group and subscription ID into the code. \n",
|
||||
"1. You'll need to copy one value, close the area and paste, then come back for the next one.\n",
|
||||
"\n",
|
||||
"![image of workspace credentials](./media/find-credentials.png)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1679003731988
|
||||
},
|
||||
"name": "ml_client"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure.ai.ml import MLClient\n",
|
||||
"from azure.identity import DefaultAzureCredential\n",
|
||||
"\n",
|
||||
"# authenticate\n",
|
||||
"credential = DefaultAzureCredential()\n",
|
||||
"\n",
|
||||
"# Get a handle to the workspace\n",
|
||||
"ml_client = MLClient(\n",
|
||||
" credential=credential,\n",
|
||||
" subscription_id=\"<SUBSCRIPTION_ID>\",\n",
|
||||
" resource_group_name=\"<RESOURCE_GROUP>\",\n",
|
||||
" workspace_name=\"<AML_WORKSPACE_NAME>\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> [!NOTE]\n",
|
||||
"> Creating MLClient will not connect to the workspace. The client initialization is lazy, it will wait for the first time it needs to make a call (in the notebook below, that will happen during compute creation)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create training script\n",
|
||||
"\n",
|
||||
"Let's start by creating the training script - the *main.py* Python file.\n",
|
||||
"\n",
|
||||
"First create a source folder for the script:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1679003739848
|
||||
},
|
||||
"name": "train_src_dir"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"train_src_dir = \"./src\"\n",
|
||||
"os.makedirs(train_src_dir, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This script handles the preprocessing of the data, splitting it into test and train data. It then consumes this data to train a tree-based model and return the output model. \n",
|
||||
"\n",
|
||||
"[MLFlow](https://learn.microsoft.com/azure/machine-learning/how-to-log-mlflow-models) will be used to log the parameters and metrics during our pipeline run. \n",
|
||||
"\n",
|
||||
"The cell below uses IPython magic to write the training script into the directory you just created."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"name": "write_main"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile {train_src_dir}/main.py\n",
|
||||
"import os\n",
|
||||
"import argparse\n",
|
||||
"import pandas as pd\n",
|
||||
"import mlflow\n",
|
||||
"import mlflow.sklearn\n",
|
||||
"from sklearn.ensemble import GradientBoostingClassifier\n",
|
||||
"from sklearn.metrics import classification_report\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"def main():\n",
|
||||
" \"\"\"Main function of the script.\"\"\"\n",
|
||||
"\n",
|
||||
" # input and output arguments\n",
|
||||
" parser = argparse.ArgumentParser()\n",
|
||||
" parser.add_argument(\"--data\", type=str, help=\"path to input data\")\n",
|
||||
" parser.add_argument(\"--test_train_ratio\", type=float, required=False, default=0.25)\n",
|
||||
" parser.add_argument(\"--n_estimators\", required=False, default=100, type=int)\n",
|
||||
" parser.add_argument(\"--learning_rate\", required=False, default=0.1, type=float)\n",
|
||||
" parser.add_argument(\"--registered_model_name\", type=str, help=\"model name\")\n",
|
||||
" args = parser.parse_args()\n",
|
||||
" \n",
|
||||
" # Start Logging\n",
|
||||
" mlflow.start_run()\n",
|
||||
"\n",
|
||||
" # enable autologging\n",
|
||||
" mlflow.sklearn.autolog()\n",
|
||||
"\n",
|
||||
" ###################\n",
|
||||
" #<prepare the data>\n",
|
||||
" ###################\n",
|
||||
" print(\" \".join(f\"{k}={v}\" for k, v in vars(args).items()))\n",
|
||||
"\n",
|
||||
" print(\"input data:\", args.data)\n",
|
||||
" \n",
|
||||
" credit_df = pd.read_csv(args.data, header=1, index_col=0)\n",
|
||||
"\n",
|
||||
" mlflow.log_metric(\"num_samples\", credit_df.shape[0])\n",
|
||||
" mlflow.log_metric(\"num_features\", credit_df.shape[1] - 1)\n",
|
||||
"\n",
|
||||
" train_df, test_df = train_test_split(\n",
|
||||
" credit_df,\n",
|
||||
" test_size=args.test_train_ratio,\n",
|
||||
" )\n",
|
||||
" ####################\n",
|
||||
" #</prepare the data>\n",
|
||||
" ####################\n",
|
||||
"\n",
|
||||
" ##################\n",
|
||||
" #<train the model>\n",
|
||||
" ##################\n",
|
||||
" # Extracting the label column\n",
|
||||
" y_train = train_df.pop(\"default payment next month\")\n",
|
||||
"\n",
|
||||
" # convert the dataframe values to array\n",
|
||||
" X_train = train_df.values\n",
|
||||
"\n",
|
||||
" # Extracting the label column\n",
|
||||
" y_test = test_df.pop(\"default payment next month\")\n",
|
||||
"\n",
|
||||
" # convert the dataframe values to array\n",
|
||||
" X_test = test_df.values\n",
|
||||
"\n",
|
||||
" print(f\"Training with data of shape {X_train.shape}\")\n",
|
||||
"\n",
|
||||
" clf = GradientBoostingClassifier(\n",
|
||||
" n_estimators=args.n_estimators, learning_rate=args.learning_rate\n",
|
||||
" )\n",
|
||||
" clf.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
" y_pred = clf.predict(X_test)\n",
|
||||
"\n",
|
||||
" print(classification_report(y_test, y_pred))\n",
|
||||
" ###################\n",
|
||||
" #</train the model>\n",
|
||||
" ###################\n",
|
||||
"\n",
|
||||
" ##########################\n",
|
||||
" #<save and register model>\n",
|
||||
" ##########################\n",
|
||||
" # Registering the model to the workspace\n",
|
||||
" print(\"Registering the model via MLFlow\")\n",
|
||||
" mlflow.sklearn.log_model(\n",
|
||||
" sk_model=clf,\n",
|
||||
" registered_model_name=args.registered_model_name,\n",
|
||||
" artifact_path=args.registered_model_name,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Saving the model to a file\n",
|
||||
" mlflow.sklearn.save_model(\n",
|
||||
" sk_model=clf,\n",
|
||||
" path=os.path.join(args.registered_model_name, \"trained_model\"),\n",
|
||||
" )\n",
|
||||
" ###########################\n",
|
||||
" #</save and register model>\n",
|
||||
" ###########################\n",
|
||||
" \n",
|
||||
" # Stop Logging\n",
|
||||
" mlflow.end_run()\n",
|
||||
"\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" main()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As you can see in this script, once the model is trained, the model file is saved and registered to the workspace. Now you can use the registered model in inferencing endpoints.\n",
|
||||
"\n",
|
||||
"You might need to select **Refresh** to see the new folder and script in your **Files**.\n",
|
||||
"\n",
|
||||
"![refresh](./media/refresh.png)\n",
|
||||
"\n",
|
||||
"## Create a compute cluster, a scalable way to run a training job\n",
|
||||
"\n",
|
||||
"You already have a compute instance, which you're using to run the notebook. Now you'll add a second type of compute, a **compute cluster** that you'll use to run your training job. While a compute instance is a single node machine, a compute cluster can be single or multi-node machines with Linux or Windows OS, or a specific compute fabric like Spark.\n",
|
||||
"\n",
|
||||
"You'll provision a Linux compute cluster. See the [full list on VM sizes and prices](https://azure.microsoft.com/pricing/details/machine-learning/) .\n",
|
||||
"\n",
|
||||
"For this example, you only need a basic cluster, so you'll use a Standard_DS3_v2 model with 2 vCPU cores, 7-GB RAM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure.ai.ml.entities import AmlCompute\n",
|
||||
"\n",
|
||||
"# Name assigned to the compute cluster\n",
|
||||
"cpu_compute_target = \"cpu-cluster\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" # let's see if the compute target already exists\n",
|
||||
" cpu_cluster = ml_client.compute.get(cpu_compute_target)\n",
|
||||
" print(\n",
|
||||
" f\"You already have a cluster named {cpu_compute_target}, we'll reuse it as is.\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"except Exception:\n",
|
||||
" print(\"Creating a new cpu compute target...\")\n",
|
||||
"\n",
|
||||
" # Let's create the Azure Machine Learning compute object with the intended parameters\n",
|
||||
" cpu_cluster = AmlCompute(\n",
|
||||
" name=cpu_compute_target,\n",
|
||||
" # Azure Machine Learning Compute is the on-demand VM service\n",
|
||||
" type=\"amlcompute\",\n",
|
||||
" # VM Family\n",
|
||||
" size=\"STANDARD_DS3_V2\",\n",
|
||||
" # Minimum running nodes when there is no job running\n",
|
||||
" min_instances=0,\n",
|
||||
" # Nodes in cluster\n",
|
||||
" max_instances=4,\n",
|
||||
" # How many seconds will the node running after the job termination\n",
|
||||
" idle_time_before_scale_down=180,\n",
|
||||
" # Dedicated or LowPriority. The latter is cheaper but there is a chance of job termination\n",
|
||||
" tier=\"Dedicated\",\n",
|
||||
" )\n",
|
||||
" print(\n",
|
||||
" f\"AMLCompute with name {cpu_cluster.name} will be created, with compute size {cpu_cluster.size}\"\n",
|
||||
" )\n",
|
||||
" # Now, we pass the object to MLClient's create_or_update method\n",
|
||||
" cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure the command\n",
|
||||
"\n",
|
||||
"Now that you have a script that can perform the desired tasks, and a compute cluster to run the script, you'll use a general purpose **command** that can run command line actions. This command line action can directly call system commands or run a script. \n",
|
||||
"\n",
|
||||
"Here, you'll create input variables to specify the input data, split ratio, learning rate and registered model name. The command script will:\n",
|
||||
"* Use the compute cluster to run the command.\n",
|
||||
"* Use an *environment* that defines software and runtime libraries needed for the training script. Azure Machine Learning provides many curated or ready-made environments, which are useful for common training and inference scenarios. You'll use one of those environments here. In the [Train a model](train-model.ipynb) tutorial, you'll learn how to create a custom environment. \n",
|
||||
"* Configure some metadata like display name, experiment name etc. An *experiment* is a container for all the iterations you do on a certain project. All the jobs submitted under the same experiment name would be listed next to each other in Azure Machine Learning studio.\n",
|
||||
"* Configure the command line action itself - `python main.py` in this case. The inputs/outputs are accessible in the command via the `${{ ... }}` notation.\n",
|
||||
"* In this sample, we access the data from a file on the internet. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1679003747393
|
||||
},
|
||||
"name": "registered_model_name"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure.ai.ml import command\n",
|
||||
"from azure.ai.ml import Input\n",
|
||||
"\n",
|
||||
"registered_model_name = \"credit_defaults_model\"\n",
|
||||
"\n",
|
||||
"job = command(\n",
|
||||
" inputs=dict(\n",
|
||||
" data=Input(\n",
|
||||
" type=\"uri_file\",\n",
|
||||
" path=\"https://azuremlexamples.blob.core.windows.net/datasets/credit_card/default_of_credit_card_clients.csv\",\n",
|
||||
" ),\n",
|
||||
" test_train_ratio=0.2,\n",
|
||||
" learning_rate=0.25,\n",
|
||||
" registered_model_name=registered_model_name,\n",
|
||||
" ),\n",
|
||||
" code=\"./src/\", # location of source code\n",
|
||||
" command=\"python main.py --data ${{inputs.data}} --test_train_ratio ${{inputs.test_train_ratio}} --learning_rate ${{inputs.learning_rate}} --registered_model_name ${{inputs.registered_model_name}}\",\n",
|
||||
" environment=\"AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest\",\n",
|
||||
" compute=\"cpu-cluster\",\n",
|
||||
" experiment_name=\"train_model_credit_default_prediction\",\n",
|
||||
" display_name=\"credit_default_prediction\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Submit the job \n",
|
||||
"\n",
|
||||
"It's now time to submit the job to run in Azure Machine Learning. This time you'll use `create_or_update` on `ml_client`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1679003755505
|
||||
},
|
||||
"name": "create_job"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ml_client.create_or_update(job)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## View job output and wait for job completion\n",
|
||||
"\n",
|
||||
"View the job in Azure Machine Learning studio by selecting the link in the output of the previous cell. \n",
|
||||
"\n",
|
||||
"The output of this job will look like this in the Azure Machine Learning studio. Explore the tabs for various details like metrics, outputs etc. Once completed, the job will register a model in your workspace as a result of training. \n",
|
||||
"\n",
|
||||
"![Screenshot that shows the job overview](./media/view-job.gif \"View the job in studio\")\n",
|
||||
"\n",
|
||||
"> [!IMPORTANT]\n",
|
||||
"> Wait until the status of the job is complete before returning to this notebook to continue. The job will take 2 to 3 minutes to run. It could take longer (up to 10 minutes) if the compute cluster has been scaled down to zero nodes and custom environment is still building."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy the model as an online endpoint\n",
|
||||
"\n",
|
||||
"Now deploy your machine learning model as a web service in the Azure cloud, an [`online endpoint`](https://docs.microsoft.com/azure/machine-learning/concept-endpoints).\n",
|
||||
"\n",
|
||||
"To deploy a machine learning service, you'll use the model you registered."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a new online endpoint\n",
|
||||
"\n",
|
||||
"Now that you have a registered model, it's time to create your online endpoint. The endpoint name needs to be unique in the entire Azure region. For this tutorial, you'll create a unique name using [`UUID`](https://en.wikipedia.org/wiki/Universally_unique_identifier#:~:text=A%20universally%20unique%20identifier%20)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1679003781233
|
||||
},
|
||||
"name": "online_endpoint_name"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import uuid\n",
|
||||
"\n",
|
||||
"# Creating a unique name for the endpoint\n",
|
||||
"online_endpoint_name = \"credit-endpoint-\" + str(uuid.uuid4())[:8]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create the endpoint:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1679003878862
|
||||
},
|
||||
"name": "endpoint"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Expect the endpoint creation to take a few minutes\n",
|
||||
"from azure.ai.ml.entities import (\n",
|
||||
" ManagedOnlineEndpoint,\n",
|
||||
" ManagedOnlineDeployment,\n",
|
||||
" Model,\n",
|
||||
" Environment,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# create an online endpoint\n",
|
||||
"endpoint = ManagedOnlineEndpoint(\n",
|
||||
" name=online_endpoint_name,\n",
|
||||
" description=\"this is an online endpoint\",\n",
|
||||
" auth_mode=\"key\",\n",
|
||||
" tags={\n",
|
||||
" \"training_dataset\": \"credit_defaults\",\n",
|
||||
" \"model_type\": \"sklearn.GradientBoostingClassifier\",\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"endpoint = ml_client.online_endpoints.begin_create_or_update(endpoint).result()\n",
|
||||
"\n",
|
||||
"print(f\"Endpoint {endpoint.name} provisioning state: {endpoint.provisioning_state}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> [!NOTE]\n",
|
||||
"> Expect the endpoint creation to take a few minutes.\n",
|
||||
"\n",
|
||||
"Once the endpoint has been created, you can retrieve it as below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1679003879481
|
||||
},
|
||||
"name": "retrieve_endpoint"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)\n",
|
||||
"\n",
|
||||
"print(\n",
|
||||
" f'Endpoint \"{endpoint.name}\" with provisioning state \"{endpoint.provisioning_state}\" is retrieved'\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy the model to the endpoint\n",
|
||||
"\n",
|
||||
"Once the endpoint is created, deploy the model with the entry script. Each endpoint can have multiple deployments. Direct traffic to these deployments can be specified using rules. Here you'll create a single deployment that handles 100% of the incoming traffic. We have chosen a color name for the deployment, for example, *blue*, *green*, *red* deployments, which is arbitrary.\n",
|
||||
"\n",
|
||||
"You can check the **Models** page on Azure Machine Learning studio, to identify the latest version of your registered model. Alternatively, the code below will retrieve the latest version number for you to use."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1679003879136
|
||||
},
|
||||
"name": "latest_model_version"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Let's pick the latest version of the model\n",
|
||||
"latest_model_version = max(\n",
|
||||
" [int(m.version) for m in ml_client.models.list(name=registered_model_name)]\n",
|
||||
")\n",
|
||||
"print(f'Latest model is version \"{latest_model_version}\" ')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Deploy the latest version of the model. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1679004373833
|
||||
},
|
||||
"name": "blue_deployment"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# picking the model to deploy. Here we use the latest version of our registered model\n",
|
||||
"model = ml_client.models.get(name=registered_model_name, version=latest_model_version)\n",
|
||||
"\n",
|
||||
"# Expect this deployment to take approximately 6 to 8 minutes.\n",
|
||||
"# create an online deployment.\n",
|
||||
"blue_deployment = ManagedOnlineDeployment(\n",
|
||||
" name=\"blue\",\n",
|
||||
" endpoint_name=online_endpoint_name,\n",
|
||||
" model=model,\n",
|
||||
" instance_type=\"Standard_DS3_v2\",\n",
|
||||
" instance_count=1,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"blue_deployment = ml_client.begin_create_or_update(blue_deployment).result()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> [!NOTE]\n",
|
||||
"> Expect this deployment to take approximately 6 to 8 minutes.\n",
|
||||
"\n",
|
||||
"When the deployment is done, you're ready to test it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test with a sample query\n",
|
||||
"\n",
|
||||
"Once the model is deployed to the endpoint, you can run inference with it.\n",
|
||||
"\n",
|
||||
"Create a sample request file following the design expected in the run method in the score script."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1679004374166
|
||||
},
|
||||
"name": "deploy_dir"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"deploy_dir = \"./deploy\"\n",
|
||||
"os.makedirs(deploy_dir, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"name": "write_sample"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile {deploy_dir}/sample-request.json\n",
|
||||
"{\n",
|
||||
" \"input_data\": {\n",
|
||||
" \"columns\": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22],\n",
|
||||
" \"index\": [0, 1],\n",
|
||||
" \"data\": [\n",
|
||||
" [20000,2,2,1,24,2,2,-1,-1,-2,-2,3913,3102,689,0,0,0,0,689,0,0,0,0],\n",
|
||||
" [10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 10, 9, 8]\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"name": "test"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# test the blue deployment with some sample data\n",
|
||||
"ml_client.online_endpoints.invoke(\n",
|
||||
" endpoint_name=online_endpoint_name,\n",
|
||||
" request_file=\"./deploy/sample-request.json\",\n",
|
||||
" deployment_name=\"blue\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Clean up resources\n",
|
||||
"\n",
|
||||
"If you're not going to use the endpoint, delete it to stop using the resource. Make sure no other deployments are using an endpoint before you delete it.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"> [!NOTE]\n",
|
||||
"> Expect the complete deletion to take approximately 20 minutes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"name": "delete_endpoint"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ml_client.online_endpoints.begin_delete(name=online_endpoint_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next Steps\n",
|
||||
"\n",
|
||||
"You now have an Azure Machine Learning workspace, which contains a compute instance to use for your development environment.\n",
|
||||
"\n",
|
||||
"Continue on to learn how to use the compute instance to run notebooks and scripts in the Azure Machine Learning cloud. \n",
|
||||
"\n",
|
||||
"|Tutorial |Description |\n",
|
||||
"|---------|---------|\n",
|
||||
"| [Tutorial: Upload, access and explore your data in Azure Machine Learning](https://learn.microsoft.com/azure/tutorial-explore-data) | Store large data in the cloud and retrieve it from notebooks and scripts |\n",
|
||||
"| [Tutorial: Model development on a cloud workstation](https://learn.microsoft.com/azure/tutorial-cloud-workstation) | Start prototyping and developing machine learning models |\n",
|
||||
"| [Tutorial: Train a model in Azure Machine Learning](https://learn.microsoft.com/azure/tutorial-train-model) | Dive in to the details of training a model |\n",
|
||||
"| [Tutorial: Deploy a model as an online endpoint](https://learn.microsoft.com/azure/tutorial-deploy-model) | Dive in to the details of deploying a model |\n",
|
||||
"| [Tutorial: Create production machine learning pipelines](https://learn.microsoft.com/azure/tutorial-pipeline-python-sdk) | Split a complete machine learning task into a multistep workflow. |"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"description": "Learn how a data scientist uses Azure Machine Learning (Azure ML) to train a model, then use the model for prediction. This tutorial will help you become familiar with the core concepts of Azure ML and their most common usage.",
|
||||
"kernel_info": {
|
||||
"name": "python310-sdkv2"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.10 - SDK v2",
|
||||
"language": "python",
|
||||
"name": "python310-sdkv2"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
},
|
||||
"microsoft": {
|
||||
"ms_spell_check": {
|
||||
"ms_spell_check_language": "en"
|
||||
}
|
||||
},
|
||||
"nteract": {
|
||||
"version": "nteract-front-end@1.0.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,641 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Day 1: Train a model\n",
|
||||
"\n",
|
||||
"Learn how a data scientist uses Azure Machine Learning to train a model. In this example, we use the associated credit card dataset to show how you can use Azure Machine Learning for a classification problem. The goal is to predict if a customer has a high likelihood of defaulting on a credit card payment.\n",
|
||||
"\n",
|
||||
"The training script handles the data preparation, then trains and registers a model. This tutorial takes you through steps to submit a cloud-based training job (command job). If you would like to learn more about how to load your data into Azure, see [Create data assets](how-to-create-data-assets.md). \n",
|
||||
"\n",
|
||||
"The steps are:\n",
|
||||
"\n",
|
||||
" * Get a handle to your Azure Machine Learning workspace\n",
|
||||
" * Create your compute resource and job environment\n",
|
||||
" * Create your training script\n",
|
||||
" * Create and run your command job to run the training script on the compute resource, configured with the appropriate job environment and the data source\n",
|
||||
" * View the output of your training script\n",
|
||||
" * Deploy the newly-trained model as an endpoint\n",
|
||||
" * Call the Azure Machine Learning endpoint for inferencing"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"* If you opened this notebook from Azure Machine Learning studio, you need a compute instance to run the code. If you don't have a compute instance, select **Create compute** on the toolbar to first create one. You can use all the default settings. \n",
|
||||
"\n",
|
||||
" ![Create compute](./media/create-compute.png)\n",
|
||||
"\n",
|
||||
"* If you're seeing this notebook elsewhere, complete [Create resources you need to get started](https://docs.microsoft.com/azure/machine-learning/quickstart-create-resources) to create an Azure Machine Learning workspace and a compute instance.\n",
|
||||
"\n",
|
||||
"## Set your kernel\n",
|
||||
"\n",
|
||||
"* If your compute instance is stopped, start it now. \n",
|
||||
" \n",
|
||||
" ![Start compute](./media/start-compute.png)\n",
|
||||
"\n",
|
||||
"* Once your compute instance is running, make sure the that the kernel, found on the top right, is `Python 3.10 - SDK v2`. If not, use the dropdown to select this kernel.\n",
|
||||
"\n",
|
||||
" ![Set the kernel](./media/set-kernel.png)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Use a command job to train a model in Azure Machine Learning\n",
|
||||
"\n",
|
||||
"To train a model, you need to submit a *job*. The type of job you'll submit in this tutorial is a *command job*. Azure Machine Learning offers several different types of jobs to train models. Users can select their method of training based on complexity of the model, data size, and training speed requirements. In this tutorial, you'll learn how to submit a *command job* to run a *training script*. \n",
|
||||
"\n",
|
||||
"A command job is a function that allows you to submit a custom training script to train your model. This can also be defined as a custom training job. A command job in Azure Machine Learning is a type of job that runs a script or command in a specified environment. You can use command jobs to train models, process data, or any other custom code you want to execute in the cloud. \n",
|
||||
"\n",
|
||||
"In this tutorial, we'll focus on using a command job to create a custom training job that we'll use to train a model. For any custom training job, the below items are required:\n",
|
||||
"\n",
|
||||
"* compute resource (usually a compute cluster, which we recommend for scalability)\n",
|
||||
"* environment\n",
|
||||
"* data\n",
|
||||
"* command job \n",
|
||||
"* training script\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"In this tutorial we'll provide all these items for our example: creating a classifier to predict customers who have a high likelihood of defaulting on credit card payments.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create handle to workspace\n",
|
||||
"\n",
|
||||
"Before we dive in the code, you need a way to reference your workspace. You'll create `ml_client` for a handle to the workspace. You'll then use `ml_client` to manage resources and jobs.\n",
|
||||
"\n",
|
||||
"In the next cell, enter your Subscription ID, Resource Group name and Workspace name. To find these values:\n",
|
||||
"\n",
|
||||
"1. In the upper right Azure Machine Learning studio toolbar, select your workspace name.\n",
|
||||
"1. Copy the value for workspace, resource group and subscription ID into the code.\n",
|
||||
"1. You'll need to copy one value, close the area and paste, then come back for the next one."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1677262283435
|
||||
},
|
||||
"name": "credential"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure.ai.ml import MLClient\n",
|
||||
"from azure.identity import DefaultAzureCredential\n",
|
||||
"\n",
|
||||
"# authenticate\n",
|
||||
"credential = DefaultAzureCredential()\n",
|
||||
"# # Get a handle to the workspace\n",
|
||||
"ml_client = MLClient(\n",
|
||||
" credential=credential,\n",
|
||||
" subscription_id=\"<SUBSCRIPTION_ID>\",\n",
|
||||
" resource_group_name=\"<RESOURCE_GROUP>\",\n",
|
||||
" workspace_name=\"<AML_WORKSPACE_NAME>\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"> [!NOTE]\n",
|
||||
"> Creating MLClient will not connect to the workspace. The client initialization is lazy, it will wait for the first time it needs to make a call (in the notebook below, that will happen during compute creation)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a compute cluster to run your job\n",
|
||||
"\n",
|
||||
"In Azure, a job can refer to several tasks that Azure allows its users to do: training, pipeline creation, deployment, etc. For this tutorial and our purpose of training a machine learning model, we'll use *job* as a reference to running training computations (*training job*).\n",
|
||||
"\n",
|
||||
"You need a compute resource for running any job in Azure Machine Learning. It can be single or multi-node machines with Linux or Windows OS, or a specific compute fabric like Spark. In Azure, there are two compute resources that you can choose from: instance and cluster. A compute instance contains one node of computation resources while a *compute cluster* contains several. A *compute cluster* contains more memory for the computation task. For training, we recommend using a compute cluster because it allows the user to distribute calculations on multiple nodes of computation, which results in a faster training experience. \n",
|
||||
"\n",
|
||||
"You provision a Linux compute cluster. See the [full list on VM sizes and prices](https://azure.microsoft.com/pricing/details/machine-learning/) .\n",
|
||||
"\n",
|
||||
"For this example, you only need a basic cluster, so you use a Standard_DS3_v2 model with 2 vCPU cores, 7-GB RAM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1677262287630
|
||||
},
|
||||
"name": "cpu_compute_target"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure.ai.ml.entities import AmlCompute\n",
|
||||
"\n",
|
||||
"# Name assigned to the compute cluster\n",
|
||||
"cpu_compute_target = \"cpu-cluster\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" # let's see if the compute target already exists\n",
|
||||
" cpu_cluster = ml_client.compute.get(cpu_compute_target)\n",
|
||||
" print(\n",
|
||||
" f\"You already have a cluster named {cpu_compute_target}, we'll reuse it as is.\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"except Exception:\n",
|
||||
" print(\"Creating a new cpu compute target...\")\n",
|
||||
"\n",
|
||||
" # Let's create the Azure Machine Learning compute object with the intended parameters\n",
|
||||
" cpu_cluster = AmlCompute(\n",
|
||||
" name=cpu_compute_target,\n",
|
||||
" # Azure Machine Learning Compute is the on-demand VM service\n",
|
||||
" type=\"amlcompute\",\n",
|
||||
" # VM Family\n",
|
||||
" size=\"STANDARD_DS3_V2\",\n",
|
||||
" # Minimum running nodes when there is no job running\n",
|
||||
" min_instances=0,\n",
|
||||
" # Nodes in cluster\n",
|
||||
" max_instances=4,\n",
|
||||
" # How many seconds will the node running after the job termination\n",
|
||||
" idle_time_before_scale_down=180,\n",
|
||||
" # Dedicated or LowPriority. The latter is cheaper but there is a chance of job termination\n",
|
||||
" tier=\"Dedicated\",\n",
|
||||
" )\n",
|
||||
" print(\n",
|
||||
" f\"AMLCompute with name {cpu_cluster.name} will be created, with compute size {cpu_cluster.size}\"\n",
|
||||
" )\n",
|
||||
" # Now, we pass the object to MLClient's create_or_update method\n",
|
||||
" cpu_cluster = ml_client.compute.begin_create_or_update(cpu_cluster)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a job environment\n",
|
||||
"\n",
|
||||
"To run your Azure Machine Learning job on your compute resource, you need an [environment](https://learn.microsoft.com/articles/machine-learning/concept-environments). An environment lists the software runtime and libraries that you want installed on the compute where you’ll be training. It's similar to your python environment on your local machine.\n",
|
||||
"\n",
|
||||
"Azure Machine Learning provides many curated or ready-made environments, which are useful for common training and inference scenarios. \n",
|
||||
"\n",
|
||||
"In this example, you'll create a custom conda environment for your jobs, using a conda yaml file.\n",
|
||||
"\n",
|
||||
"First, create a directory to store the file in."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1677262301389
|
||||
},
|
||||
"name": "dependencies_dir"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"dependencies_dir = \"./dependencies\"\n",
|
||||
"os.makedirs(dependencies_dir, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The cell below uses IPython magic to write the conda file into the directory you just created."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"name": "write_model"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile {dependencies_dir}/conda.yml\n",
|
||||
"name: model-env\n",
|
||||
"channels:\n",
|
||||
" - conda-forge\n",
|
||||
"dependencies:\n",
|
||||
" - python=3.8\n",
|
||||
" - numpy=1.21.2\n",
|
||||
" - pip=21.2.4\n",
|
||||
" - scikit-learn=0.24.2\n",
|
||||
" - scipy=1.7.1\n",
|
||||
" - pandas>=1.1,<1.2\n",
|
||||
" - pip:\n",
|
||||
" - inference-schema[numpy-support]==1.3.0\n",
|
||||
" - mlflow== 1.26.1\n",
|
||||
" - azureml-mlflow==1.42.0\n",
|
||||
" - psutil>=5.8,<5.9\n",
|
||||
" - tqdm>=4.59,<4.60\n",
|
||||
" - ipykernel~=6.0\n",
|
||||
" - matplotlib"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"The specification contains some common packages that you'll use in your job (such as numpy and pip).\n",
|
||||
"\n",
|
||||
"Reference this *yaml* file to create and register this custom environment in your workspace:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1677262314695
|
||||
},
|
||||
"name": "custom_env_name"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure.ai.ml.entities import Environment\n",
|
||||
"\n",
|
||||
"custom_env_name = \"aml-scikit-learn\"\n",
|
||||
"\n",
|
||||
"custom_job_env = Environment(\n",
|
||||
" name=custom_env_name,\n",
|
||||
" description=\"Custom environment for Credit Card Defaults job\",\n",
|
||||
" tags={\"scikit-learn\": \"0.24.2\"},\n",
|
||||
" conda_file=os.path.join(dependencies_dir, \"conda.yml\"),\n",
|
||||
" image=\"mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:latest\",\n",
|
||||
")\n",
|
||||
"custom_job_env = ml_client.environments.create_or_update(custom_job_env)\n",
|
||||
"\n",
|
||||
"print(\n",
|
||||
" f\"Environment with name {custom_job_env.name} is registered to workspace, the environment version is {custom_job_env.version}\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure a training job using the command function\n",
|
||||
"\n",
|
||||
"You create an Azure Machine Learning *command job* to train a model for credit default prediction. The command job runs a *training script* in a specified environment on a specified compute resource. You've already created the environment and the compute cluster. Next you'll create the training script. In this case, you use the dataset to train a classifier with the `GradientBoostingClassifier` model. \n",
|
||||
"\n",
|
||||
"The *training script* handles the data preparation, training and registering of the trained model. The method `train_test_split` handles splitting the dataset into test and training data. In this tutorial, you'll create a Python training script. \n",
|
||||
"\n",
|
||||
"Command jobs can be run from CLI, Python SDK, or studio interface. In this tutorial, you'll use the Azure Machine Learning Python SDK v2 to create and run the command job.\n",
|
||||
"\n",
|
||||
"## Create training script\n",
|
||||
"\n",
|
||||
"Let's start by creating the training script - the *main.py* python file.\n",
|
||||
"\n",
|
||||
"First create a source folder for the script:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1677262322022
|
||||
},
|
||||
"name": "train_src_dir"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"train_src_dir = \"./src\"\n",
|
||||
"os.makedirs(train_src_dir, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This script handles the preprocessing of the data, splitting it into test and train data. It then consumes this data to train a tree-based model and return the output model. \n",
|
||||
"\n",
|
||||
"[MLflow](https://learn.microsoft.com/articles/machine-learning/concept-mlflow) is used to log the parameters and metrics during our job. The MLflow package allows you to keep track of metrics and results for each model Azure trains. We'll be using MLflow to first get the best model for our data, then we'll view the model's metrics on the Azure studio. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"name": "write_main"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile {train_src_dir}/main.py\n",
|
||||
"import os\n",
|
||||
"import argparse\n",
|
||||
"import pandas as pd\n",
|
||||
"import mlflow\n",
|
||||
"import mlflow.sklearn\n",
|
||||
"from sklearn.ensemble import GradientBoostingClassifier\n",
|
||||
"from sklearn.metrics import classification_report\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"def main():\n",
|
||||
" \"\"\"Main function of the script.\"\"\"\n",
|
||||
"\n",
|
||||
" # input and output arguments\n",
|
||||
" parser = argparse.ArgumentParser()\n",
|
||||
" parser.add_argument(\"--data\", type=str, help=\"path to input data\")\n",
|
||||
" parser.add_argument(\"--test_train_ratio\", type=float, required=False, default=0.25)\n",
|
||||
" parser.add_argument(\"--n_estimators\", required=False, default=100, type=int)\n",
|
||||
" parser.add_argument(\"--learning_rate\", required=False, default=0.1, type=float)\n",
|
||||
" parser.add_argument(\"--registered_model_name\", type=str, help=\"model name\")\n",
|
||||
" args = parser.parse_args()\n",
|
||||
" \n",
|
||||
" # Start Logging\n",
|
||||
" mlflow.start_run()\n",
|
||||
"\n",
|
||||
" # enable autologging\n",
|
||||
" mlflow.sklearn.autolog()\n",
|
||||
"\n",
|
||||
" ###################\n",
|
||||
" #<prepare the data>\n",
|
||||
" ###################\n",
|
||||
" print(\" \".join(f\"{k}={v}\" for k, v in vars(args).items()))\n",
|
||||
"\n",
|
||||
" print(\"input data:\", args.data)\n",
|
||||
" \n",
|
||||
" credit_df = pd.read_csv(args.data, header=1, index_col=0)\n",
|
||||
"\n",
|
||||
" mlflow.log_metric(\"num_samples\", credit_df.shape[0])\n",
|
||||
" mlflow.log_metric(\"num_features\", credit_df.shape[1] - 1)\n",
|
||||
"\n",
|
||||
" #Split train and test datasets\n",
|
||||
" train_df, test_df = train_test_split(\n",
|
||||
" credit_df,\n",
|
||||
" test_size=args.test_train_ratio,\n",
|
||||
" )\n",
|
||||
" ####################\n",
|
||||
" #</prepare the data>\n",
|
||||
" ####################\n",
|
||||
"\n",
|
||||
" ##################\n",
|
||||
" #<train the model>\n",
|
||||
" ##################\n",
|
||||
" # Extracting the label column\n",
|
||||
" y_train = train_df.pop(\"default payment next month\")\n",
|
||||
"\n",
|
||||
" # convert the dataframe values to array\n",
|
||||
" X_train = train_df.values\n",
|
||||
"\n",
|
||||
" # Extracting the label column\n",
|
||||
" y_test = test_df.pop(\"default payment next month\")\n",
|
||||
"\n",
|
||||
" # convert the dataframe values to array\n",
|
||||
" X_test = test_df.values\n",
|
||||
"\n",
|
||||
" print(f\"Training with data of shape {X_train.shape}\")\n",
|
||||
"\n",
|
||||
" clf = GradientBoostingClassifier(\n",
|
||||
" n_estimators=args.n_estimators, learning_rate=args.learning_rate\n",
|
||||
" )\n",
|
||||
" clf.fit(X_train, y_train)\n",
|
||||
"\n",
|
||||
" y_pred = clf.predict(X_test)\n",
|
||||
"\n",
|
||||
" print(classification_report(y_test, y_pred))\n",
|
||||
" ###################\n",
|
||||
" #</train the model>\n",
|
||||
" ###################\n",
|
||||
"\n",
|
||||
" ##########################\n",
|
||||
" #<save and register model>\n",
|
||||
" ##########################\n",
|
||||
" # Registering the model to the workspace\n",
|
||||
" print(\"Registering the model via MLFlow\")\n",
|
||||
" mlflow.sklearn.log_model(\n",
|
||||
" sk_model=clf,\n",
|
||||
" registered_model_name=args.registered_model_name,\n",
|
||||
" artifact_path=args.registered_model_name,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Saving the model to a file\n",
|
||||
" mlflow.sklearn.save_model(\n",
|
||||
" sk_model=clf,\n",
|
||||
" path=os.path.join(args.registered_model_name, \"trained_model\"),\n",
|
||||
" )\n",
|
||||
" ###########################\n",
|
||||
" #</save and register model>\n",
|
||||
" ###########################\n",
|
||||
" \n",
|
||||
" # Stop Logging\n",
|
||||
" mlflow.end_run()\n",
|
||||
"\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" main()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this script, once the model is trained, the model file is saved and registered to the workspace. Registering your model allows you to store and version your models in the Azure cloud, in your workspace. Once you register a model, you can find it alongside all your other registered models in one place in the Azure Studio, called the model registry. The model registry helps you organize and keep track of your trained models. \n",
|
||||
"\n",
|
||||
"## Configure the command\n",
|
||||
"\n",
|
||||
"Now that you have a script that can perform the classification task, use the general purpose **command** that can run command line actions. This command line action can directly call system commands or run a script. \n",
|
||||
"\n",
|
||||
"Here, create input variables to specify the input data, split ratio, learning rate and registered model name. The command script will:\n",
|
||||
"* Use the compute created earlier to run this command.\n",
|
||||
"* Use the environment created earlier - you can use the `@latest` notation to indicate the latest version of the environment when the command is run.\n",
|
||||
"* Configure some metadata like display name, experiment name etc. An *experiment* is a container for all the iterations you do on a certain project. All the jobs submitted under the same experiment name are next to each other in Azure Machine Learning studio.\n",
|
||||
"* Configure the command line action itself - `python main.py` in this case. The inputs/outputs are accessible in the command via the `${{ ... }}` notation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1677262332367
|
||||
},
|
||||
"name": "registered_model_name"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure.ai.ml import command\n",
|
||||
"from azure.ai.ml import Input\n",
|
||||
"\n",
|
||||
"registered_model_name = \"credit_defaults_model\"\n",
|
||||
"\n",
|
||||
"job = command(\n",
|
||||
" inputs=dict(\n",
|
||||
" data=Input(\n",
|
||||
" type=\"uri_file\",\n",
|
||||
" path=\"https://azuremlexamples.blob.core.windows.net/datasets/credit_card/default_of_credit_card_clients.csv\",\n",
|
||||
" ),\n",
|
||||
" test_train_ratio=0.2,\n",
|
||||
" learning_rate=0.25,\n",
|
||||
" registered_model_name=registered_model_name,\n",
|
||||
" ),\n",
|
||||
" code=\"./src/\", # location of source code\n",
|
||||
" command=\"python main.py --data ${{inputs.data}} --test_train_ratio ${{inputs.test_train_ratio}} --learning_rate ${{inputs.learning_rate}} --registered_model_name ${{inputs.registered_model_name}}\",\n",
|
||||
" environment=\"aml-scikit-learn@latest\",\n",
|
||||
" compute=\"cpu-cluster\",\n",
|
||||
" experiment_name=\"train_model_credit_default_prediction\",\n",
|
||||
" display_name=\"credit_default_prediction\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Submit the job \n",
|
||||
"\n",
|
||||
"It's now time to submit the job to run in Azure Machine Learning studio. This time you'll use `create_or_update` on `ml_client`. `ml_client` is a client class that allows you to connect to your Azure subscription using Python and interact with Azure Machine Learning services. `ml_client` allows you to submit your jobs using Python."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1677262345449
|
||||
},
|
||||
"name": "create_job"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ml_client.create_or_update(job)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## View job output and wait for job completion\n",
|
||||
"\n",
|
||||
"View the job in Azure Machine Learning studio by selecting the link in the output of the previous cell. The output of this job will look like this in the Azure Machine Learning studio. Explore the tabs for various details like metrics, outputs etc. Once completed, the job will register a model in your workspace as a result of training. \n",
|
||||
"\n",
|
||||
"![Screenshot shows the overview page for the job.](./media/view-job.gif)\n",
|
||||
"\n",
|
||||
"> [!IMPORTANT]\n",
|
||||
"> Wait until the status of the job is complete before returning to this notebook to continue. The job will take 2 to 3 minutes to run. It could take longer (up to 10 minutes) if the compute cluster has been scaled down to zero nodes and the custom environment is still building.\n",
|
||||
"\n",
|
||||
"When you run the cell, the notebook output shows a link to the job's details page on Azure Studio. Alternatively, you can also select Jobs on the left navigation menu. A job is a grouping of many runs from a specified script or piece of code. Information for the run is stored under that job. The details page gives an overview of the job, the time it took to run, when it was created, etc. The page also has tabs to other information about the job such as metrics, Outputs + logs, and code. Listed below are the tabs available in the job's details page:\n",
|
||||
"\n",
|
||||
"* Overview: The overview section provides basic information about the job, including its status, start and end times, and the type of job that was run.\n",
|
||||
"* Inputs: The input section lists the data and code that were used as inputs for the job. This section can include datasets, scripts, environment configurations, and other resources that were used during training. \n",
|
||||
"* Outputs + logs: The Outputs + logs tab contains logs generated while the job was running. This tab assists in troubleshooting if anything goes wrong with your training script or model creation.\n",
|
||||
"* Metrics: The metrics tab showcases key performance metrics from your model such as training score, f1 score, and precision score. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Clean up resources\n",
|
||||
"\n",
|
||||
"If you plan to continue now to other tutorials, skip to [Next steps](#next-steps).\n",
|
||||
"\n",
|
||||
"### Stop compute instance\n",
|
||||
"\n",
|
||||
"If you're not going to use it now, stop the compute instance:\n",
|
||||
"\n",
|
||||
"1. In the studio, in the left navigation area, select **Compute**.\n",
|
||||
"1. In the top tabs, select **Compute instances**.\n",
|
||||
"1. Select the compute instance in the list.\n",
|
||||
"1. On the top toolbar, select **Stop**.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next Steps\n",
|
||||
"Learn about deploying a model \n",
|
||||
"\n",
|
||||
"[Deploy a model](https://learn.microsoft.com/articles/machine-learning/tutorial-deploy-model).\n",
|
||||
"\n",
|
||||
"This tutorial used an online data file. To learn more about other ways to access data, see [Tutorial: Upload, access and explore your data in Azure Machine Learning](https://learn.microsoft.com/articles/machine-learning/tutorial-explore-data).\n",
|
||||
"\n",
|
||||
"If you would like to learn more about different ways to train models in Azure Machine Learning, see [What is automated machine learning (AutoML)?](https://learn.microsoft.com/articles/machine-learning/concept-automated-ml). Automated ML is a supplemental tool to reduce the amount of time a data scientist spends finding a model that works best with their data.\n",
|
||||
"\n",
|
||||
"If you would like more examples similar to this tutorial, see [**Samples**](https://learn.microsoft.com/articles/machine-learning/quickstart-create-resources#learn-from-sample-notebooks) section of studio. These same samples are available at our [GitHub examples page.](https://github.com/Azure/azureml-examples) The examples include complete Python Notebooks that you can run to learn how to train a model. You can modify and run existing scripts from the samples, containing scenarios including classification, natural language processing, and anomaly detection. \n",
|
||||
"\n",
|
||||
"To train models by creating your own custom environments using a Docker image, see [Create an environment from a Docker build context](how-to-manage-environments-v2.md#create-an-environment-from-a-docker-build-context)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"categories": [
|
||||
"SDK v2",
|
||||
"tutorials"
|
||||
],
|
||||
"kernel_info": {
|
||||
"name": "python310-sdkv2"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.10 - SDK v2",
|
||||
"language": "python",
|
||||
"name": "python310-sdkv2"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
},
|
||||
"microsoft": {
|
||||
"host": {
|
||||
"AzureML": {
|
||||
"notebookHasBeenCompleted": true
|
||||
}
|
||||
},
|
||||
"ms_spell_check": {
|
||||
"ms_spell_check_language": "en"
|
||||
}
|
||||
},
|
||||
"nteract": {
|
||||
"version": "nteract-front-end@1.0.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
name: workstation_env
|
||||
dependencies:
|
||||
- python=3.8
|
||||
- pip=21.2.4
|
||||
- scikit-learn=0.24.2
|
||||
- scipy=1.7.1
|
||||
- pandas>=1.1,<1.2
|
||||
- pip:
|
||||
- mlflow== 1.26.1
|
||||
- azureml-mlflow==1.42.0
|
||||
- psutil>=5.8,<5.9
|
||||
- ipykernel~=6.0
|
||||
- matplotlib
|
|
@ -119,7 +119,12 @@ def write_notebook_workflow(
|
|||
forecast_import = get_forecast_reqs(name, nb_config)
|
||||
posix_folder = folder.replace(os.sep, "/")
|
||||
posix_notebook = notebook.replace(os.sep, "/")
|
||||
|
||||
if "explore-data" in name:
|
||||
runs_on = "ubuntu-20.04"
|
||||
workflow_sched = "0 */12 * * *"
|
||||
else:
|
||||
runs_on = "ubuntu-latest"
|
||||
workflow_sched = "0 */8 * * *"
|
||||
workflow_yaml = f"""{READONLY_HEADER}
|
||||
name: tutorials-{classification}-{name}
|
||||
# This file is created by tutorials/readme.py.
|
||||
|
@ -129,7 +134,7 @@ on:\n"""
|
|||
workflow_yaml += f""" workflow_dispatch:\n"""
|
||||
if enable_scheduled_runs:
|
||||
workflow_yaml += f""" schedule:
|
||||
- cron: "0 */8 * * *"\n"""
|
||||
- cron: "{workflow_sched}"\n"""
|
||||
workflow_yaml += f""" pull_request:
|
||||
branches:
|
||||
- main\n"""
|
||||
|
@ -148,7 +153,7 @@ concurrency:
|
|||
cancel-in-progress: true
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: {runs_on}
|
||||
steps:
|
||||
- name: check out repo
|
||||
uses: actions/checkout@v2
|
||||
|
@ -199,6 +204,14 @@ jobs:
|
|||
touch /tmp/code/code
|
||||
chmod +x /tmp/code/code
|
||||
export PATH="/tmp/code:$PATH"\n"""
|
||||
if "explore-data" in name:
|
||||
workflow_yaml += f"""
|
||||
|
||||
# load data into 'data' subdirectory
|
||||
mkdir data
|
||||
cd data
|
||||
wget https://azuremlexamples.blob.core.windows.net/datasets/credit_card/default_of_credit_card_clients.csv
|
||||
cd .."""
|
||||
|
||||
if not ("automl" in folder):
|
||||
workflow_yaml += f"""
|
||||
|
@ -351,7 +364,7 @@ def modify_notebooks(notebooks):
|
|||
print("modifying notebooks...")
|
||||
# setup variables
|
||||
kernelspec = {
|
||||
"display_name": "Python 3.10 - SDK V2",
|
||||
"display_name": "Python 3.10 - SDK v2",
|
||||
"language": "python",
|
||||
"name": "python310-sdkv2",
|
||||
}
|
||||
|
|