Update into master (#65)
Updating to remove cookie cutter, remove iot, and simplify folders. Also adding pytests.
This commit is contained in:
Родитель
a36b122c3f
Коммит
b22f5ac4ec
|
@ -3,7 +3,7 @@
|
|||
# A Github Service Connection must also be created with the name "AIArchitecturesAndPractices-GitHub"
|
||||
# https://docs.microsoft.com/en-us/azure/devops/pipelines/process/demands?view=azure-devops&tabs=yaml
|
||||
#
|
||||
# An Agent_Name Variable must be creating in the Azure DevOps UI.
|
||||
# An Agent_Name Variable must be creating in the Azure DevOps UI.
|
||||
# https://docs.microsoft.com/en-us/azure/devops/pipelines/process/variables?view=azure-devops&tabs=yaml%2Cbatch#secret-variables
|
||||
#
|
||||
# This must point to an Agent Pool, with a Self-Hosted Linux VM with a DOcker.
|
||||
|
@ -32,6 +32,10 @@ stages:
|
|||
- template: .ci/stages/deploy_notebooks_stages_v2.yml@aitemplates
|
||||
parameters:
|
||||
Agent: $(Agent_Name)
|
||||
jobDisplayName: MLAKSDeployAMLJob
|
||||
jobDisplayName: az-ml-realtime-score
|
||||
DefaultWorkingDirectory: $(System.DefaultWorkingDirectory)
|
||||
workload_vars: ../vars/ml_realtime_scoring.yml
|
||||
workload_vars: ../vars/az-ml-realtime-score.yml
|
||||
flighting_release: false
|
||||
flighting_preview: false
|
||||
flighting_master: false
|
||||
post_cleanup: false
|
||||
|
|
|
@ -1,155 +1,50 @@
|
|||
# AI Architecture Template TODO: update tile
|
||||
#
|
||||
# A Github Service Connection must also be created with the name "AIArchitecturesAndPractices-GitHub"
|
||||
# https://docs.microsoft.com/en-us/azure/devops/pipelines/process/demands?view=azure-devops&tabs=yaml
|
||||
#
|
||||
# An Agent_Name Variable must be creating in the Azure DevOps UI.
|
||||
# https://docs.microsoft.com/en-us/azure/devops/pipelines/process/variables?view=azure-devops&tabs=yaml%2Cbatch#secret-variables
|
||||
#
|
||||
# This must point to an Agent Pool, with a Self-Hosted Linux VM with a DOcker.
|
||||
# https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/v2-linux?view=azure-devops
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: aitemplates
|
||||
type: github
|
||||
name: microsoft/AI
|
||||
endpoint: AIArchitecturesAndPractices-GitHub
|
||||
|
||||
schedules:
|
||||
- cron: "*/10 * * * *"
|
||||
displayName: Daily midnight build
|
||||
always: true
|
||||
branches:
|
||||
include:
|
||||
- master
|
||||
# MLAKSDeploy Pipeline
|
||||
|
||||
|
||||
trigger:
|
||||
batch: true
|
||||
branches:
|
||||
include:
|
||||
- master
|
||||
- staging
|
||||
|
||||
variables:
|
||||
- group: AzureKeyVault
|
||||
|
||||
|
||||
jobs:
|
||||
- job: MLAKSDeployAMLJob
|
||||
timeoutInMinutes: 300
|
||||
cancelTimeoutInMinutes: 2
|
||||
|
||||
pool:
|
||||
vmImage: 'Ubuntu-16.04'
|
||||
|
||||
strategy:
|
||||
maxParallel: 3
|
||||
matrix: {"eastus": {"azureregion": "eastus", "azureresourcegroup" : "mlaksdplyamleastus"},"southcentralus": {"azureregion": "southcentralus", "azureresourcegroup" : "mlaksdplyamlsouthctrl" },"westus2": {"azureregion": "westus2", "azureresourcegroup" : "mlaksdplyamlwestus"}}
|
||||
|
||||
steps:
|
||||
- bash: |
|
||||
source /usr/share/miniconda/etc/profile.d/conda.sh
|
||||
which conda
|
||||
conda env create -f {{cookiecutter.project_name}}/environment.yml
|
||||
conda env list
|
||||
conda activate MLAKSDeployAML
|
||||
conda env list
|
||||
echo Login Azure Account
|
||||
az login -t $(sptenent) --service-principal -u $(spidentity) --password $(spsecret)
|
||||
cd {{cookiecutter.project_name}}
|
||||
echo Execute 00_AMLConfiguration.ipynb
|
||||
papermill 00_AMLConfiguration.ipynb 00_AMLConfiguration_Output.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
-k python3 \
|
||||
-p subscription_id $(azuresubscription) \
|
||||
-p resource_group $(azureresourcegroup) \
|
||||
-p workspace_name $(workspacename) \
|
||||
-p workspace_region $(azureregion) \
|
||||
-p image_name $(aksimagename)
|
||||
displayName: '00_AML_Configuration.ipynb'
|
||||
|
||||
- template: steps/papermill.yml
|
||||
parameters:
|
||||
notebook: 01_DataPrep.ipynb
|
||||
location: "{{cookiecutter.project_name}}"
|
||||
|
||||
- bash: |
|
||||
mkdir -p {{cookiecutter.project_name}}/iotedge/data_folder
|
||||
mkdir -p {{cookiecutter.project_name}}/aks/data_folder
|
||||
cd {{cookiecutter.project_name}}
|
||||
cp data_folder/*.tsv iotedge/data_folder
|
||||
cp data_folder/*.tsv aks/data_folder
|
||||
displayName: 'Copying data'
|
||||
|
||||
- template: steps/papermill.yml
|
||||
parameters:
|
||||
notebook: 02_TrainOnLocal.ipynb
|
||||
location: "{{cookiecutter.project_name}}"
|
||||
|
||||
- template: steps/papermill.yml
|
||||
parameters:
|
||||
notebook: 03_DevelopScoringScript.ipynb
|
||||
location: "{{cookiecutter.project_name}}"
|
||||
|
||||
- template: steps/papermill.yml
|
||||
parameters:
|
||||
notebook: 04_CreateImage.ipynb
|
||||
location: "{{cookiecutter.project_name}}"
|
||||
|
||||
- bash: |
|
||||
source /usr/share/miniconda/etc/profile.d/conda.sh
|
||||
conda activate MLAKSDeployAML
|
||||
echo Execute 05_DeployOnAKS.ipynb
|
||||
export PYTHONPATH=$(pwd)/{{cookiecutter.project_name}}:${PYTHONPATH}
|
||||
cd {{cookiecutter.project_name}}/aks
|
||||
papermill 05_DeployOnAKS.ipynb test.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
-k python3 \
|
||||
-p aks_name $(aksname) \
|
||||
-p aks_location $(azureregion) \
|
||||
-p aks_service_name $(aksvcname)
|
||||
displayName: '05_DeployOnAKS.ipynb'
|
||||
|
||||
- template: steps/papermill.yml
|
||||
parameters:
|
||||
notebook: 06_SpeedTestWebApp.ipynb
|
||||
location: "{{cookiecutter.project_name}}/aks"
|
||||
|
||||
- template: steps/papermill.yml
|
||||
parameters:
|
||||
notebook: 07_RealTimeScoring.ipynb
|
||||
location: "{{cookiecutter.project_name}}/aks"
|
||||
|
||||
# - bash: |
|
||||
# source /usr/share/miniconda/etc/profile.d/conda.sh
|
||||
# conda activate MLAKSDeployAML
|
||||
# export PYTHONPATH=$(pwd)/{{cookiecutter.project_name}}:${PYTHONPATH}
|
||||
# cd {{cookiecutter.project_name}}/iotedge
|
||||
# echo Execute 05_DeployOnIOTedge.ipynb
|
||||
# papermill 05_DeployOnIOTedge.ipynb test.ipynb \
|
||||
# --log-output \
|
||||
# --no-progress-bar \
|
||||
# -k python3 \
|
||||
# -p iot_hub_name fstlstnameiothub \
|
||||
# -p device_id mydevice \
|
||||
# -p module_name mymodule
|
||||
# displayName: '05_DeployOnIOTedge.ipynb'
|
||||
|
||||
- template: steps/papermill.yml
|
||||
parameters:
|
||||
notebook: 08_TearDown.ipynb
|
||||
location: "{{cookiecutter.project_name}}/aks"
|
||||
|
||||
# - template: steps/papermill.yml
|
||||
# parameters:
|
||||
# notebook: 06_TearDown.ipynb
|
||||
# location: "{{cookiecutter.project_name}}/iotedge"
|
||||
|
||||
- bash: |
|
||||
source /usr/share/miniconda/etc/profile.d/conda.sh
|
||||
conda activate MLAKSDeployAML
|
||||
echo Execute Resource Group Delete
|
||||
existResponse=$(az group exists -n $(azureresourcegroup))
|
||||
if [ "$existResponse" == "true" ]; then
|
||||
echo Deleting project resource group
|
||||
az group delete --name $(azureresourcegroup) --yes
|
||||
else
|
||||
echo Project resource group did not exist
|
||||
fi
|
||||
echo Done Cleanup
|
||||
displayName: 'Backup Cleanup'
|
||||
condition: or(canceled(),failed())
|
||||
|
||||
- task: CreateWorkItem@1
|
||||
inputs:
|
||||
workItemType: 'Issue'
|
||||
title: $(System.TeamProject) - Build $(Build.BuildNumber) Failed
|
||||
assignedTo: 'Fidan <fboylu@microsoft.com>'
|
||||
associate: true
|
||||
teamProject: $(System.TeamProject)
|
||||
|
||||
fieldMappings: |
|
||||
Description=Branch: Branch $(Build.SourceBranch) failed to build. Go to Boards>WorkItems and tag the failure type.
|
||||
displayName: 'Create work item on failure'
|
||||
condition: failed()
|
||||
|
||||
pr:
|
||||
autoCancel: true
|
||||
branches:
|
||||
include:
|
||||
- master
|
||||
|
||||
stages:
|
||||
- template: .ci/stages/deploy_notebooks_stages_v2.yml@aitemplates
|
||||
parameters:
|
||||
Agent: $(Agent_Name)
|
||||
jobDisplayName: ai-architecture-template #TODO: Update with project name
|
||||
DefaultWorkingDirectory: $(System.DefaultWorkingDirectory)
|
||||
workload_vars: ../vars/ai-architecture-template.yml #TODO: Update with project name
|
||||
flighting_release: false
|
||||
flighting_preview: false
|
||||
flighting_master: false
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
# AI Architecture Template TODO: update tile
|
||||
#
|
||||
# A Github Service Connection must also be created with the name "AIArchitecturesAndPractices-GitHub"
|
||||
# https://docs.microsoft.com/en-us/azure/devops/pipelines/process/demands?view=azure-devops&tabs=yaml
|
||||
#
|
||||
# An Agent_Name Variable must be creating in the Azure DevOps UI.
|
||||
# https://docs.microsoft.com/en-us/azure/devops/pipelines/process/variables?view=azure-devops&tabs=yaml%2Cbatch#secret-variables
|
||||
#
|
||||
# This must point to an Agent Pool, with a Self-Hosted Linux VM with a Docker.
|
||||
# https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/v2-linux?view=azure-devops
|
||||
|
||||
parameters:
|
||||
azureSubscription: ''
|
||||
azure_subscription: ''
|
||||
location: ''
|
||||
azureresourcegroup: ''
|
||||
workspacename: ''
|
||||
azureregion: westus2
|
||||
aksimagename: ''
|
||||
aks_name: ''
|
||||
aks_service_name: myimage
|
||||
conda: ''
|
||||
doCleanup: true
|
||||
python_path: ''
|
||||
flighting_release: false
|
||||
flighting_preview: false
|
||||
flighting_master: false
|
||||
|
||||
steps:
|
||||
- template: config_conda.yml
|
||||
parameters:
|
||||
conda_location: .
|
||||
azureSubscription: ${{parameters.azureSubscription}}
|
||||
conda: ai-architecture-template
|
||||
flighting_release: ${{parameters.flighting_release}}
|
||||
flighting_preview: ${{parameters.flighting_preview}}
|
||||
flighting_master: ${{parameters.flighting_master}}
|
||||
|
||||
- template: azpapermill.yml
|
||||
parameters:
|
||||
notebook: 00_AMLConfiguration.ipynb
|
||||
location: ${{parameters.location}}
|
||||
azureSubscription: ${{parameters.azureSubscription}}
|
||||
conda: ai-architecture-template
|
||||
azure_subscription: ${{parameters.azure_subscription}}
|
||||
azureresourcegroup: ${{parameters.azureresourcegroup}}
|
||||
workspacename: "aiarchtemplate"
|
||||
azureregion: ${{parameters.azureregion}}
|
||||
aksimagename: ${{parameters.aksimagename}}
|
||||
|
||||
# Insert more notebook steps here
|
||||
|
||||
- template: pytest_steps.yml
|
||||
parameters:
|
||||
location: ${{parameters.location}}
|
||||
azureSubscription: ${{parameters.azureSubscription}}
|
||||
conda: ai-architecture-template
|
||||
|
||||
- template: cleanuptask.yml
|
||||
parameters:
|
||||
azureSubscription: ${{parameters.azureSubscription}}
|
||||
conda: ${{parameters.conda}}
|
||||
azureresourcegroup: ${{parameters.azureresourcegroup}}
|
||||
doCleanup: ${{parameters.doCleanup}}
|
|
@ -0,0 +1,6 @@
|
|||
variables:
|
||||
TridentWorkloadTypeShort: aiarchtemp # TODO: update with project short name
|
||||
DeployLocation: westus
|
||||
ProjectLocation: "notebooks/"
|
||||
PythonPath: "."
|
||||
Template: steps/ai-architecture-template.yml # TODO: update file name to project name
|
|
@ -1,13 +1,11 @@
|
|||
|
||||
# Python Tools for Visual Studio (PTVS)
|
||||
__pycache__/
|
||||
*.pyc
|
||||
|
||||
# Environments
|
||||
.env
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
# Project Configuration Files
|
||||
workspace_conf.yml
|
||||
*.output_ipynb
|
||||
.azureml
|
||||
pylint-results.xml
|
||||
.idea
|
||||
score.py
|
||||
|
||||
#AML
|
||||
aml_config/
|
||||
|
@ -19,14 +17,137 @@ scripts/.amlignore
|
|||
__pycache__/
|
||||
scripts/__pycache__/
|
||||
|
||||
# Products
|
||||
*.tsv
|
||||
*.txt
|
||||
*.pkl
|
||||
datafolder/
|
||||
lgbmenv.yml
|
||||
score.py
|
||||
# Environments
|
||||
.env
|
||||
|
||||
.idea
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
pip-wheel-metadata/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
|
|
@ -0,0 +1,584 @@
|
|||
[MASTER]
|
||||
|
||||
# A comma-separated list of package or module names from where C extensions may
|
||||
# be loaded. Extensions are loading into the active Python interpreter and may
|
||||
# run arbitrary code.
|
||||
extension-pkg-whitelist=
|
||||
|
||||
# Add files or directories to the blacklist. They should be base names, not
|
||||
# paths.
|
||||
ignore=CVS
|
||||
|
||||
# Add files or directories matching the regex patterns to the blacklist. The
|
||||
# regex matches against base names, not paths.
|
||||
ignore-patterns=
|
||||
|
||||
# Python code to execute, usually for sys.path manipulation such as
|
||||
# pygtk.require().
|
||||
#init-hook=
|
||||
|
||||
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
|
||||
# number of processors available to use.
|
||||
jobs=1
|
||||
|
||||
# Control the amount of potential inferred values when inferring a single
|
||||
# object. This can help the performance when dealing with large functions or
|
||||
# complex, nested conditions.
|
||||
limit-inference-results=100
|
||||
|
||||
# List of plugins (as comma separated values of python module names) to load,
|
||||
# usually to register additional checkers.
|
||||
load-plugins=pylint_junit
|
||||
|
||||
# Pickle collected data for later comparisons.
|
||||
persistent=yes
|
||||
|
||||
# Specify a configuration file.
|
||||
#rcfile=
|
||||
|
||||
# When enabled, pylint would attempt to guess common misconfiguration and emit
|
||||
# user-friendly hints instead of false-positive error messages.
|
||||
suggestion-mode=yes
|
||||
|
||||
# Allow loading of arbitrary C extensions. Extensions are imported into the
|
||||
# active Python interpreter and may run arbitrary code.
|
||||
unsafe-load-any-extension=no
|
||||
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
|
||||
# Only show warnings with the listed confidence levels. Leave empty to show
|
||||
# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
|
||||
confidence=
|
||||
|
||||
# Disable the message, report, category or checker with the given id(s). You
|
||||
# can either give multiple identifiers separated by comma (,) or put this
|
||||
# option multiple times (only on the command line, not in the configuration
|
||||
# file where it should appear only once). You can also use "--disable=all" to
|
||||
# disable everything first and then reenable specific checks. For example, if
|
||||
# you want to run only the similarities checker, you can use "--disable=all
|
||||
# --enable=similarities". If you want to run only the classes checker, but have
|
||||
# no Warning level messages displayed, use "--disable=all --enable=classes
|
||||
# --disable=W".
|
||||
disable=missing-module-docstring,
|
||||
trailing-whitespace,
|
||||
fixme,
|
||||
print-statement,
|
||||
parameter-unpacking,
|
||||
unpacking-in-except,
|
||||
old-raise-syntax,
|
||||
backtick,
|
||||
long-suffix,
|
||||
old-ne-operator,
|
||||
old-octal-literal,
|
||||
import-star-module-level,
|
||||
non-ascii-bytes-literal,
|
||||
raw-checker-failed,
|
||||
bad-inline-option,
|
||||
locally-disabled,
|
||||
file-ignored,
|
||||
suppressed-message,
|
||||
useless-suppression,
|
||||
deprecated-pragma,
|
||||
use-symbolic-message-instead,
|
||||
apply-builtin,
|
||||
basestring-builtin,
|
||||
buffer-builtin,
|
||||
cmp-builtin,
|
||||
coerce-builtin,
|
||||
execfile-builtin,
|
||||
file-builtin,
|
||||
long-builtin,
|
||||
raw_input-builtin,
|
||||
reduce-builtin,
|
||||
standarderror-builtin,
|
||||
unicode-builtin,
|
||||
xrange-builtin,
|
||||
coerce-method,
|
||||
delslice-method,
|
||||
getslice-method,
|
||||
setslice-method,
|
||||
no-absolute-import,
|
||||
old-division,
|
||||
dict-iter-method,
|
||||
dict-view-method,
|
||||
next-method-called,
|
||||
metaclass-assignment,
|
||||
indexing-exception,
|
||||
raising-string,
|
||||
reload-builtin,
|
||||
oct-method,
|
||||
hex-method,
|
||||
nonzero-method,
|
||||
cmp-method,
|
||||
input-builtin,
|
||||
round-builtin,
|
||||
intern-builtin,
|
||||
unichr-builtin,
|
||||
map-builtin-not-iterating,
|
||||
zip-builtin-not-iterating,
|
||||
range-builtin-not-iterating,
|
||||
filter-builtin-not-iterating,
|
||||
using-cmp-argument,
|
||||
eq-without-hash,
|
||||
div-method,
|
||||
idiv-method,
|
||||
rdiv-method,
|
||||
exception-message-attribute,
|
||||
invalid-str-codec,
|
||||
sys-max-int,
|
||||
bad-python3-import,
|
||||
deprecated-string-function,
|
||||
deprecated-str-translate-call,
|
||||
deprecated-itertools-function,
|
||||
deprecated-types-field,
|
||||
next-method-defined,
|
||||
dict-items-not-iterating,
|
||||
dict-keys-not-iterating,
|
||||
dict-values-not-iterating,
|
||||
deprecated-operator-function,
|
||||
deprecated-urllib-function,
|
||||
xreadlines-attribute,
|
||||
deprecated-sys-function,
|
||||
exception-escape,
|
||||
comprehension-escape
|
||||
|
||||
# Enable the message, report, category or checker with the given id(s). You can
|
||||
# either give multiple identifier separated by comma (,) or put this option
|
||||
# multiple time (only on the command line, not in the configuration file where
|
||||
# it should appear only once). See also the "--disable" option for examples.
|
||||
enable=c-extension-no-member
|
||||
|
||||
|
||||
[REPORTS]
|
||||
|
||||
# Python expression which should return a score less than or equal to 10. You
|
||||
# have access to the variables 'error', 'warning', 'refactor', and 'convention'
|
||||
# which contain the number of messages in each category, as well as 'statement'
|
||||
# which is the total number of statements analyzed. This score is used by the
|
||||
# global evaluation report (RP0004).
|
||||
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
|
||||
|
||||
# Template used to display messages. This is a python new-style format string
|
||||
# used to format the message information. See doc for all details.
|
||||
#msg-template=
|
||||
|
||||
# Set the output format. Available formats are text, parseable, colorized, json
|
||||
# and msvs (visual studio). You can also give a reporter class, e.g.
|
||||
# mypackage.mymodule.MyReporterClass.
|
||||
output-format=text
|
||||
|
||||
# Tells whether to display a full report or only the messages.
|
||||
reports=no
|
||||
|
||||
# Activate the evaluation score.
|
||||
score=yes
|
||||
|
||||
|
||||
[REFACTORING]
|
||||
|
||||
# Maximum number of nested blocks for function / method body
|
||||
max-nested-blocks=5
|
||||
|
||||
# Complete name of functions that never returns. When checking for
|
||||
# inconsistent-return-statements if a never returning function is called then
|
||||
# it will be considered as an explicit return statement and no message will be
|
||||
# printed.
|
||||
never-returning-functions=sys.exit
|
||||
|
||||
|
||||
[BASIC]
|
||||
|
||||
# Naming style matching correct argument names.
|
||||
argument-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct argument names. Overrides argument-
|
||||
# naming-style.
|
||||
#argument-rgx=
|
||||
|
||||
# Naming style matching correct attribute names.
|
||||
attr-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct attribute names. Overrides attr-naming-
|
||||
# style.
|
||||
#attr-rgx=
|
||||
|
||||
# Bad variable names which should always be refused, separated by a comma.
|
||||
bad-names=foo,
|
||||
bar,
|
||||
baz,
|
||||
toto,
|
||||
tutu,
|
||||
tata
|
||||
|
||||
# Naming style matching correct class attribute names.
|
||||
class-attribute-naming-style=any
|
||||
|
||||
# Regular expression matching correct class attribute names. Overrides class-
|
||||
# attribute-naming-style.
|
||||
#class-attribute-rgx=
|
||||
|
||||
# Naming style matching correct class names.
|
||||
class-naming-style=PascalCase
|
||||
|
||||
# Regular expression matching correct class names. Overrides class-naming-
|
||||
# style.
|
||||
#class-rgx=
|
||||
|
||||
# Naming style matching correct constant names.
|
||||
const-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct constant names. Overrides const-naming-
|
||||
# style.
|
||||
#const-rgx=
|
||||
|
||||
# Minimum line length for functions/classes that require docstrings, shorter
|
||||
# ones are exempt.
|
||||
docstring-min-length=-1
|
||||
|
||||
# Naming style matching correct function names.
|
||||
function-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct function names. Overrides function-
|
||||
# naming-style.
|
||||
#function-rgx=
|
||||
|
||||
# Good variable names which should always be accepted, separated by a comma.
|
||||
good-names=i,
|
||||
j,
|
||||
k,
|
||||
ex,
|
||||
Run,
|
||||
_
|
||||
|
||||
# Include a hint for the correct naming format with invalid-name.
|
||||
include-naming-hint=no
|
||||
|
||||
# Naming style matching correct inline iteration names.
|
||||
inlinevar-naming-style=any
|
||||
|
||||
# Regular expression matching correct inline iteration names. Overrides
|
||||
# inlinevar-naming-style.
|
||||
#inlinevar-rgx=
|
||||
|
||||
# Naming style matching correct method names.
|
||||
method-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct method names. Overrides method-naming-
|
||||
# style.
|
||||
#method-rgx=
|
||||
|
||||
# Naming style matching correct module names.
|
||||
module-naming-style=any
|
||||
|
||||
# Regular expression matching correct module names. Overrides module-naming-
|
||||
# style.
|
||||
#module-rgx=
|
||||
|
||||
# Colon-delimited sets of names that determine each other's naming style when
|
||||
# the name regexes allow several styles.
|
||||
name-group=
|
||||
|
||||
# Regular expression which should only match function or class names that do
|
||||
# not require a docstring.
|
||||
no-docstring-rgx=^_
|
||||
|
||||
# List of decorators that produce properties, such as abc.abstractproperty. Add
|
||||
# to this list to register other decorators that produce valid properties.
|
||||
# These decorators are taken in consideration only for invalid-name.
|
||||
property-classes=abc.abstractproperty
|
||||
|
||||
# Naming style matching correct variable names.
|
||||
variable-naming-style=snake_case
|
||||
|
||||
# Regular expression matching correct variable names. Overrides variable-
|
||||
# naming-style.
|
||||
#variable-rgx=
|
||||
|
||||
|
||||
[FORMAT]
|
||||
|
||||
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
|
||||
expected-line-ending-format=
|
||||
|
||||
# Regexp for a line that is allowed to be longer than the limit.
|
||||
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
|
||||
|
||||
# Number of spaces of indent required inside a hanging or continued line.
|
||||
indent-after-paren=4
|
||||
|
||||
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
|
||||
# tab).
|
||||
indent-string=' '
|
||||
|
||||
# Maximum number of characters on a single line.
|
||||
max-line-length=120
|
||||
|
||||
# Maximum number of lines in a module.
|
||||
max-module-lines=1000
|
||||
|
||||
# List of optional constructs for which whitespace checking is disabled. `dict-
|
||||
# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
|
||||
# `trailing-comma` allows a space between comma and closing bracket: (a, ).
|
||||
# `empty-line` allows space-only lines.
|
||||
no-space-check=trailing-comma,
|
||||
dict-separator
|
||||
|
||||
# Allow the body of a class to be on the same line as the declaration if body
|
||||
# contains single statement.
|
||||
single-line-class-stmt=no
|
||||
|
||||
# Allow the body of an if to be on the same line as the test if there is no
|
||||
# else.
|
||||
single-line-if-stmt=no
|
||||
|
||||
|
||||
[LOGGING]
|
||||
|
||||
# Format style used to check logging format string. `old` means using %
|
||||
# formatting, `new` is for `{}` formatting,and `fstr` is for f-strings.
|
||||
logging-format-style=old
|
||||
|
||||
# Logging modules to check that the string format arguments are in logging
|
||||
# function parameter format.
|
||||
logging-modules=logging
|
||||
|
||||
|
||||
[MISCELLANEOUS]
|
||||
|
||||
# List of note tags to take in consideration, separated by a comma.
|
||||
notes=FIXME,
|
||||
XXX,
|
||||
TODO
|
||||
|
||||
|
||||
[SIMILARITIES]
|
||||
|
||||
# Ignore comments when computing similarities.
|
||||
ignore-comments=yes
|
||||
|
||||
# Ignore docstrings when computing similarities.
|
||||
ignore-docstrings=yes
|
||||
|
||||
# Ignore imports when computing similarities.
|
||||
ignore-imports=no
|
||||
|
||||
# Minimum lines number of a similarity.
|
||||
min-similarity-lines=4
|
||||
|
||||
|
||||
[SPELLING]
|
||||
|
||||
# Limits count of emitted suggestions for spelling mistakes.
|
||||
max-spelling-suggestions=4
|
||||
|
||||
# Spelling dictionary name. Available dictionaries: none. To make it work,
|
||||
# install the python-enchant package.
|
||||
spelling-dict=
|
||||
|
||||
# List of comma separated words that should not be checked.
|
||||
spelling-ignore-words=
|
||||
|
||||
# A path to a file that contains the private dictionary; one word per line.
|
||||
spelling-private-dict-file=
|
||||
|
||||
# Tells whether to store unknown words to the private dictionary (see the
|
||||
# --spelling-private-dict-file option) instead of raising a message.
|
||||
spelling-store-unknown-words=no
|
||||
|
||||
|
||||
[STRING]
|
||||
|
||||
# This flag controls whether the implicit-str-concat-in-sequence should
|
||||
# generate a warning on implicit string concatenation in sequences defined over
|
||||
# several lines.
|
||||
check-str-concat-over-line-jumps=no
|
||||
|
||||
|
||||
[TYPECHECK]
|
||||
|
||||
# List of decorators that produce context managers, such as
|
||||
# contextlib.contextmanager. Add to this list to register other decorators that
|
||||
# produce valid context managers.
|
||||
contextmanager-decorators=contextlib.contextmanager
|
||||
|
||||
# List of members which are set dynamically and missed by pylint inference
|
||||
# system, and so shouldn't trigger E1101 when accessed. Python regular
|
||||
# expressions are accepted.
|
||||
generated-members=
|
||||
|
||||
# Tells whether missing members accessed in mixin class should be ignored. A
|
||||
# mixin class is detected if its name ends with "mixin" (case insensitive).
|
||||
ignore-mixin-members=yes
|
||||
|
||||
# Tells whether to warn about missing members when the owner of the attribute
|
||||
# is inferred to be None.
|
||||
ignore-none=yes
|
||||
|
||||
# This flag controls whether pylint should warn about no-member and similar
|
||||
# checks whenever an opaque object is returned when inferring. The inference
|
||||
# can return multiple potential results while evaluating a Python object, but
|
||||
# some branches might not be evaluated, which results in partial inference. In
|
||||
# that case, it might be useful to still emit no-member and other checks for
|
||||
# the rest of the inferred objects.
|
||||
ignore-on-opaque-inference=yes
|
||||
|
||||
# List of class names for which member attributes should not be checked (useful
|
||||
# for classes with dynamically set attributes). This supports the use of
|
||||
# qualified names.
|
||||
ignored-classes=optparse.Values,thread._local,_thread._local
|
||||
|
||||
# List of module names for which member attributes should not be checked
|
||||
# (useful for modules/projects where namespaces are manipulated during runtime
|
||||
# and thus existing member attributes cannot be deduced by static analysis). It
|
||||
# supports qualified module names, as well as Unix pattern matching.
|
||||
ignored-modules=
|
||||
|
||||
# Show a hint with possible names when a member name was not found. The aspect
|
||||
# of finding the hint is based on edit distance.
|
||||
missing-member-hint=yes
|
||||
|
||||
# The minimum edit distance a name should have in order to be considered a
|
||||
# similar match for a missing member name.
|
||||
missing-member-hint-distance=1
|
||||
|
||||
# The total number of similar names that should be taken in consideration when
|
||||
# showing a hint for a missing member.
|
||||
missing-member-max-choices=1
|
||||
|
||||
# List of decorators that change the signature of a decorated function.
|
||||
signature-mutators=
|
||||
|
||||
|
||||
[VARIABLES]
|
||||
|
||||
# List of additional names supposed to be defined in builtins. Remember that
|
||||
# you should avoid defining new builtins when possible.
|
||||
additional-builtins=
|
||||
|
||||
# Tells whether unused global variables should be treated as a violation.
|
||||
allow-global-unused-variables=yes
|
||||
|
||||
# List of strings which can identify a callback function by name. A callback
|
||||
# name must start or end with one of those strings.
|
||||
callbacks=cb_,
|
||||
_cb
|
||||
|
||||
# A regular expression matching the name of dummy variables (i.e. expected to
|
||||
# not be used).
|
||||
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
|
||||
|
||||
# Argument names that match this expression will be ignored. Default to name
|
||||
# with leading underscore.
|
||||
ignored-argument-names=_.*|^ignored_|^unused_
|
||||
|
||||
# Tells whether we should check for unused import in __init__ files.
|
||||
init-import=no
|
||||
|
||||
# List of qualified module names which can have objects that can redefine
|
||||
# builtins.
|
||||
redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
|
||||
|
||||
|
||||
[CLASSES]
|
||||
|
||||
# List of method names used to declare (i.e. assign) instance attributes.
|
||||
defining-attr-methods=__init__,
|
||||
__new__,
|
||||
setUp,
|
||||
__post_init__
|
||||
|
||||
# List of member names, which should be excluded from the protected access
|
||||
# warning.
|
||||
exclude-protected=_asdict,
|
||||
_fields,
|
||||
_replace,
|
||||
_source,
|
||||
_make
|
||||
|
||||
# List of valid names for the first argument in a class method.
|
||||
valid-classmethod-first-arg=cls
|
||||
|
||||
# List of valid names for the first argument in a metaclass class method.
|
||||
valid-metaclass-classmethod-first-arg=cls
|
||||
|
||||
|
||||
[DESIGN]
|
||||
|
||||
# Maximum number of arguments for function / method.
|
||||
max-args=5
|
||||
|
||||
# Maximum number of attributes for a class (see R0902).
|
||||
max-attributes=7
|
||||
|
||||
# Maximum number of boolean expressions in an if statement (see R0916).
|
||||
max-bool-expr=5
|
||||
|
||||
# Maximum number of branch for function / method body.
|
||||
max-branches=12
|
||||
|
||||
# Maximum number of locals for function / method body.
|
||||
max-locals=15
|
||||
|
||||
# Maximum number of parents for a class (see R0901).
|
||||
max-parents=7
|
||||
|
||||
# Maximum number of public methods for a class (see R0904).
|
||||
max-public-methods=20
|
||||
|
||||
# Maximum number of return / yield for function / method body.
|
||||
max-returns=6
|
||||
|
||||
# Maximum number of statements in function / method body.
|
||||
max-statements=50
|
||||
|
||||
# Minimum number of public methods for a class (see R0903).
|
||||
min-public-methods=2
|
||||
|
||||
|
||||
[IMPORTS]
|
||||
|
||||
# List of modules that can be imported at any level, not just the top level
|
||||
# one.
|
||||
allow-any-import-level=
|
||||
|
||||
# Allow wildcard imports from modules that define __all__.
|
||||
allow-wildcard-with-all=no
|
||||
|
||||
# Analyse import fallback blocks. This can be used to support both Python 2 and
|
||||
# 3 compatible code, which means that the block might have code that exists
|
||||
# only in one or another interpreter, leading to false positives when analysed.
|
||||
analyse-fallback-blocks=no
|
||||
|
||||
# Deprecated modules which should not be used, separated by a comma.
|
||||
deprecated-modules=optparse,tkinter.tix
|
||||
|
||||
# Create a graph of external dependencies in the given file (report RP0402 must
|
||||
# not be disabled).
|
||||
ext-import-graph=
|
||||
|
||||
# Create a graph of every (i.e. internal and external) dependencies in the
|
||||
# given file (report RP0402 must not be disabled).
|
||||
import-graph=
|
||||
|
||||
# Create a graph of internal dependencies in the given file (report RP0402 must
|
||||
# not be disabled).
|
||||
int-import-graph=
|
||||
|
||||
# Force import order to recognize a module as part of the standard
|
||||
# compatibility libraries.
|
||||
known-standard-library=
|
||||
|
||||
# Force import order to recognize a module as part of a third party library.
|
||||
known-third-party=enchant
|
||||
|
||||
# Couples of modules and preferred modules, separated by a comma.
|
||||
preferred-modules=
|
||||
|
||||
|
||||
[EXCEPTIONS]
|
||||
|
||||
# Exceptions that will emit a warning when being caught. Defaults to
|
||||
# "BaseException, Exception".
|
||||
overgeneral-exceptions=BaseException,
|
||||
Exception
|
|
@ -0,0 +1,9 @@
|
|||
# Microsoft Open Source Code of Conduct
|
||||
|
||||
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
|
||||
|
||||
Resources:
|
||||
|
||||
- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
|
||||
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
|
||||
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
|
65
README.md
65
README.md
|
@ -6,9 +6,6 @@
|
|||
|
||||
In this repository there are a number of tutorials in Jupyter notebooks that have step-by-step instructions on (1) how to train a machine learning model using Python; (2) how to deploy a trained machine learning model throught Azure Machine Learning (AzureML). The tutorials cover how to deploy models on following deployment target:
|
||||
|
||||
- [Azure Kubernetes Service (AKS) Cluster](./{{cookiecutter.project_name}}/aks)
|
||||
- [Azure IoT Edge](./{{cookiecutter.project_name}}/iotedge)
|
||||
|
||||
## Overview
|
||||
This scenario shows how to deploy a Frequently Asked Questions (FAQ) matching model as a web service to provide predictions for user questions. For this scenario, “Input Data” in the [architecture diagram](https://docs.microsoft.com/en-us/azure/architecture/reference-architectures/ai/realtime-scoring-python) refers to text strings containing the user questions to match with a list of FAQs. The scenario is designed for the Scikit-Learn machine learning library for Python but can be generalized to any scenario that uses Python models to make real-time predictions.
|
||||
|
||||
|
@ -25,9 +22,10 @@ An example app that consumes the results is included with the scenario.
|
|||
|
||||
## Prerequisites
|
||||
1. Linux (Ubuntu).
|
||||
2. [Anaconda Python](https://www.anaconda.com/download)
|
||||
3. [Docker](https://docs.docker.com/v17.12/install/linux/docker-ee/ubuntu) installed.
|
||||
4. [Azure account](https://azure.microsoft.com).
|
||||
1. [Anaconda Python](https://www.anaconda.com/download)
|
||||
1. [Docker](https://docs.docker.com/v17.12/install/linux/docker-ee/ubuntu) installed.
|
||||
1. [Azure account](https://azure.microsoft.com).
|
||||
|
||||
|
||||
---
|
||||
**NOTE**
|
||||
|
@ -44,27 +42,46 @@ DSVM](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtu
|
|||
which addresses the first three prerequisites.
|
||||
|
||||
## Setup
|
||||
To set up your environment to run these notebooks, please follow these steps. They setup the notebooks to use Docker and Azure seamlessly.
|
||||
1. Create an _Ubuntu_ _Linux_ DSVM and perform the following steps.
|
||||
|
||||
2. Install [cookiecutter](https://cookiecutter.readthedocs.io/en/latest/installation.html), a tool creates projects from project templates.
|
||||
```bash
|
||||
pip install cookiecutter
|
||||
```
|
||||
To set up your environment to run these notebooks, please follow these steps. They setup the notebooks to use Azure seamlessly.
|
||||
|
||||
3. Use cookiecutter to clone this repository. Cookiecutter will prompt a series of questions where you will choose a specific framework, select your deployment settings, and obtain an Azure ML workspace.
|
||||
```bash
|
||||
cookiecutter https://github.com/Microsoft/MLAKSDeployAML.git
|
||||
1. Create a _Linux_ _Ubuntu_ VM.
|
||||
1. Log in to your VM. We recommend that you use a graphical client
|
||||
such as
|
||||
[X2Go](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtual-machine/dsvm-ubuntu-intro#x2go)
|
||||
to access your VM. The remaining steps are to be done on the VM.
|
||||
1. Open a terminal emulator.
|
||||
1. Clone, fork, or download the zip file for this repository:
|
||||
```
|
||||
git clone https://github.com/Microsoft/az-ml-realtime-score.git
|
||||
```
|
||||
1. Enter the local repository:
|
||||
```
|
||||
cd az-ml-realtime-score
|
||||
```
|
||||
1. Copy `sample_workspace_conf.yml` to a new file, `workspace_conf.yml`, and fill in each field. This will keep secrets out of the source code, and this file will be ignored by git.
|
||||
1. Create the Python az-ml-realtime-score virtual environment using the environment.yml:
|
||||
```
|
||||
conda env create -f environment.yml
|
||||
```
|
||||
1. Activate the virtual environment:
|
||||
```
|
||||
source activate az-ml-realtime-score
|
||||
```
|
||||
The remaining steps should be done in this virtual environment.
|
||||
1. Login to Azure:
|
||||
```
|
||||
az login
|
||||
```
|
||||
You can verify that you are logged in to your subscription by executing
|
||||
the command:
|
||||
```
|
||||
az account show -o table
|
||||
```
|
||||
1. Start the Jupyter notebook server:
|
||||
```
|
||||
jupyter notebook
|
||||
```
|
||||
You will be asked to choose or enter information such as *project name*, *subsciption id*, *resource group*, etc. in an interactive way. You can press *Enter* to accept the default value or enter a value of your choice. For example, if you want to learn how to deploy machine learing model on AKS Cluster, you should choose the value "aks" for variable *deployment_type*. Instead, if you want to learn about deploying machine learning model on IoT Edge, you should select "iotedge" for the variable *deployment_type*.
|
||||
|
||||
Provide a valid value for "subscription_id", otherwise a `subscription id is missing` error will be generated **after** all the questions are asked. You will have to perform Step 3 all over again. The full list of questions can be found in [cookiecutter.json](./cookiecutter.json) file.
|
||||
|
||||
Please make sure all entered information are correct, as these information are used to customize the content of your repo.
|
||||
|
||||
4. On your local machine, you should now have a repo with the *project_name* you specified. Find the README.md file in this repo and proceed with instructions specified in it.
|
||||
|
||||
|
||||
|
||||
# Contributing
|
||||
This project welcomes contributions and suggestions. Most contributions require you to agree to a
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.3 BLOCK -->
|
||||
|
||||
## Security
|
||||
|
||||
Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
|
||||
|
||||
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets Microsoft's [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)) of a security vulnerability, please report it to us as described below.
|
||||
|
||||
## Reporting Security Issues
|
||||
|
||||
**Please do not report security vulnerabilities through public GitHub issues.**
|
||||
|
||||
Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
|
||||
|
||||
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
|
||||
|
||||
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
|
||||
|
||||
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
|
||||
|
||||
* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
|
||||
* Full paths of source file(s) related to the manifestation of the issue
|
||||
* The location of the affected source code (tag/branch/commit or direct URL)
|
||||
* Any special configuration required to reproduce the issue
|
||||
* Step-by-step instructions to reproduce the issue
|
||||
* Proof-of-concept or exploit code (if possible)
|
||||
* Impact of the issue, including how an attacker might exploit the issue
|
||||
|
||||
This information will help us triage your report more quickly.
|
||||
|
||||
If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.
|
||||
|
||||
## Preferred Languages
|
||||
|
||||
We prefer all communications to be in English.
|
||||
|
||||
## Policy
|
||||
|
||||
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).
|
||||
|
||||
<!-- END MICROSOFT SECURITY.MD BLOCK -->
|
|
@ -0,0 +1,21 @@
|
|||
name: az-ml-realtime-score
|
||||
channels:
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- python=3.6.2
|
||||
- pip
|
||||
- jupyter
|
||||
- pytest
|
||||
- pytest-cov
|
||||
- pylint
|
||||
- pandas
|
||||
- pip:
|
||||
- papermill
|
||||
- azureml-core==1.0.85.2
|
||||
- pylint-junit
|
||||
- pytest-nunit
|
||||
- nbconvert
|
||||
- junit-xml
|
||||
- nbformat
|
||||
- Microsoft-AI-Azure-Utility-Samples
|
||||
- python-dotenv
|
|
@ -0,0 +1,199 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ai-architecture-template - 00_AMLConfiguration.ipynb\n",
|
||||
"TODO: Update with new repo name\n",
|
||||
"\n",
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License.\n",
|
||||
"\n",
|
||||
"# Installation and configuration\n",
|
||||
"This notebook configures the notebooks in this tutorial to connect to an Azure Machine Learning (AML) Workspace. \n",
|
||||
"You can use an existing workspace or create a new one.\n",
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"If you have already completed the prerequisites and selected the correct Kernel for this notebook, the AML Python SDK \n",
|
||||
"is already installed. Let's load the imports and check the AML SDK version."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"from azure_utils.machine_learning.utils import load_configuration, get_or_create_workspace\n",
|
||||
"\n",
|
||||
"print(\"AML SDK Version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up your Azure Machine Learning workspace\n",
|
||||
"## Load Configurations from file\n",
|
||||
"\n",
|
||||
"Configurations are loaded from a file, to prevent accident commits of Azure secerts into source control.\n",
|
||||
"This file name is included in the .gitignore to also prevent accident commits. A template file is included that should\n",
|
||||
"be copied, and each parameter filled in."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cfg = load_configuration(\"../workspace_conf.yml\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Load Configurations into Notebook.\n",
|
||||
"\n",
|
||||
"The following cell loads the configurations from the local file, into the notebook memory. The following cell is also\n",
|
||||
"marked as a parameter cell. When using this notebook with [papermill](https://github.com/nteract/papermill), these\n",
|
||||
"parameters can be override. See the tests for examples."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
},
|
||||
"tags": [
|
||||
"parameters"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"subscription_id = cfg['subscription_id']\n",
|
||||
"resource_group = cfg['resource_group']\n",
|
||||
"workspace_name = cfg['workspace_name']\n",
|
||||
"workspace_region = cfg['workspace_region']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the workspace\n",
|
||||
"This cell will create an AML workspace for you in a subscription, provided you have the correct permissions.\n",
|
||||
"\n",
|
||||
"This will fail when:\n",
|
||||
"1. You do not have permission to create a workspace in the resource group\n",
|
||||
"1. You do not have permission to create a resource group if it's non-existing.\n",
|
||||
"1. You are not a subscription owner or contributor and no Azure ML workspaces have ever been created in this \n",
|
||||
"subscription\n",
|
||||
"\n",
|
||||
"If workspace creation fails, please work with your IT admin to provide you with the appropriate permissions or to \n",
|
||||
"provision the required resources. If this cell succeeds, you're done configuring AML!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = get_or_create_workspace(workspace_name, subscription_id, resource_group, workspace_region)\n",
|
||||
"ws_json = ws.get_details()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Let's check the details of the workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(json.dumps(ws_json, indent=2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"You are now ready to move on to the [AutoML Local](01_DataPrep.ipynb) notebook."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"celltoolbar": "Tags",
|
||||
"kernelspec": {
|
||||
"display_name": "az-ml-realtime-score",
|
||||
"language": "python",
|
||||
"name": "az-ml-realtime-score"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
},
|
||||
"pycharm": {
|
||||
"stem_cell": {
|
||||
"cell_type": "raw",
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,733 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Data Preparation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this notebook, we use a subset of [Stack Exchange network](https://archive.org/details/stackexchange) question data \n",
|
||||
"which includes original questions tagged as 'JavaScript', their duplicate questions and their answers. Here, we \n",
|
||||
"provide the steps to prepare the data to use in model development for training a model that will match a new \n",
|
||||
"question with an existing original question. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"from azure_utils.utilities import read_csv_gz, clean_text, round_sample_strat, random_merge\n",
|
||||
"from notebooks import DIRECTORY"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Below, we define some parameters that will be used in the data cleaning as well as train and test set preparation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The size of the test set\n",
|
||||
"test_size = 0.21\n",
|
||||
"# The minimum length of clean text\n",
|
||||
"min_text = 150\n",
|
||||
"# The minimum number of duplicates per question\n",
|
||||
"min_dupes = 12\n",
|
||||
"# The maximum number of duplicate matches\n",
|
||||
"match = 20\n",
|
||||
"# The output files path\n",
|
||||
"outputs_path = DIRECTORY + \"/data_folder\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data cleaning"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we download the questions, duplicate questions and answers and load the datasets into pandas dataframes using \n",
|
||||
"the helper functions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# URLs to original questions, duplicate questions, and answers.\n",
|
||||
"data_url = \"https://bostondata.blob.core.windows.net/stackoverflow/{}\"\n",
|
||||
"questions_url = data_url.format(\"orig-q.tsv.gz\")\n",
|
||||
"dupes_url = data_url.format(\"dup-q.tsv.gz\")\n",
|
||||
"answers_url = data_url.format(\"ans.tsv.gz\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load datasets.\n",
|
||||
"questions = read_csv_gz(questions_url, names=('Id', 'AnswerId', 'Text0', 'CreationDate'))\n",
|
||||
"dupes = read_csv_gz(dupes_url, names=('Id', 'AnswerId', 'Text0', 'CreationDate'))\n",
|
||||
"answers = read_csv_gz(answers_url, names=('Id', 'Text0'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's now check the dataframes. Notice that questions and duplicates have \"AnswerID\" column that would help match \n",
|
||||
"ith the index of answers dataframe."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"questions.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dupes.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"answers.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's check the first original question's text."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(questions.iloc[0, 1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's now check the duplicates for that question."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(dupes[dupes.AnswerId == questions.iloc[0, 0]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Below is the answer to the original question."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(answers.at[questions.iloc[0, 0], 'Text0'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we use the helper functions to clean questions, duplicates and answers from unwanted text such as code, html \n",
|
||||
"tags and links. Notice that we add a new column 'Text' to each dataframe for clean text in lowercase."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Clean up all text, and keep only data with some clean text.\n",
|
||||
"for df in (questions, dupes, answers):\n",
|
||||
" df[\"Text\"] = df.Text0.apply(clean_text).str.lower()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"questions = questions[questions.Text.str.len() > 0]\n",
|
||||
"answers = answers[answers.Text.str.len() > 0]\n",
|
||||
"dupes = dupes[dupes.Text.str.len() > 0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's compare the first original question and cleaned version as an example."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Original question.\n",
|
||||
"print(questions.iloc[0, 1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# After cleaning.\n",
|
||||
"print(questions.iloc[0, 3])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It turns out that some duplicate questions were also in original questions. Also, some original questions and some \n",
|
||||
"duplicate questions were duplicated in the datasets. In the following, we remove them from the dataframes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# First, remove dupes that are questions, then remove duplicated questions and dupes.\n",
|
||||
"dupes = dupes[~dupes.index.isin(questions.index)]\n",
|
||||
"questions = questions[~questions.index.duplicated(keep='first')]\n",
|
||||
"dupes = dupes[~dupes.index.duplicated(keep='first')]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We also make sure we keep questions with answers and duplicates."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Keep only questions with answers and dupes, answers to questions, and dupes of questions.\n",
|
||||
"questions = questions[\n",
|
||||
" questions.AnswerId.isin(answers.index) & questions.AnswerId.isin(dupes.AnswerId)\n",
|
||||
"]\n",
|
||||
"answers = answers[answers.index.isin(questions.AnswerId)]\n",
|
||||
"dupes = dupes[dupes.AnswerId.isin(questions.AnswerId)]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Verify data integrity.\n",
|
||||
"assert questions.AnswerId.isin(answers.index).all()\n",
|
||||
"assert answers.index.isin(questions.AnswerId).all()\n",
|
||||
"assert questions.AnswerId.isin(dupes.AnswerId).all()\n",
|
||||
"assert dupes.AnswerId.isin(questions.AnswerId).all()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Below are some statistics on the data. Notice that some questions have very low number of duplicates while others may \n",
|
||||
"have a large number. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Report on the data.\n",
|
||||
"print(\"Text statistics:\")\n",
|
||||
"print(\n",
|
||||
" pd.DataFrame(\n",
|
||||
" [\n",
|
||||
" questions.Text.str.len().describe().rename(\"questions\"),\n",
|
||||
" answers.Text.str.len().describe().rename(\"answers\"),\n",
|
||||
" dupes.Text.str.len().describe().rename(\"dupes\"),\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"print(\"\\nDuplication statistics:\")\n",
|
||||
"print(pd.DataFrame([dupes.AnswerId.value_counts().describe().rename(\"duplications\")]))\n",
|
||||
"print(\n",
|
||||
" \"\\nLargest class: {:.2%}\".format(\n",
|
||||
" dupes.AnswerId.value_counts().max() / dupes.shape[0]\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, we reset all indexes to use them as columns in the rest of the steps."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Reset each dataframe's index.\n",
|
||||
"questions.reset_index(inplace=True)\n",
|
||||
"answers.reset_index(inplace=True)\n",
|
||||
"dupes.reset_index(inplace=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We filter the questions and duplicates to have at least min_text number of characters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Apply the minimum text length to questions and dupes.\n",
|
||||
"questions = questions[questions.Text.str.len() >= min_text]\n",
|
||||
"dupes = dupes[dupes.Text.str.len() >= min_text]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Keep only questions with dupes, and dupes of questions.\n",
|
||||
"label_column = \"AnswerId\"\n",
|
||||
"questions = questions[questions[label_column].isin(dupes[label_column])]\n",
|
||||
"dupes = dupes[dupes[label_column].isin(questions[label_column])]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here, we remove questions and their duplicates that are less than min_dupes parameter."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Restrict the questions to those with a minimum number of dupes.\n",
|
||||
"answerid_count = dupes.groupby(label_column)[label_column].count()\n",
|
||||
"answerid_min = answerid_count.index[answerid_count >= min_dupes]\n",
|
||||
"questions = questions[questions[label_column].isin(answerid_min)]\n",
|
||||
"dupes = dupes[dupes[label_column].isin(answerid_min)]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
" # Verify data integrity.\n",
|
||||
"assert questions[label_column].isin(dupes[label_column]).all()\n",
|
||||
"assert dupes[label_column].isin(questions[label_column]).all()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here are some statistics on the resulting dataset."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Report on the data.\n",
|
||||
"print(\"Restrictions: min_text={}, min_dupes={}\".format(min_text, min_dupes))\n",
|
||||
"print(\"Restricted text statistics:\")\n",
|
||||
"print(\n",
|
||||
" pd.DataFrame(\n",
|
||||
" [\n",
|
||||
" questions.Text.str.len().describe().rename(\"questions\"),\n",
|
||||
" dupes.Text.str.len().describe().rename(\"dupes\"),\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"print(\"\\nRestricted duplication statistics:\")\n",
|
||||
"print(\n",
|
||||
" pd.DataFrame([dupes[label_column].value_counts().describe().rename(\"duplications\")])\n",
|
||||
")\n",
|
||||
"print(\n",
|
||||
" \"\\nRestricted largest class: {:.2%}\".format(\n",
|
||||
" dupes[label_column].value_counts().max() / dupes.shape[0]\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prepare train and test sets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this part, we prepare train and test sets. For training a binary classification model, we will need to construct \n",
|
||||
"match and non-match pairs from duplicates and their questions. Finding matching pairs can be accomplished by joining \n",
|
||||
"each duplicate with its question. However, non-match examples need to be constructed randomly. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As a first step, to make sure we train and test the performance of the model on each question, we will need to have \n",
|
||||
"examples of match and non-match pairs for each question both in train and test sets. In order to achieve that, \n",
|
||||
"we split the duplicates in a stratified manner into train and test sets making sure at least 1 or more duplicates per \n",
|
||||
"question is in the test set depending on test_size parameter and number of duplicates per each question."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Split dupes into train and test ensuring at least one of each label class is in test.\n",
|
||||
"dupes_test = round_sample_strat(dupes, dupes[label_column], frac=test_size)\n",
|
||||
"dupes_train = dupes[~dupes.Id.isin(dupes_test.Id)]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"assert dupes_test[label_column].unique().shape[0] == dupes[label_column].unique().shape[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The relevant columns for text pairs data.\n",
|
||||
"balanced_pairs_columns = ['Id_x', 'AnswerId_x', 'Text_x', 'Id_y', 'Text_y', 'AnswerId_y', 'Label', 'n']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we pair each training duplicate in train set with its matching question and N-1 random questions using the \n",
|
||||
"helper function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Use AnswerId to pair each training dupe with its matching question and also with N-1 questions not its match.\n",
|
||||
"balanced_pairs_train = random_merge(dupes_train, questions, N=match)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Labeling is done such that matching pairs are labeled as 1 and non-match pairs are labeled as 0."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Label records by matching AnswerIds.\n",
|
||||
"balanced_pairs_train[\"Label\"] = (\n",
|
||||
" balanced_pairs_train.AnswerId_x == balanced_pairs_train.AnswerId_y\n",
|
||||
").astype(int)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Keep only the relevant data.\n",
|
||||
"balanced_pairs_train = balanced_pairs_train[balanced_pairs_columns]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"balanced_pairs_train.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sort the data by dupe ID and Label.\n",
|
||||
"balanced_pairs_train.sort_values(by=['Id_x', 'Label'], ascending=[True, False], inplace=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In testing set, we match each duplicate with all the original questions and label them same way as training set."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Use AnswerId to pair each testing dupe with all questions.\n",
|
||||
"balanced_pairs_test = random_merge(dupes_test, questions, N=questions.shape[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Label records by matching AnswerIds.\n",
|
||||
"balanced_pairs_test[\"Label\"] = (\n",
|
||||
" balanced_pairs_test.AnswerId_x == balanced_pairs_test.AnswerId_y\n",
|
||||
").astype(int)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Keep only the relevant data.\n",
|
||||
"balanced_pairs_test = balanced_pairs_test[balanced_pairs_columns]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"balanced_pairs_test.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sort the data by dupe ID and Label.\n",
|
||||
"balanced_pairs_test.sort_values(\n",
|
||||
" by=[\"Id_x\", \"Label\"], ascending=[True, False], inplace=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, we report the final train and test sets and save as text files to be used by modeling."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Report on the datasets.\n",
|
||||
"print(\n",
|
||||
" \"balanced_pairs_train: {:,} rows with {:.2%} matches\".format(\n",
|
||||
" balanced_pairs_train.shape[0], balanced_pairs_train.Label.mean()\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"print(\n",
|
||||
" \"balanced_pairs_test: {:,} rows with {:.2%} matches\".format(\n",
|
||||
" balanced_pairs_test.shape[0], balanced_pairs_test.Label.mean()\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.makedirs(outputs_path, exist_ok=True)\n",
|
||||
"\n",
|
||||
"# Save the data.\n",
|
||||
"balanced_pairs_train_path = os.path.join(outputs_path, \"balanced_pairs_train.tsv\")\n",
|
||||
"print(\n",
|
||||
" \"Writing {:,} to {}\".format(\n",
|
||||
" balanced_pairs_train.shape[0], balanced_pairs_train_path\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"balanced_pairs_train.to_csv(\n",
|
||||
" balanced_pairs_train_path, sep=\"\\t\", header=True, index=False\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"balanced_pairs_test_path = os.path.join(outputs_path, \"balanced_pairs_test.tsv\")\n",
|
||||
"print(\n",
|
||||
" \"Writing {:,} to {}\".format(balanced_pairs_test.shape[0], balanced_pairs_test_path)\n",
|
||||
")\n",
|
||||
"balanced_pairs_test.to_csv(balanced_pairs_test_path, sep=\"\\t\", header=True, index=False)\n",
|
||||
"\n",
|
||||
"# Save original questions to be used for scoring later.\n",
|
||||
"questions_path = os.path.join(outputs_path, \"questions.tsv\")\n",
|
||||
"print(\"Writing {:,} to {}\".format(questions.shape[0], questions_path))\n",
|
||||
"questions.to_csv(questions_path, sep=\"\\t\", header=True, index=False)\n",
|
||||
"\n",
|
||||
"# Save the test duplicate questions to be used with the scoring function.\n",
|
||||
"dupes_test_path = os.path.join(outputs_path, \"dupes_test.tsv\")\n",
|
||||
"print(\"Writing {:,} to {}\".format(dupes_test.shape[0], dupes_test_path))\n",
|
||||
"dupes_test.to_csv(dupes_test_path, sep=\"\\t\", header=True, index=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can now move on to [train on local](02_TrainOnLocal.ipynb) notebook to train our model using Azure Machine \n",
|
||||
"Learning."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "az-ml-realtime-score",
|
||||
"language": "python",
|
||||
"name": "az-ml-realtime-score"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
},
|
||||
"pycharm": {
|
||||
"stem_cell": {
|
||||
"cell_type": "raw",
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,664 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Train Locally\n",
|
||||
"In this notebook, you will perform the following using Azure Machine Learning.\n",
|
||||
"* Load workspace.\n",
|
||||
"* Configure & execute a local run in a user-managed Python environment.\n",
|
||||
"* Configure & execute a local run in a system-managed Python environment.\n",
|
||||
"* Configure & execute a local run in a Docker environment.\n",
|
||||
"* Register model for operationalization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from azure_utils.machine_learning.utils import get_workspace_from_config\n",
|
||||
"from azureml.core import Experiment\n",
|
||||
"from azureml.core import ScriptRunConfig\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from notebooks import DIRECTORY"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize Model Hyperparameters"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"parameters"
|
||||
]
|
||||
},
|
||||
"source": [
|
||||
"This notebook uses a training script that uses \n",
|
||||
"[lightgbm](https://lightgbm.readthedocs.io/en/latest/Python-API.html#scikit-learn-api). \n",
|
||||
"Here we set the number of estimators. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"num_estimators = \"10\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize Workspace\n",
|
||||
"\n",
|
||||
"Initialize a workspace object from persisted configuration file."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = get_workspace_from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, sep=\"\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create An Experiment\n",
|
||||
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics \n",
|
||||
"and output artifacts from your experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"experiment_name = \"mlaks-train-on-local\"\n",
|
||||
"exp = Experiment(workspace=ws, name=experiment_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure & Run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this section, we show three different ways of locally training your model through Azure ML SDK for demonstration \n",
|
||||
"purposes. Only one of these runs is sufficient to register the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"### User-managed environment\n",
|
||||
"Below, we use a user-managed run, which means you are responsible to ensure all the necessary packages that are \n",
|
||||
"available in the Python environment you choose to run the script. We will use the environment created for this \n",
|
||||
"tutorial which has Azure ML SDK and other dependencies installed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Editing a run configuration property on-fly.\n",
|
||||
"run_config_user_managed = RunConfiguration()\n",
|
||||
"\n",
|
||||
"run_config_user_managed.environment.python.user_managed_dependencies = True\n",
|
||||
"\n",
|
||||
"# Choose the specific Python environment of this tutorial by pointing to the Python path\n",
|
||||
"run_config_user_managed.environment.python.interpreter_path = (\n",
|
||||
" \"/anaconda/envs/az-ml-realtime-score/bin/python\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Submit script to run in the user-managed environment\n",
|
||||
"Note that the whole `scripts` folder is submitted for execution, including the `item_selector.py` and `label_rank.py` \n",
|
||||
"files. The model will be written to `outputs` directory which is a special directory such that all content in this \n",
|
||||
"directory is automatically uploaded to your workspace. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if not os.path.isdir(\"script\"):\n",
|
||||
" os.mkdir(\"script\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile script/create_model.py\n",
|
||||
"from azure_utils.machine_learning import create_model\n",
|
||||
"\n",
|
||||
"if __name__ == '__main__':\n",
|
||||
" create_model.main()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scrpt = \"create_model.py\"\n",
|
||||
"args = [\n",
|
||||
" \"--inputs\",\n",
|
||||
" os.path.abspath(DIRECTORY + \"/data_folder\"),\n",
|
||||
" \"--outputs\",\n",
|
||||
" \"outputs\",\n",
|
||||
" \"--estimators\",\n",
|
||||
" num_estimators,\n",
|
||||
" \"--match\",\n",
|
||||
" \"5\",\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"src = ScriptRunConfig(\n",
|
||||
" source_directory=\"./script\",\n",
|
||||
" script=scrpt,\n",
|
||||
" arguments=args,\n",
|
||||
" run_config=run_config_user_managed,\n",
|
||||
")\n",
|
||||
"#run = exp.submit(src)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Get run history details"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Block to wait till run finishes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's check that the model is now available in your workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# run.get_file_names()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's retrieve the accuracy of the model from run logs by querying the run metrics."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# run.get_metrics()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"### System-managed environment\n",
|
||||
"You can also ask the system to build a new conda environment and execute your scripts in it. The environment is built \n",
|
||||
"once and will be reused in subsequent executions as long as the conda dependencies remain unchanged. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_config_system_managed = RunConfiguration()\n",
|
||||
"run_config_system_managed.environment.python.user_managed_dependencies = False\n",
|
||||
"run_config_system_managed.auto_prepare_environment = True"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's specify the conda and pip dependencies."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Specify conda dependencies with scikit-learn and pandas\n",
|
||||
"conda_pack = [\"scikit-learn==0.19.1\", \"pandas==0.23.3\"]\n",
|
||||
"requirements = [\"lightgbm==2.1.2\", \"azureml-defaults==1.0.57\", \"Microsoft-AI-Azure-Utility-Samples\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cd = CondaDependencies.create(conda_packages=conda_pack,\n",
|
||||
" pip_packages=requirements)\n",
|
||||
"run_config_system_managed.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"#### Submit script to run in the system-managed environment\n",
|
||||
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, \n",
|
||||
"this might take up to 5 minutes. But this conda environment is reused so long as you don't change the conda \n",
|
||||
"dependencies."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"src = ScriptRunConfig(\n",
|
||||
" source_directory=\"./script\",\n",
|
||||
" script=scrpt,\n",
|
||||
" arguments=args,\n",
|
||||
" run_config=run_config_system_managed,\n",
|
||||
")\n",
|
||||
"run = exp.submit(src)\n",
|
||||
"run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Block and wait till run finishes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.get_file_names()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.get_metrics()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"### Docker-based execution\n",
|
||||
"**IMPORTANT**: You must have Docker engine installed locally in order to use this execution mode. If your kernel is \n",
|
||||
"already running in a Docker container, such as **Azure Notebooks**, this mode will **NOT** work.\n",
|
||||
"\n",
|
||||
"You can also ask the system to pull down a Docker image and execute your scripts in it. We will use the \n",
|
||||
"`continuumio/miniconda3` image for that purpose."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_config_docker = RunConfiguration()\n",
|
||||
"run_config_docker.environment.python.user_managed_dependencies = False\n",
|
||||
"run_config_docker.auto_prepare_environment = True\n",
|
||||
"run_config_docker.environment.docker.enabled = True\n",
|
||||
"run_config_docker.environment.docker.base_image = \"continuumio/miniconda3\"\n",
|
||||
"\n",
|
||||
"# Specify conda and pip dependencies\n",
|
||||
"cd = CondaDependencies.create(conda_packages=conda_pack,\n",
|
||||
" pip_packages=requirements)\n",
|
||||
"run_config_docker.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here, we map the local `data_folder` that includes the training and testing data to the docker container using `-v` \n",
|
||||
"flag."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"host_dir = os.path.abspath(DIRECTORY + \"/data_folder\")\n",
|
||||
"container_dir = \"/data_folder\"\n",
|
||||
"docker_arg = \"{}:{}\".format(host_dir, container_dir)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This time the run will use the mapped `data_folder` inside the docker container to find the data files."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"args = [\n",
|
||||
" \"--inputs\",\n",
|
||||
" \"/data_folder\",\n",
|
||||
" \"--outputs\",\n",
|
||||
" \"outputs\",\n",
|
||||
" \"--estimators\",\n",
|
||||
" num_estimators,\n",
|
||||
" \"--match\",\n",
|
||||
" \"5\",\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_config_docker.environment.docker.arguments.append(\"-v\")\n",
|
||||
"run_config_docker.environment.docker.arguments.append(docker_arg)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"src = ScriptRunConfig(\n",
|
||||
" source_directory=\"./script\",\n",
|
||||
" script=scrpt,\n",
|
||||
" arguments=args,\n",
|
||||
" run_config=run_config_docker,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run = exp.submit(src)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Register Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.get_metrics()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Register Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We now register the model with the workspace so that we can later deploy the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# supply a model name, and the full path to the serialized model file.\n",
|
||||
"model = run.register_model(model_name=\"question_match_model\",\n",
|
||||
" model_path=\"./outputs/model.pkl\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(model.name, model.version, model.url, sep=\"\\n\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "roastala"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "az-ml-realtime-score",
|
||||
"language": "python",
|
||||
"name": "az-ml-realtime-score"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
},
|
||||
"pycharm": {
|
||||
"stem_cell": {
|
||||
"cell_type": "raw",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": []
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,173 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License.\n",
|
||||
"\n",
|
||||
"# Develop Scoring Script\n",
|
||||
"\n",
|
||||
"In this notebook, we will develop the scoring script and test it locally. We will use the scoring script to create the \n",
|
||||
"web service that will call the model for scoring."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"from azure_utils.machine_learning.utils import get_workspace_from_config\n",
|
||||
"from azure_utils.utilities import text_to_json\n",
|
||||
"from azureml.core.model import Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sys.path.append('./scripts/')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's load the workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = get_workspace_from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, sep=\"\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's retrieve the model registered earlier and download it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_name = 'question_match_model'\n",
|
||||
"\n",
|
||||
"model = Model(ws, name=model_name)\n",
|
||||
"print(model.name, model.version, model.url, sep=\"\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model.download(target_dir=\".\", exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Scoring Script"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We use the writefile magic to write the contents of the below cell to `score.py` which includes the `init` and `run` \n",
|
||||
"functions required by AML.\n",
|
||||
"- The init() function typically loads the model into a global object.\n",
|
||||
"- The run(input_data) function uses the model to predict a value based on the input_data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile score.py\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"import json\n",
|
||||
"import logging\n",
|
||||
"import timeit as t\n",
|
||||
"from azure_utils.machine_learning.duplicate_model import DuplicateModel\n",
|
||||
"\n",
|
||||
"def init():\n",
|
||||
" logger = logging.getLogger(\"scoring_script\")\n",
|
||||
" global model\n",
|
||||
" model_path = \"model.pkl\"\n",
|
||||
" questions_path = \"./data_folder/questions.tsv\"\n",
|
||||
" start = t.default_timer()\n",
|
||||
" model = DuplicateModel(model_path, questions_path)\n",
|
||||
" end = t.default_timer()\n",
|
||||
" loadTimeMsg = \"Model loading time: {0} ms\".format(\n",
|
||||
" round((end - start) * 1000, 2))\n",
|
||||
" logger.info(loadTimeMsg)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def run(body):\n",
|
||||
" logger = logging.getLogger(\"scoring_script\")\n",
|
||||
" json_load_text = json.loads(body)\n",
|
||||
" text_to_score = json_load_text[\"input\"]\n",
|
||||
" start = t.default_timer()\n",
|
||||
" resp = model.score(text_to_score)\n",
|
||||
" end = t.default_timer()\n",
|
||||
" logger.info(\"Prediction took {0} ms\".format(round((end - start) * 1000,\n",
|
||||
" 2)))\n",
|
||||
" return json.dumps(resp)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "az-ml-realtime-score",
|
||||
"language": "python",
|
||||
"name": "az-ml-realtime-score"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
},
|
||||
"pycharm": {
|
||||
"stem_cell": {
|
||||
"cell_type": "raw",
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,398 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License.\n",
|
||||
"\n",
|
||||
"# Create Image\n",
|
||||
"In this notebook, we show the following steps for deploying a web service using AzureML:\n",
|
||||
"- Create an image\n",
|
||||
"- Test image locally"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azure_utils.machine_learning.utils import load_configuration, get_workspace_from_config\n",
|
||||
"from azure_utils.utilities import text_to_json\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from notebooks import DIRECTORY\n",
|
||||
"\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"AML will use the following information to create an image, provision a cluster and deploy a service. Replace the \n",
|
||||
"values in the following cell with your information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cfg = load_configuration(\"../workspace_conf.yml\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
},
|
||||
"tags": [
|
||||
"parameters"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"image_name = cfg['image_name']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Get workspace\n",
|
||||
"Load existing workspace from the config file."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = get_workspace_from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, sep=\"\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Load model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_name = 'question_match_model'\n",
|
||||
"\n",
|
||||
"model = Model(ws, name=model_name)\n",
|
||||
"print(model.name, model.version)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create an image\n",
|
||||
"We will now modify the `score.py` created in the previous notebook so that the `init()` function uses the model we \n",
|
||||
"registered to the workspace earlier."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile score.py\n",
|
||||
"\n",
|
||||
"import sys\n",
|
||||
"import pandas as pd\n",
|
||||
"import json\n",
|
||||
"import logging\n",
|
||||
"import timeit as t\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.contrib.services.aml_request import rawhttp\n",
|
||||
"from sklearn.base import BaseEstimator, TransformerMixin\n",
|
||||
"from azure_utils.machine_learning.duplicate_model import DuplicateModel\n",
|
||||
"\n",
|
||||
"sys.path.append('./scripts/')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def init():\n",
|
||||
" logger = logging.getLogger(\"scoring_script\")\n",
|
||||
" global model\n",
|
||||
" model_name = 'question_match_model'\n",
|
||||
" model_path = Model.get_model_path(model_name)\n",
|
||||
" questions_path = './notebooks/data_folder/questions.tsv'\n",
|
||||
" start = t.default_timer()\n",
|
||||
" model = DuplicateModel(model_path, questions_path)\n",
|
||||
" end = t.default_timer()\n",
|
||||
" loadTimeMsg = \"Model loading time: {0} ms\".format(\n",
|
||||
" round((end - start) * 1000, 2))\n",
|
||||
" logger.info(loadTimeMsg)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@rawhttp\n",
|
||||
"def run(request):\n",
|
||||
" \"\"\"\n",
|
||||
" Function runs on each request\n",
|
||||
" \"\"\"\n",
|
||||
" body = request.data\n",
|
||||
" if request.method == 'POST':\n",
|
||||
" logger = logging.getLogger(\"scoring_script\")\n",
|
||||
" json_load_text = json.loads(body)\n",
|
||||
" text_to_score = json_load_text['input']\n",
|
||||
" start = t.default_timer()\n",
|
||||
" resp = model.score(text_to_score)\n",
|
||||
" end = t.default_timer()\n",
|
||||
" logger.info(\"Prediction took {0} ms\".format(\n",
|
||||
" round((end - start) * 1000, 2)))\n",
|
||||
" return (json.dumps(resp))\n",
|
||||
" if request.method == 'GET':\n",
|
||||
" resp_body = {\n",
|
||||
" \"azEnvironment\": \"Azure\",\n",
|
||||
" \"location\": \"westus2\",\n",
|
||||
" \"osType\": \"Ubuntu 16.04\",\n",
|
||||
" \"resourceGroupName\": \"\",\n",
|
||||
" \"resourceId\": \"\",\n",
|
||||
" \"sku\": \"\",\n",
|
||||
" \"subscriptionId\": \"\",\n",
|
||||
" \"uniqueId\": \"PythonMLRST\",\n",
|
||||
" \"vmSize\": \"\",\n",
|
||||
" \"zone\": \"\",\n",
|
||||
" \"isServer\": False,\n",
|
||||
" \"version\": \"\"\n",
|
||||
" }\n",
|
||||
" return (resp_body)\n",
|
||||
" return AMLResponse(\"bad request\", 500)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's specify the conda and pip dependencies for the image."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"conda_pack = [\"scikit-learn==0.19.1\", \"pandas==0.23.3\"]\n",
|
||||
"requirements = [\n",
|
||||
" \"lightgbm==2.1.2\", \"azureml-defaults==1.0.57\", \"azureml-contrib-services\", \n",
|
||||
" \"Microsoft-AI-Azure-Utility-Samples\"\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"lgbmenv = CondaDependencies.create(conda_packages=conda_pack,\n",
|
||||
" pip_packages=requirements)\n",
|
||||
"\n",
|
||||
"with open(\"lgbmenv.yml\", \"w\") as f:\n",
|
||||
" f.write(lgbmenv.serialize_to_string())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.image import ContainerImage\n",
|
||||
"\n",
|
||||
"image_config = ContainerImage.image_configuration(\n",
|
||||
" execution_script=\"score.py\",\n",
|
||||
" runtime=\"python\",\n",
|
||||
" conda_file=\"lgbmenv.yml\",\n",
|
||||
" description=\"Image with lightgbm model\",\n",
|
||||
" tags={\n",
|
||||
" \"area\": \"text\",\n",
|
||||
" \"type\": \"lightgbm\"\n",
|
||||
" },\n",
|
||||
" dependencies=[\n",
|
||||
" \"./notebooks/data_folder/questions.tsv\"\n",
|
||||
" ],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"image = ContainerImage.create(\n",
|
||||
" name=image_name,\n",
|
||||
" # this is the model object\n",
|
||||
" models=[model],\n",
|
||||
" image_config=image_config,\n",
|
||||
" workspace=ws,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"image.wait_for_creation(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(image.name, image.version)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"image_version = str(image.version)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can find the logs of image creation in the following location."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"image.image_build_log_uri"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test image locally"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's use one of the duplicate questions to test our image."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dupes_test_path = DIRECTORY + '/data_folder/dupes_test.tsv'\n",
|
||||
"dupes_test = pd.read_csv(dupes_test_path, sep='\\t', encoding='latin1')\n",
|
||||
"text_to_score = dupes_test.iloc[0, 4]\n",
|
||||
"text_to_score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"json_text = text_to_json(text_to_score)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"image.run(input_data=json_text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Conclusion\n",
|
||||
"\n",
|
||||
"We have created a Docker image using AzureML and registered this image on Azure Container Registry (ACR). This Docker \n",
|
||||
"image encapsulates a trained machine learning model and scoring scripts. In the next step, we can take this image \n",
|
||||
"and deploy it on the compute target of your choice: Azure Kubernetes Service (AKS) Cluster or Azure IoT Edge."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "raymondl"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "az-ml-realtime-score",
|
||||
"language": "python",
|
||||
"name": "az-ml-realtime-score"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
},
|
||||
"pycharm": {
|
||||
"stem_cell": {
|
||||
"cell_type": "raw",
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,646 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Deploying a web service to Azure Kubernetes Service (AKS)\n",
|
||||
"In this notebook, we show the following steps for deploying a web service using AzureML:\n",
|
||||
"- Provision an AKS cluster (one time action)\n",
|
||||
"- Deploy the service\n",
|
||||
"- Test the web service\n",
|
||||
"- Scale up the service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import requests\n",
|
||||
"from azure_utils.machine_learning.utils import get_workspace_from_config\n",
|
||||
"from azure_utils.machine_learning.utils import load_configuration\n",
|
||||
"from azure_utils.utilities import text_to_json\n",
|
||||
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
||||
"from azureml.core.webservice import Webservice, AksWebservice\n",
|
||||
"\n",
|
||||
"from notebooks import DIRECTORY"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"AML will use the following information to create an image, provision a cluster and deploy a service. Replace the \n",
|
||||
"values in the following cell with your information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cfg = load_configuration(\"../workspace_conf.yml\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"parameters"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"image_name = cfg['image_name']\n",
|
||||
"aks_service_name = cfg['aks_service_name']\n",
|
||||
"aks_name = cfg['aks_name']\n",
|
||||
"aks_location = cfg['workspace_region']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get workspace\n",
|
||||
"Load existing workspace from the config file."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = get_workspace_from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, sep=\"\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"image = ws.images[image_name]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Restore the statistics data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Provision the AKS Cluster\n",
|
||||
"This is a one time setup. You can reuse this cluster for multiple deployments after it has been created. If you delete \n",
|
||||
"the cluster or the resource group that contains it, then you would have to recreate it. Let's first check if there are \n",
|
||||
"enough cores in the subscription for the cluster."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vm_family = \"Dv2\"\n",
|
||||
"vm_size = \"Standard_D4_v2\"\n",
|
||||
"vm_cores = 8\n",
|
||||
"node_count = 4"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vm_dict = {vm_family: {\"size\": vm_size, \"cores\": vm_cores}}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prov_config = AksCompute.provisioning_configuration(agent_count=node_count,\n",
|
||||
" vm_size=vm_size,\n",
|
||||
" location=aks_location)\n",
|
||||
"\n",
|
||||
"# Create the cluster\n",
|
||||
"aks_target = ComputeTarget.create(workspace=ws,\n",
|
||||
" name=aks_name,\n",
|
||||
" provisioning_configuration=prov_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"aks_target.wait_for_completion(show_output=True)\n",
|
||||
"print(aks_target.provisioning_state)\n",
|
||||
"print(aks_target.provisioning_errors)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's check that the cluster is created successfully."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_status = aks_target.get_status()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"assert aks_status == 'Succeeded', 'AKS failed to create'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy web service to AKS"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"parameters"
|
||||
]
|
||||
},
|
||||
"source": [
|
||||
"Next, we deploy the web service. We deploy two pods with 1 CPU core each."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"num_replicas = 2\n",
|
||||
"cpu_cores = 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Set the web service configuration\n",
|
||||
"aks_config = AksWebservice.deploy_configuration(num_replicas=num_replicas,\n",
|
||||
" cpu_cores=cpu_cores)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_service = Webservice.deploy_from_image(\n",
|
||||
" workspace=ws,\n",
|
||||
" name=aks_service_name,\n",
|
||||
" image=image,\n",
|
||||
" deployment_config=aks_config,\n",
|
||||
" deployment_target=aks_target,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"aks_service.wait_for_deployment(show_output=True)\n",
|
||||
"print(aks_service.state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can check the logs of the web service with the below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_service.get_logs()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test the web service\n",
|
||||
"We now test the web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"num_dupes_to_score = 4"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dupes_test_path = DIRECTORY + '/data_folder/dupes_test.tsv'\n",
|
||||
"dupes_test = pd.read_csv(dupes_test_path, sep='\\t', encoding='latin1')\n",
|
||||
"text_to_score = dupes_test.iloc[0, num_dupes_to_score]\n",
|
||||
"text_to_score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"json_text = text_to_json(text_to_score)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"prediction = aks_service.run(input_data=json_text)\n",
|
||||
"print(prediction)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's try a few more duplicate questions and display their top 3 original matches. Let's first get the scoring URL \n",
|
||||
"and API key for the web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scoring_url = aks_service.scoring_uri\n",
|
||||
"api_key = aks_service.get_keys()[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Write the URI and key to the statistics tracker."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"headers = {\n",
|
||||
" 'content-type': 'application/json',\n",
|
||||
" 'Authorization': ('Bearer ' + api_key)\n",
|
||||
"}\n",
|
||||
"r = requests.post(\n",
|
||||
" scoring_url, data=json_text,\n",
|
||||
" headers=headers) # Run the request twice since the first time takes a\n",
|
||||
"%time r = requests.post(scoring_url, data=json_text, headers=headers) # little longer due to the loading of the model\n",
|
||||
"print(r)\n",
|
||||
"r.json()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dupes_to_score = dupes_test.iloc[:5, num_dupes_to_score]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"results = [\n",
|
||||
" requests.post(scoring_url, data=text_to_json(text), headers=headers)\n",
|
||||
" for text in dupes_to_score\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's print top 3 matches for each duplicate question."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"[eval(results[i].json())[0:3] for i in range(0, len(results))]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next let's quickly check what the request response performance is for the deployed model on AKS cluster."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_data = list(map(text_to_json, dupes_to_score)) # Retrieve the text data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"timer_results = list()\n",
|
||||
"for text in text_data:\n",
|
||||
" res=%timeit -r 1 -o -q requests.post(scoring_url, data=text, headers=headers)\n",
|
||||
" timer_results.append(res.best)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"timer_results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"Average time taken: {0:4.2f} ms\".format(10 ** 3 * np.mean(timer_results)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Scaling"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this part, we scale the number of pods to make sure we fully utilize the AKS cluster. To connect to the Kubernetes \n",
|
||||
"cluster, we will use kubectl, the Kubernetes command-line client. To install, run the following:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!sudo az aks install-cli"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we will get the credentials to connect to the cluster."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.makedirs(os.path.join(os.path.expanduser('~'),'.kube'), exist_ok=True) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"config_path = os.path.join(os.path.expanduser('~'),'.kube/config')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open(config_path, 'a') as f:\n",
|
||||
" f.write(aks_target.get_credentials()['userKubeConfig'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's check the nodes and pods of the cluster."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!kubectl get nodes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!kubectl get pods --all-namespaces"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!kubectl get events"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can now scale up the number of pods."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"new_num_replicas = 10"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!kubectl get namespaces"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!kubectl scale --current-replicas=$num_replicas \\\n",
|
||||
" --replicas=$new_num_replicas {\"deployments/\" + aks_service_name} \\\n",
|
||||
" --namespace azureml-workspace"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!kubectl get pods --all-namespaces"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!kubectl get deployment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we will test the [throughput of the web service](06_SpeedTestWebApp.ipynb)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "raymondl"
|
||||
}
|
||||
],
|
||||
"celltoolbar": "Tags",
|
||||
"kernelspec": {
|
||||
"display_name": "az-ml-realtime-score",
|
||||
"language": "python",
|
||||
"name": "az-ml-realtime-score"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
},
|
||||
"pycharm": {
|
||||
"stem_cell": {
|
||||
"cell_type": "raw",
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,250 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Load Test deployed web application"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook pulls some examples and tests them against the deployed web application. We submit requests asynchronously \n",
|
||||
"which should reduce the contribution of latency."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from urllib.parse import urlparse\n",
|
||||
"\n",
|
||||
"from azure_utils.machine_learning.utils import get_workspace_from_config\n",
|
||||
"from azureml.core.webservice import AksWebservice\n",
|
||||
"from dotenv import get_key"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = get_workspace_from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep=\"\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's retrieve the web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_service_name = get_key(env_path, 'aks_service_name')\n",
|
||||
"aks_service = AksWebservice(ws, name=aks_service_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We will test our service concurrently but only have 4 concurrent requests at any time. We have only deployed one pod \n",
|
||||
"on one node and increasing the number of concurrent calls does not really increase throughput. Feel free to try \n",
|
||||
"different values and see how the service responds."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"parameters"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"CONCURRENT_REQUESTS = 4 # Number of requests at a time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Get the scoring URL and API key of the service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scoring_url = aks_service.scoring_uri\n",
|
||||
"api_key = aks_service.get_keys()[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Below we are going to use [Locust](https://locust.io/) to load test our deployed model. First we need to write the \n",
|
||||
"locustfile."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile locustfile.py\n",
|
||||
"from locust import HttpLocust, TaskSet, task\n",
|
||||
"import os\n",
|
||||
"import pandas as pd\n",
|
||||
"from utilities import text_to_json\n",
|
||||
"from itertools import cycle\n",
|
||||
"\n",
|
||||
"_NUMBER_OF_REQUESTS = os.getenv('NUMBER_OF_REQUESTS', 100)\n",
|
||||
"dupes_test_path = './data_folder/dupes_test.tsv'\n",
|
||||
"dupes_test = pd.read_csv(dupes_test_path, sep='\\t', encoding='latin1')\n",
|
||||
"dupes_to_score = dupes_test.iloc[:_NUMBER_OF_REQUESTS, 4]\n",
|
||||
"_SCORE_PATH = os.getenv('SCORE_PATH', \"/score\")\n",
|
||||
"_API_KEY = os.getenv('API_KEY')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class UserBehavior(TaskSet):\n",
|
||||
" def on_start(self):\n",
|
||||
" print('Running setup')\n",
|
||||
" self._text_generator = cycle(dupes_to_score.apply(text_to_json))\n",
|
||||
" self._headers = {\n",
|
||||
" \"content-type\": \"application/json\",\n",
|
||||
" 'Authorization': ('Bearer {}'.format(_API_KEY))\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" @task\n",
|
||||
" def score(self):\n",
|
||||
" self.client.post(_SCORE_PATH,\n",
|
||||
" data=next(self._text_generator),\n",
|
||||
" headers=self._headers)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class WebsiteUser(HttpLocust):\n",
|
||||
" task_set = UserBehavior\n",
|
||||
" # min and max time to wait before repeating task\n",
|
||||
" min_wait = 10\n",
|
||||
" max_wait = 200"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Below we define the locust command we want to run. We are going to run at a hatch rate of 10 and the whole test will \n",
|
||||
"last 1 minute. Feel free to adjust the parameters below and see how the results differ. The results of the test will \n",
|
||||
"be saved to two csv files **modeltest_requests.csv** and **modeltest_distribution.csv**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"parsed_url = urlparse(scoring_url)\n",
|
||||
"cmd = \"locust -H {host} --no-web -c {users} -r {rate} -t {duration} --csv=modeltest --only-summary\".format(\n",
|
||||
" host=\"{url.scheme}://{url.netloc}\".format(url=parsed_url),\n",
|
||||
" users=CONCURRENT_REQUESTS, # concurrent users\n",
|
||||
" rate=10, # hatch rate (users / second)\n",
|
||||
" duration='1m', # test duration\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! API_KEY={api_key} SCORE_PATH={parsed_url.path} PYTHONPATH={os.path.abspath('../')} {cmd}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here are the summary results of our test and below that the distribution information of those tests. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pd.read_csv(\"modeltest_requests.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pd.read_csv(\"modeltest_distribution.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To tear down the cluster and all related resources go to the [tear down the cluster](07_TearDown.ipynb) notebook."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"jupytext": {
|
||||
"formats": "ipynb"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "az-ml-realtime-score",
|
||||
"language": "python",
|
||||
"name": "az-ml-realtime-score"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
},
|
||||
"pycharm": {
|
||||
"stem_cell": {
|
||||
"cell_type": "raw",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": []
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,741 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# Explore Duplicate Question Matches\n",
|
||||
"Use this dashboard to explore the relationship between duplicate and original questions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"This section loads needed packages, and defines useful functions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from __future__ import print_function\n",
|
||||
"\n",
|
||||
"import math\n",
|
||||
"\n",
|
||||
"import ipywidgets as widgets\n",
|
||||
"import pandas as pd\n",
|
||||
"import requests\n",
|
||||
"from azureml.core.webservice import AksWebservice\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from dotenv import get_key, find_dotenv\n",
|
||||
"from azure_utils.machine_learning.utils import get_workspace_from_config\n",
|
||||
"from azure_utils.utilities import read_questions, text_to_json, get_auth\n",
|
||||
"from notebooks import DIRECTORY"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = get_workspace_from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, sep=\"\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_service_name = get_key(env_path, 'aks_service_name')\n",
|
||||
"aks_service = AksWebservice(ws, name=aks_service_name)\n",
|
||||
"aks_service.name"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Load the duplicate questions scoring app's URL."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scoring_url = aks_service.scoring_uri\n",
|
||||
"api_key = aks_service.get_keys()[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"A constructor function for ID-text contents. Constructs buttons and text areas for each text ID and text passage.\n",
|
||||
"* Each buttons's description is set to a text's ID, and its click action is set to the handler.\n",
|
||||
"* Each text area's content is set to a text.\n",
|
||||
"* A dictionary is created to map IDs to text areas."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def buttons_and_texts(data,\n",
|
||||
" id,\n",
|
||||
" answerid,\n",
|
||||
" text,\n",
|
||||
" handle_click,\n",
|
||||
" layout=widgets.Layout(width=\"100%\"),\n",
|
||||
" n=15):\n",
|
||||
" \"\"\"Construct buttons, text areas, and a mapping from IDs to text areas.\"\"\"\n",
|
||||
" items = []\n",
|
||||
" text_map = {}\n",
|
||||
" for i in range(min(n, len(data))):\n",
|
||||
" button = widgets.Button(description=data.iloc[i][id])\n",
|
||||
" button.answerid = data.iloc[i][answerid] if answerid in data else None\n",
|
||||
" button.open = False\n",
|
||||
" button.on_click(handle_click)\n",
|
||||
" items.append(button)\n",
|
||||
" text_area = widgets.Textarea(data.iloc[i][text],\n",
|
||||
" placeholder=data.iloc[i][id],\n",
|
||||
" layout=layout)\n",
|
||||
" items.append(text_area)\n",
|
||||
" text_map[data.iloc[i][id]] = text_area\n",
|
||||
" return items, text_map"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"A constructor function for the duplicates and questions explorer widget. This builds a box containing duplicates and \n",
|
||||
"question tabs, each in turn containing boxes that contain the buttons and text areas."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def duplicates_questions_widget(duplicates,\n",
|
||||
" questions,\n",
|
||||
" layout=widgets.Layout(width=\"100%\")):\n",
|
||||
" \"\"\"Construct a duplicates and questions exploration widget.\"\"\"\n",
|
||||
" # Construct the duplicates Tab of buttons and text areas.\n",
|
||||
" duplicates_items, duplicates_map = buttons_and_texts(\n",
|
||||
" duplicates,\n",
|
||||
" duplicates_id,\n",
|
||||
" duplicates_answerid,\n",
|
||||
" duplicates_text,\n",
|
||||
" duplicates_click,\n",
|
||||
" n=duplicates.shape[0],\n",
|
||||
" )\n",
|
||||
" duplicates_tab = widgets.Tab(\n",
|
||||
" [widgets.VBox(duplicates_items, layout=layout)],\n",
|
||||
" layout=widgets.Layout(width=\"100%\", height=\"500px\", overflow_y=\"auto\"),\n",
|
||||
" )\n",
|
||||
" duplicates_tab.set_title(0, duplicates_title)\n",
|
||||
" # Construct the questions Tab of buttons and text areas.\n",
|
||||
" questions_items, questions_map = buttons_and_texts(\n",
|
||||
" questions,\n",
|
||||
" questions_id,\n",
|
||||
" questions_answerid,\n",
|
||||
" questions_text,\n",
|
||||
" questions_click,\n",
|
||||
" n=questions.shape[0],\n",
|
||||
" )\n",
|
||||
" questions_tab = widgets.Tab(\n",
|
||||
" [widgets.VBox(questions_items, layout=layout)],\n",
|
||||
" layout=widgets.Layout(width=\"100%\", height=\"500px\", overflow_y=\"auto\"),\n",
|
||||
" )\n",
|
||||
" questions_tab.set_title(0, questions_title)\n",
|
||||
" # Put both tabs in an HBox.\n",
|
||||
" duplicates_questions = widgets.HBox([duplicates_tab, questions_tab],\n",
|
||||
" layout=layout)\n",
|
||||
" return duplicates_map, questions_map, duplicates_questions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"A handler function for a question passage button press. If the passage's text window is open, it is collapsed. \n",
|
||||
"Otherwise, it is opened."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def questions_click(button):\n",
|
||||
" \"\"\"Respond to a click on a question button.\"\"\"\n",
|
||||
" global questions_map\n",
|
||||
" if button.open:\n",
|
||||
" questions_map[button.description].rows = None\n",
|
||||
" button.open = False\n",
|
||||
" else:\n",
|
||||
" questions_map[button.description].rows = 10\n",
|
||||
" button.open = True"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"A handler function for a duplicate obligation button press. If the obligation is not selected, select it and update \n",
|
||||
"the questions tab with its top 15 question passages ordered by match score. Otherwise, if the duplicate's text window \n",
|
||||
"is open, it is collapsed, else it is opened."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def duplicates_click(button):\n",
|
||||
" \"\"\"Respond to a click on a duplicate button.\"\"\"\n",
|
||||
" global duplicates_map\n",
|
||||
" if select_duplicate(button):\n",
|
||||
" duplicates_map[button.description].rows = 10\n",
|
||||
" button.open = True\n",
|
||||
" else:\n",
|
||||
" if button.open:\n",
|
||||
" duplicates_map[button.description].rows = None\n",
|
||||
" button.open = False\n",
|
||||
" else:\n",
|
||||
" duplicates_map[button.description].rows = 10\n",
|
||||
" button.open = True\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def select_duplicate(button):\n",
|
||||
" \"\"\"Update the displayed questions to correspond to the button's duplicate\n",
|
||||
" selections. Returns whether or not the selected duplicate changed.\n",
|
||||
" \"\"\"\n",
|
||||
" global selected_button, questions_map, duplicates_questions\n",
|
||||
" if \"selected_button\" not in globals() or button != selected_button:\n",
|
||||
" if \"selected_button\" in globals():\n",
|
||||
" selected_button.style.button_color = None\n",
|
||||
" selected_button.style.font_weight = \"\"\n",
|
||||
" selected_button = button\n",
|
||||
" selected_button.style.button_color = \"yellow\"\n",
|
||||
" selected_button.style.font_weight = \"bold\"\n",
|
||||
" duplicates_text = duplicates_map[selected_button.description].value\n",
|
||||
" questions_scores = score_text(duplicates_text)\n",
|
||||
" ordered_questions = questions.loc[questions_scores[questions_id]]\n",
|
||||
" questions_items, questions_map = buttons_and_texts(\n",
|
||||
" ordered_questions,\n",
|
||||
" questions_id,\n",
|
||||
" questions_answerid,\n",
|
||||
" questions_text,\n",
|
||||
" questions_click,\n",
|
||||
" n=questions_display,\n",
|
||||
" )\n",
|
||||
" if questions_button_color is True and selected_button.answerid is not None:\n",
|
||||
" set_button_color(questions_items[::2], selected_button.answerid)\n",
|
||||
" if questions_button_score is True:\n",
|
||||
" questions_items = [\n",
|
||||
" item for button, text_area in zip(*[iter(questions_items)] * 2)\n",
|
||||
" for item in (add_button_prob(button, questions_scores),\n",
|
||||
" text_area)\n",
|
||||
" ]\n",
|
||||
" duplicates_questions.children[1].children[0].children = questions_items\n",
|
||||
" duplicates_questions.children[1].set_title(0,\n",
|
||||
" selected_button.description)\n",
|
||||
" return True\n",
|
||||
" else:\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def add_button_prob(button, questions_scores):\n",
|
||||
" \"\"\"Return an HBox containing button and its probability.\"\"\"\n",
|
||||
" id = button.description\n",
|
||||
" prob = widgets.Label(score_label + \": \" + str(\n",
|
||||
" int(\n",
|
||||
" math.ceil(score_scale *\n",
|
||||
" questions_scores.loc[id][questions_probability]))))\n",
|
||||
" return widgets.HBox([button, prob])\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def set_button_color(button, answerid):\n",
|
||||
" \"\"\"Set each button's color according to its label.\"\"\"\n",
|
||||
" for i in range(len(button)):\n",
|
||||
" button[i].style.button_color = (\n",
|
||||
" \"lightgreen\" if button[i].answerid == answerid else None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Functions for interacting with the web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def score_text(text):\n",
|
||||
" \"\"\"Return a data frame with the original question scores for the text.\"\"\"\n",
|
||||
" headers = {\n",
|
||||
" \"content-type\": \"application/json\",\n",
|
||||
" \"Authorization\": (\"Bearer \" + api_key),\n",
|
||||
" }\n",
|
||||
" # jsontext = json.dumps({'input':'{0}'.format(text)})\n",
|
||||
" jsontext = text_to_json(text)\n",
|
||||
" result = requests.post(scoring_url, data=jsontext, headers=headers)\n",
|
||||
" # scores = result.json()['result'][0]\n",
|
||||
" scores = eval(result.json())\n",
|
||||
" scores_df = pd.DataFrame(\n",
|
||||
" scores,\n",
|
||||
" columns=[questions_id, questions_answerid, questions_probability])\n",
|
||||
" scores_df[questions_id] = scores_df[questions_id].astype(str)\n",
|
||||
" scores_df[questions_answerid] = scores_df[questions_answerid].astype(str)\n",
|
||||
" scores_df = scores_df.set_index(questions_id, drop=False)\n",
|
||||
" return scores_df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Control the appearance of cell output boxes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%html\n",
|
||||
"<style>\n",
|
||||
".output_wrapper, .output {\n",
|
||||
" height:auto !important;\n",
|
||||
" max-height:1000px; /* your desired max-height here */\n",
|
||||
"}\n",
|
||||
".output_scroll {\n",
|
||||
" box-shadow:none !important;\n",
|
||||
" webkit-box-shadow:none !important;\n",
|
||||
"}\n",
|
||||
"</style>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Load data\n",
|
||||
"\n",
|
||||
"Load the pre-formatted text of questions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"questions_title = 'Questions'\n",
|
||||
"questions_id = 'Id'\n",
|
||||
"questions_answerid = 'AnswerId'\n",
|
||||
"questions_text = 'Text'\n",
|
||||
"questions_probability = 'Probability'\n",
|
||||
"questions_path = DIRECTORY + '/data_folder/questions.tsv'\n",
|
||||
"questions = read_questions(questions_path, questions_id, questions_answerid)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Load the pre-formatted text of duplicates."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"duplicates_title = 'Duplicates'\n",
|
||||
"duplicates_id = 'Id'\n",
|
||||
"duplicates_answerid = 'AnswerId'\n",
|
||||
"duplicates_text = 'Text'\n",
|
||||
"duplicates_path = DIRECTORY + '/data_folder/dupes_test.tsv'\n",
|
||||
"duplicates = read_questions(duplicates_path, duplicates_id, duplicates_answerid)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Explore original questions matched up with duplicate questions\n",
|
||||
"\n",
|
||||
"Define other variables and settings used in creating the interface."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"questions_display = 15\n",
|
||||
"questions_button_color = True\n",
|
||||
"questions_button_score = True\n",
|
||||
"score_label = 'Score'\n",
|
||||
"score_scale = 100"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"This builds the exploration widget as a box containing duplicates and question tabs, each in turn containing boxes \n",
|
||||
"that have for each ID-text pair a button and a text area."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {},
|
||||
"report_default": {
|
||||
"hidden": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"duplicates_map, questions_map, duplicates_questions = duplicates_questions_widget(duplicates, questions)\n",
|
||||
"duplicates_questions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To tear down the cluster and related resources go to the [last notebook](08_TearDown.ipynb)."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"extensions": {
|
||||
"jupyter_dashboards": {
|
||||
"activeView": "report_default",
|
||||
"version": 1,
|
||||
"views": {
|
||||
"grid_default": {
|
||||
"name": "grid",
|
||||
"type": "grid"
|
||||
},
|
||||
"report_default": {
|
||||
"name": "report",
|
||||
"type": "report"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "az-ml-realtime-score",
|
||||
"language": "python",
|
||||
"name": "az-ml-realtime-score"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
},
|
||||
"pycharm": {
|
||||
"stem_cell": {
|
||||
"cell_type": "raw",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": []
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,246 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tear it all down\n",
|
||||
"Use this notebook to clean up the web service, image, model and the AKS cluster created by the tutorial."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AksCompute\n",
|
||||
"from azureml.core.image import Image\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.webservice import AksWebservice\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from dotenv import get_key, find_dotenv\n",
|
||||
"from azure_utils.utilities import get_auth"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"env_path = find_dotenv(raise_error_if_not_found=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's get the workspace information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config(auth=get_auth(env_path))\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, sep=\"\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's retrieve the web service to delete."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_service_name = get_key(env_path, 'aks_service_name')\n",
|
||||
"aks_service = AksWebservice(ws, name=aks_service_name)\n",
|
||||
"print(aks_service.name, aks_service.tags)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's retrieve the image to delete."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"image_name = get_key(env_path, 'image_name')\n",
|
||||
"image_version = int(get_key(env_path, 'image_version'))\n",
|
||||
"image = Image(ws, name=image_name, version=image_version)\n",
|
||||
"print(image.name, image.version)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's retrieve the model to delete."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_name = 'question_match_model'\n",
|
||||
"model_version = int(get_key(env_path, 'model_version'))\n",
|
||||
"model = Model(ws, name=model_name, version=model_version)\n",
|
||||
"print(model.name, model.version)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's retrieve the AKS compute to delete."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_name = get_key(env_path, 'aks_name')\n",
|
||||
"aks_target = AksCompute(ws, name=aks_name)\n",
|
||||
"print(aks_target.name, aks_target.get_status())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Delete the service, image and model. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"aks_service.delete()\n",
|
||||
"image.delete()\n",
|
||||
"model.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's delete the AKS compute from the workspace. Since we created the cluster through AML, the corresponding cloud \n",
|
||||
"based objects will also be deleted. If the custer was created externally and attached to the workspace, the below \n",
|
||||
"would raise an exception and nothing will be changed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"aks_target.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you also would like to delete the workspace and all experiments in it, you can use the following."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"ws.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, you can delete the resource group with the following."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"resource_group = get_key(env_path, 'resource_group')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!az group delete --yes --name $resource_group"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "az-ml-realtime-score",
|
||||
"language": "python",
|
||||
"name": "az-ml-realtime-score"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
},
|
||||
"pycharm": {
|
||||
"stem_cell": {
|
||||
"cell_type": "raw",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": []
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,194 @@
|
|||
.ONESHELL:
|
||||
SHELL=/bin/bash
|
||||
|
||||
define PROJECT_HELP_MSG
|
||||
Makefile for testing notebooks
|
||||
Make sure you have edited the dev_env_template files and renamed it to .dev_env
|
||||
All the variables loaded in this makefile must come from the .dev_env file
|
||||
|
||||
Usage:
|
||||
make test run all notebooks
|
||||
make clean delete env and remove files
|
||||
endef
|
||||
export PROJECT_HELP_MSG
|
||||
env_location=.dev_env
|
||||
PWD:=$(shell pwd)
|
||||
include ${env_location}
|
||||
|
||||
|
||||
help:
|
||||
echo "$$PROJECT_HELP_MSG" | less
|
||||
|
||||
|
||||
test: setup test-notebook1 test-notebook2 test-notebook3 test-notebook4 test-notebook5 test-notebook6 test-notebook7 \
|
||||
test-notebook8 test-notebook-iot1 test-notebook9 test-notebook-iot2
|
||||
@echo All Notebooks Passed
|
||||
|
||||
setup:
|
||||
conda env create -f environment.yml
|
||||
ifndef TENANT_ID
|
||||
@echo starting interactive login
|
||||
az login -o table
|
||||
az account set --subscription ${SUBSCRIPTION_ID}
|
||||
else
|
||||
@echo using service principal login
|
||||
az login -t ${TENANT_ID} --service-principal -u ${SP_USERNAME} --password ${SP_PASSWORD}
|
||||
endif
|
||||
|
||||
|
||||
test-notebook1:
|
||||
source activate MLAKSDeployAML
|
||||
@echo Testing 00_AMLConfiguration.ipynb
|
||||
papermill 00_AMLConfiguration.ipynb test.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
-k python3 \
|
||||
-p subscription_id ${SUBSCRIPTION_ID} \
|
||||
-p resource_group ${RESOURCE_GROUP} \
|
||||
-p workspace_name ${WORKSPACE_NAME} \
|
||||
-p workspace_region ${WORKSPACE_REGION} \
|
||||
-p image_name ${IMAGE_NAME} \
|
||||
|
||||
test-notebook2:
|
||||
source activate MLAKSDeployAML
|
||||
@echo Testing 01_DataPrep.ipynb
|
||||
papermill 01_DataPrep.ipynb test.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
-k python3
|
||||
|
||||
test-notebook3:
|
||||
source activate MLAKSDeployAML
|
||||
@echo Testing 02_TrainOnLocal.ipynb
|
||||
papermill 02_TrainOnLocal.ipynb test.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
-k python3
|
||||
|
||||
test-notebook4:
|
||||
source activate MLAKSDeployAML
|
||||
@echo Testing 03_DevelopScoringScript.ipynb
|
||||
papermill 03_DevelopScoringScript.ipynb test.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
-k python3
|
||||
sleep 1m
|
||||
|
||||
test-notebook5:
|
||||
source activate MLAKSDeployAML
|
||||
@echo Testing 04_CreateImage.ipynb
|
||||
papermill 04_CreateImage.ipynb test.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
-k python3
|
||||
sleep 30
|
||||
|
||||
test-notebook6:
|
||||
source activate MLAKSDeployAML
|
||||
@echo Testing 05_DeployOnAKS.ipynb
|
||||
papermill aks/05_DeployOnAKS.ipynb test.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
-k python3 \
|
||||
-p aks_name ${AKS_NAME} \
|
||||
-p aks_location ${WORKSPACE_REGION} \
|
||||
-p aks_service_name ${AKS_SERVICE_NAME}
|
||||
|
||||
test-notebook7:
|
||||
source activate MLAKSDeployAML
|
||||
@echo Testing 06_SpeedTestWebApp.ipynb
|
||||
papermill aks/06_SpeedTestWebApp.ipynb test.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
-k python3
|
||||
|
||||
test-notebook8:
|
||||
source activate MLAKSDeployAML
|
||||
@echo Testing 07_RealTimeScoring.ipynb
|
||||
papermill aks/07_RealTimeScoring.ipynb test.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
-k python3
|
||||
|
||||
|
||||
test-notebook-iot1:
|
||||
source activate MLAKSDeployAML
|
||||
@echo Testing 05_DeployOnIOTedge.ipynb
|
||||
export PYTHONPATH=${PWD}:${PYTHONPATH}
|
||||
cd iotedge
|
||||
mkdir ./data_folder
|
||||
cp ../data_folder/dupes_test.tsv ./data_folder
|
||||
papermill 05_DeployOnIOTedge.ipynb test.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
-k python3 \
|
||||
-p iot_hub_name fstlstnameiothub \
|
||||
-p device_id mydevice \
|
||||
-p module_name mymodule
|
||||
|
||||
test-notebook9:
|
||||
source activate MLAKSDeployAML
|
||||
@echo Testing 08_TearDown.ipynb
|
||||
papermill aks/08_TearDown.ipynb test.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
-k python3
|
||||
|
||||
test-notebook-iot2:
|
||||
source activate MLAKSDeployAML
|
||||
@echo Testing 06_TearDown.ipynb
|
||||
export PYTHONPATH=${PWD}:${PYTHONPATH}
|
||||
papermill iotedge/06_TearDown.ipynb test.ipynb \
|
||||
--log-output \
|
||||
--no-progress-bar \
|
||||
-k python3
|
||||
|
||||
|
||||
test-cookiecutter-aks:
|
||||
cookiecutter --no-input https://github.com/Microsoft/MLAKSDeployAML.git --checkout yzhang \
|
||||
subscription_id="${SUBSCRIPTION_ID}" \
|
||||
workspace_region=${WORKSPACE_REGION} \
|
||||
deployment_type="aks"
|
||||
|
||||
test-cookiecutter-iot:
|
||||
cookiecutter --no-input https://github.com/Microsoft/MLAKSDeployAML.git --checkout yzhang \
|
||||
subscription_id=${SUBSCRIPTION_ID} \
|
||||
workspace_region=${WORKSPACE_REGION} \
|
||||
deployment_type="iotedge"
|
||||
|
||||
remove-notebook:
|
||||
rm -f test.ipynb
|
||||
|
||||
clean: remove-notebook
|
||||
conda remove --name MLAKSDeployAML -y --all
|
||||
rm -rf aml_config
|
||||
rm -rf __pycache__
|
||||
rm -rf .ipynb_checkpoints
|
||||
rm -rf data_folder
|
||||
rm -rf azureml-models
|
||||
rm -rf score.py lgbmenv.yml model.pkl
|
||||
rm -rf iotedge/deployment.json iotedge/deviceconfig.sh
|
||||
rm -rf iotedge/data_folder
|
||||
|
||||
notebook:
|
||||
source activate MLAKSDeployAML
|
||||
jupyter notebook --port 9999 --ip 0.0.0.0 --no-browser
|
||||
|
||||
install-jupytext:
|
||||
source activate MLAKSDeployAML
|
||||
conda install -c conda-forge jupytext
|
||||
|
||||
convert-to-py:
|
||||
jupytext --set-formats ipynb,py_scripts//py --sync *.ipynb
|
||||
|
||||
sync:
|
||||
jupytext --sync *.ipynb
|
||||
|
||||
convert-to-ipynb:
|
||||
jupytext --set-formats ipynb *.ipynb
|
||||
|
||||
remove-py:
|
||||
rm -r py_scripts
|
||||
|
||||
.PHONY: help test setup clean remove-notebook test-notebook1 test-notebook2 test-notebook3 test-notebook4 \
|
||||
test-notebook5 test-notebook6 test-notebook7 test-notebook8 test-notebook-iot1 test-notebook9 test-notebook-iot2
|
|
@ -0,0 +1,9 @@
|
|||
"""
|
||||
az-ml-realtime-score - __init__.py
|
||||
|
||||
Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
Licensed under the MIT License.
|
||||
"""
|
||||
import os
|
||||
DIRECTORY = os.path.dirname(os.path.realpath(__file__))
|
||||
WORKING_DIRECTORY = os.getcwd()
|
|
@ -0,0 +1,12 @@
|
|||
# Fill in the fields below and rename to .dev_env
|
||||
# TENANT_ID, SP_USERNAME and SP_PASSWORD are optional. If not supplied Azure cli will default to interactive login
|
||||
TENANT_ID=
|
||||
SP_USERNAME=
|
||||
SP_PASSWORD=
|
||||
SUBSCRIPTION_ID=
|
||||
RESOURCE_GROUP="deployrg"
|
||||
WORKSPACE_NAME="workspace"
|
||||
WORKSPACE_REGION="eastus"
|
||||
IMAGE_NAME="deployimg"
|
||||
AKS_NAME="deployaks"
|
||||
AKS_SERVICE_NAME="deployservice"
|
|
@ -0,0 +1,2 @@
|
|||
[pytest]
|
||||
junit_family=xunit1
|
|
@ -0,0 +1,11 @@
|
|||
subscription_id: "<>"
|
||||
resource_group: "<>"
|
||||
workspace_name: "<>"
|
||||
workspace_region: "<>"
|
||||
|
||||
image_name: "<>"
|
||||
aks_service_name: "<>"
|
||||
aks_name: "<>"
|
||||
aks_location: "<>"
|
||||
|
||||
storage_conn_string: "<>"
|
|
@ -0,0 +1,6 @@
|
|||
"""
|
||||
ai-architecture-template - __init__.py
|
||||
|
||||
Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
Licensed under the MIT License.
|
||||
"""
|
|
@ -0,0 +1,28 @@
|
|||
"""
|
||||
ai-architecture-template - test_notebooks.py
|
||||
|
||||
Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
Licensed under the MIT License.
|
||||
"""
|
||||
|
||||
import glob
|
||||
|
||||
import pytest
|
||||
|
||||
from azure_utils.dev_ops.testing_utilities import run_notebook
|
||||
from notebooks import DIRECTORY
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"notebook",
|
||||
[
|
||||
DIRECTORY + "/00_AMLConfiguration.ipynb",
|
||||
DIRECTORY + "/01_DataPrep.ipynb",
|
||||
DIRECTORY + "/02_TrainOnLocal.ipynb",
|
||||
DIRECTORY + "/03_DevelopScoringScript.ipynb",
|
||||
DIRECTORY + "/04_CreateImage.ipynb",
|
||||
DIRECTORY + "/05_DeployOnAKS.ipynb"
|
||||
]
|
||||
)
|
||||
def test_notebook(notebook, add_nunit_attachment):
|
||||
run_notebook(notebook, add_nunit_attachment, kernel_name="az-ml-realtime-score", root=DIRECTORY)
|
Загрузка…
Ссылка в новой задаче