Updating to remove cookiecutter, remove IoT Edge, and simplify folders. Also adding pytests.
This commit is contained in:
Daniel Ciborowski 2020-02-25 00:38:04 -05:00 committed by GitHub
Parent a36b122c3f
Commit b22f5ac4ec
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
26 changed files with 5264 additions and 190 deletions

View file

@ -3,7 +3,7 @@
# A GitHub Service Connection must also be created with the name "AIArchitecturesAndPractices-GitHub"
# https://docs.microsoft.com/en-us/azure/devops/pipelines/process/demands?view=azure-devops&tabs=yaml
#
# An Agent_Name Variable must be created in the Azure DevOps UI.
# An Agent_Name Variable must be created in the Azure DevOps UI.
# https://docs.microsoft.com/en-us/azure/devops/pipelines/process/variables?view=azure-devops&tabs=yaml%2Cbatch#secret-variables
#
# This must point to an Agent Pool with a self-hosted Linux VM that has Docker installed.
@ -32,6 +32,10 @@ stages:
- template: .ci/stages/deploy_notebooks_stages_v2.yml@aitemplates
parameters:
Agent: $(Agent_Name)
jobDisplayName: MLAKSDeployAMLJob
jobDisplayName: az-ml-realtime-score
DefaultWorkingDirectory: $(System.DefaultWorkingDirectory)
workload_vars: ../vars/ml_realtime_scoring.yml
workload_vars: ../vars/az-ml-realtime-score.yml
flighting_release: false
flighting_preview: false
flighting_master: false
post_cleanup: false

View file

@ -1,155 +1,50 @@
# AI Architecture Template TODO: update title
#
# A GitHub Service Connection must also be created with the name "AIArchitecturesAndPractices-GitHub"
# https://docs.microsoft.com/en-us/azure/devops/pipelines/process/demands?view=azure-devops&tabs=yaml
#
# An Agent_Name Variable must be created in the Azure DevOps UI.
# https://docs.microsoft.com/en-us/azure/devops/pipelines/process/variables?view=azure-devops&tabs=yaml%2Cbatch#secret-variables
#
# This must point to an Agent Pool with a self-hosted Linux VM that has Docker installed.
# https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/v2-linux?view=azure-devops
resources:
repositories:
- repository: aitemplates
type: github
name: microsoft/AI
endpoint: AIArchitecturesAndPractices-GitHub
schedules:
- cron: "*/10 * * * *"
displayName: Daily midnight build
always: true
branches:
include:
- master
# MLAKSDeploy Pipeline
trigger:
batch: true
branches:
include:
- master
- staging
variables:
- group: AzureKeyVault
jobs:
- job: MLAKSDeployAMLJob
timeoutInMinutes: 300
cancelTimeoutInMinutes: 2
pool:
vmImage: 'Ubuntu-16.04'
strategy:
maxParallel: 3
matrix: {"eastus": {"azureregion": "eastus", "azureresourcegroup" : "mlaksdplyamleastus"},"southcentralus": {"azureregion": "southcentralus", "azureresourcegroup" : "mlaksdplyamlsouthctrl" },"westus2": {"azureregion": "westus2", "azureresourcegroup" : "mlaksdplyamlwestus"}}
steps:
- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
which conda
conda env create -f {{cookiecutter.project_name}}/environment.yml
conda env list
conda activate MLAKSDeployAML
conda env list
echo Login Azure Account
az login -t $(sptenent) --service-principal -u $(spidentity) --password $(spsecret)
cd {{cookiecutter.project_name}}
echo Execute 00_AMLConfiguration.ipynb
papermill 00_AMLConfiguration.ipynb 00_AMLConfiguration_Output.ipynb \
--log-output \
--no-progress-bar \
-k python3 \
-p subscription_id $(azuresubscription) \
-p resource_group $(azureresourcegroup) \
-p workspace_name $(workspacename) \
-p workspace_region $(azureregion) \
-p image_name $(aksimagename)
displayName: '00_AML_Configuration.ipynb'
- template: steps/papermill.yml
parameters:
notebook: 01_DataPrep.ipynb
location: "{{cookiecutter.project_name}}"
- bash: |
mkdir -p {{cookiecutter.project_name}}/iotedge/data_folder
mkdir -p {{cookiecutter.project_name}}/aks/data_folder
cd {{cookiecutter.project_name}}
cp data_folder/*.tsv iotedge/data_folder
cp data_folder/*.tsv aks/data_folder
displayName: 'Copying data'
- template: steps/papermill.yml
parameters:
notebook: 02_TrainOnLocal.ipynb
location: "{{cookiecutter.project_name}}"
- template: steps/papermill.yml
parameters:
notebook: 03_DevelopScoringScript.ipynb
location: "{{cookiecutter.project_name}}"
- template: steps/papermill.yml
parameters:
notebook: 04_CreateImage.ipynb
location: "{{cookiecutter.project_name}}"
- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
conda activate MLAKSDeployAML
echo Execute 05_DeployOnAKS.ipynb
export PYTHONPATH=$(pwd)/{{cookiecutter.project_name}}:${PYTHONPATH}
cd {{cookiecutter.project_name}}/aks
papermill 05_DeployOnAKS.ipynb test.ipynb \
--log-output \
--no-progress-bar \
-k python3 \
-p aks_name $(aksname) \
-p aks_location $(azureregion) \
-p aks_service_name $(aksvcname)
displayName: '05_DeployOnAKS.ipynb'
- template: steps/papermill.yml
parameters:
notebook: 06_SpeedTestWebApp.ipynb
location: "{{cookiecutter.project_name}}/aks"
- template: steps/papermill.yml
parameters:
notebook: 07_RealTimeScoring.ipynb
location: "{{cookiecutter.project_name}}/aks"
# - bash: |
# source /usr/share/miniconda/etc/profile.d/conda.sh
# conda activate MLAKSDeployAML
# export PYTHONPATH=$(pwd)/{{cookiecutter.project_name}}:${PYTHONPATH}
# cd {{cookiecutter.project_name}}/iotedge
# echo Execute 05_DeployOnIOTedge.ipynb
# papermill 05_DeployOnIOTedge.ipynb test.ipynb \
# --log-output \
# --no-progress-bar \
# -k python3 \
# -p iot_hub_name fstlstnameiothub \
# -p device_id mydevice \
# -p module_name mymodule
# displayName: '05_DeployOnIOTedge.ipynb'
- template: steps/papermill.yml
parameters:
notebook: 08_TearDown.ipynb
location: "{{cookiecutter.project_name}}/aks"
# - template: steps/papermill.yml
# parameters:
# notebook: 06_TearDown.ipynb
# location: "{{cookiecutter.project_name}}/iotedge"
- bash: |
source /usr/share/miniconda/etc/profile.d/conda.sh
conda activate MLAKSDeployAML
echo Execute Resource Group Delete
existResponse=$(az group exists -n $(azureresourcegroup))
if [ "$existResponse" == "true" ]; then
echo Deleting project resource group
az group delete --name $(azureresourcegroup) --yes
else
echo Project resource group did not exist
fi
echo Done Cleanup
displayName: 'Backup Cleanup'
condition: or(canceled(),failed())
- task: CreateWorkItem@1
inputs:
workItemType: 'Issue'
title: $(System.TeamProject) - Build $(Build.BuildNumber) Failed
assignedTo: 'Fidan <fboylu@microsoft.com>'
associate: true
teamProject: $(System.TeamProject)
fieldMappings: |
Description=Branch $(Build.SourceBranch) failed to build. Go to Boards>WorkItems and tag the failure type.
displayName: 'Create work item on failure'
condition: failed()
pr:
autoCancel: true
branches:
include:
- master
stages:
- template: .ci/stages/deploy_notebooks_stages_v2.yml@aitemplates
parameters:
Agent: $(Agent_Name)
jobDisplayName: ai-architecture-template #TODO: Update with project name
DefaultWorkingDirectory: $(System.DefaultWorkingDirectory)
workload_vars: ../vars/ai-architecture-template.yml #TODO: Update with project name
flighting_release: false
flighting_preview: false
flighting_master: false

View file

@ -0,0 +1,64 @@
# AI Architecture Template TODO: update title
#
# A GitHub Service Connection must also be created with the name "AIArchitecturesAndPractices-GitHub"
# https://docs.microsoft.com/en-us/azure/devops/pipelines/process/demands?view=azure-devops&tabs=yaml
#
# An Agent_Name Variable must be created in the Azure DevOps UI.
# https://docs.microsoft.com/en-us/azure/devops/pipelines/process/variables?view=azure-devops&tabs=yaml%2Cbatch#secret-variables
#
# This must point to an Agent Pool with a self-hosted Linux VM that has Docker installed.
# https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/v2-linux?view=azure-devops
parameters:
azureSubscription: ''
azure_subscription: ''
location: ''
azureresourcegroup: ''
workspacename: ''
azureregion: westus2
aksimagename: ''
aks_name: ''
aks_service_name: myimage
conda: ''
doCleanup: true
python_path: ''
flighting_release: false
flighting_preview: false
flighting_master: false
steps:
- template: config_conda.yml
parameters:
conda_location: .
azureSubscription: ${{parameters.azureSubscription}}
conda: ai-architecture-template
flighting_release: ${{parameters.flighting_release}}
flighting_preview: ${{parameters.flighting_preview}}
flighting_master: ${{parameters.flighting_master}}
- template: azpapermill.yml
parameters:
notebook: 00_AMLConfiguration.ipynb
location: ${{parameters.location}}
azureSubscription: ${{parameters.azureSubscription}}
conda: ai-architecture-template
azure_subscription: ${{parameters.azure_subscription}}
azureresourcegroup: ${{parameters.azureresourcegroup}}
workspacename: "aiarchtemplate"
azureregion: ${{parameters.azureregion}}
aksimagename: ${{parameters.aksimagename}}
# Insert more notebook steps here
- template: pytest_steps.yml
parameters:
location: ${{parameters.location}}
azureSubscription: ${{parameters.azureSubscription}}
conda: ai-architecture-template
- template: cleanuptask.yml
parameters:
azureSubscription: ${{parameters.azureSubscription}}
conda: ${{parameters.conda}}
azureresourcegroup: ${{parameters.azureresourcegroup}}
doCleanup: ${{parameters.doCleanup}}
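The pytest_steps.yml template above is what wires this commit's new pytests into the pipeline. As a hedged illustration only (the test name, notebook path, and kernel below are assumptions, not taken from this repo), such a test can execute a notebook end-to-end with papermill so that any failing cell fails the pytest run:

```python
# Hypothetical sketch of a papermill-driven notebook test; the notebook
# path, output name, and kernel are assumptions based on this commit.
import papermill as pm


def test_00_aml_configuration(tmp_path):
    # papermill raises PapermillExecutionError if any cell fails,
    # which pytest reports as a test failure.
    pm.execute_notebook(
        "notebooks/00_AMLConfiguration.ipynb",
        str(tmp_path / "00_AMLConfiguration_output.ipynb"),
        kernel_name="python3",
    )
```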

View file

@ -0,0 +1,6 @@
variables:
TridentWorkloadTypeShort: aiarchtemp # TODO: update with project short name
DeployLocation: westus
ProjectLocation: "notebooks/"
PythonPath: "."
Template: steps/ai-architecture-template.yml # TODO: update file name to project name

155
.gitignore vendored
View file

@ -1,13 +1,11 @@
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
# Environments
.env
# Jupyter Notebook
.ipynb_checkpoints
# Project Configuration Files
workspace_conf.yml
*.output_ipynb
.azureml
pylint-results.xml
.idea
score.py
#AML
aml_config/
@ -19,14 +17,137 @@ scripts/.amlignore
__pycache__/
scripts/__pycache__/
# Products
*.tsv
*.txt
*.pkl
datafolder/
lgbmenv.yml
score.py
# Environments
.env
.idea
# Jupyter Notebook
.ipynb_checkpoints
# Byte-compiled / optimized / DLL files
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/

584
.pylintrc Normal file
View file

@ -0,0 +1,584 @@
[MASTER]
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code.
extension-pkg-whitelist=
# Add files or directories to the blacklist. They should be base names, not
# paths.
ignore=CVS
# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.
ignore-patterns=
# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
# number of processors available to use.
jobs=1
# Control the amount of potential inferred values when inferring a single
# object. This can help the performance when dealing with large functions or
# complex, nested conditions.
limit-inference-results=100
# List of plugins (as comma separated values of python module names) to load,
# usually to register additional checkers.
load-plugins=pylint_junit
# Pickle collected data for later comparisons.
persistent=yes
# Specify a configuration file.
#rcfile=
# When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages.
suggestion-mode=yes
# Allow loading of arbitrary C extensions. Extensions are imported into the
# active Python interpreter and may run arbitrary code.
unsafe-load-any-extension=no
[MESSAGES CONTROL]
# Only show warnings with the listed confidence levels. Leave empty to show
# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
confidence=
# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then reenable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=missing-module-docstring,
trailing-whitespace,
fixme,
print-statement,
parameter-unpacking,
unpacking-in-except,
old-raise-syntax,
backtick,
long-suffix,
old-ne-operator,
old-octal-literal,
import-star-module-level,
non-ascii-bytes-literal,
raw-checker-failed,
bad-inline-option,
locally-disabled,
file-ignored,
suppressed-message,
useless-suppression,
deprecated-pragma,
use-symbolic-message-instead,
apply-builtin,
basestring-builtin,
buffer-builtin,
cmp-builtin,
coerce-builtin,
execfile-builtin,
file-builtin,
long-builtin,
raw_input-builtin,
reduce-builtin,
standarderror-builtin,
unicode-builtin,
xrange-builtin,
coerce-method,
delslice-method,
getslice-method,
setslice-method,
no-absolute-import,
old-division,
dict-iter-method,
dict-view-method,
next-method-called,
metaclass-assignment,
indexing-exception,
raising-string,
reload-builtin,
oct-method,
hex-method,
nonzero-method,
cmp-method,
input-builtin,
round-builtin,
intern-builtin,
unichr-builtin,
map-builtin-not-iterating,
zip-builtin-not-iterating,
range-builtin-not-iterating,
filter-builtin-not-iterating,
using-cmp-argument,
eq-without-hash,
div-method,
idiv-method,
rdiv-method,
exception-message-attribute,
invalid-str-codec,
sys-max-int,
bad-python3-import,
deprecated-string-function,
deprecated-str-translate-call,
deprecated-itertools-function,
deprecated-types-field,
next-method-defined,
dict-items-not-iterating,
dict-keys-not-iterating,
dict-values-not-iterating,
deprecated-operator-function,
deprecated-urllib-function,
xreadlines-attribute,
deprecated-sys-function,
exception-escape,
comprehension-escape
# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
# multiple time (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples.
enable=c-extension-no-member
[REPORTS]
# Python expression which should return a score less than or equal to 10. You
# have access to the variables 'error', 'warning', 'refactor', and 'convention'
# which contain the number of messages in each category, as well as 'statement'
# which is the total number of statements analyzed. This score is used by the
# global evaluation report (RP0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
# Template used to display messages. This is a python new-style format string
# used to format the message information. See doc for all details.
#msg-template=
# Set the output format. Available formats are text, parseable, colorized, json
# and msvs (visual studio). You can also give a reporter class, e.g.
# mypackage.mymodule.MyReporterClass.
output-format=text
# Tells whether to display a full report or only the messages.
reports=no
# Activate the evaluation score.
score=yes
[REFACTORING]
# Maximum number of nested blocks for function / method body
max-nested-blocks=5
# Complete name of functions that never returns. When checking for
# inconsistent-return-statements if a never returning function is called then
# it will be considered as an explicit return statement and no message will be
# printed.
never-returning-functions=sys.exit
[BASIC]
# Naming style matching correct argument names.
argument-naming-style=snake_case
# Regular expression matching correct argument names. Overrides argument-
# naming-style.
#argument-rgx=
# Naming style matching correct attribute names.
attr-naming-style=snake_case
# Regular expression matching correct attribute names. Overrides attr-naming-
# style.
#attr-rgx=
# Bad variable names which should always be refused, separated by a comma.
bad-names=foo,
bar,
baz,
toto,
tutu,
tata
# Naming style matching correct class attribute names.
class-attribute-naming-style=any
# Regular expression matching correct class attribute names. Overrides class-
# attribute-naming-style.
#class-attribute-rgx=
# Naming style matching correct class names.
class-naming-style=PascalCase
# Regular expression matching correct class names. Overrides class-naming-
# style.
#class-rgx=
# Naming style matching correct constant names.
const-naming-style=snake_case
# Regular expression matching correct constant names. Overrides const-naming-
# style.
#const-rgx=
# Minimum line length for functions/classes that require docstrings, shorter
# ones are exempt.
docstring-min-length=-1
# Naming style matching correct function names.
function-naming-style=snake_case
# Regular expression matching correct function names. Overrides function-
# naming-style.
#function-rgx=
# Good variable names which should always be accepted, separated by a comma.
good-names=i,
j,
k,
ex,
Run,
_
# Include a hint for the correct naming format with invalid-name.
include-naming-hint=no
# Naming style matching correct inline iteration names.
inlinevar-naming-style=any
# Regular expression matching correct inline iteration names. Overrides
# inlinevar-naming-style.
#inlinevar-rgx=
# Naming style matching correct method names.
method-naming-style=snake_case
# Regular expression matching correct method names. Overrides method-naming-
# style.
#method-rgx=
# Naming style matching correct module names.
module-naming-style=any
# Regular expression matching correct module names. Overrides module-naming-
# style.
#module-rgx=
# Colon-delimited sets of names that determine each other's naming style when
# the name regexes allow several styles.
name-group=
# Regular expression which should only match function or class names that do
# not require a docstring.
no-docstring-rgx=^_
# List of decorators that produce properties, such as abc.abstractproperty. Add
# to this list to register other decorators that produce valid properties.
# These decorators are taken in consideration only for invalid-name.
property-classes=abc.abstractproperty
# Naming style matching correct variable names.
variable-naming-style=snake_case
# Regular expression matching correct variable names. Overrides variable-
# naming-style.
#variable-rgx=
[FORMAT]
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
expected-line-ending-format=
# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
# Number of spaces of indent required inside a hanging or continued line.
indent-after-paren=4
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
# tab).
indent-string=' '
# Maximum number of characters on a single line.
max-line-length=120
# Maximum number of lines in a module.
max-module-lines=1000
# List of optional constructs for which whitespace checking is disabled. `dict-
# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
# `trailing-comma` allows a space between comma and closing bracket: (a, ).
# `empty-line` allows space-only lines.
no-space-check=trailing-comma,
dict-separator
# Allow the body of a class to be on the same line as the declaration if body
# contains single statement.
single-line-class-stmt=no
# Allow the body of an if to be on the same line as the test if there is no
# else.
single-line-if-stmt=no
[LOGGING]
# Format style used to check logging format string. `old` means using %
# formatting, `new` is for `{}` formatting,and `fstr` is for f-strings.
logging-format-style=old
# Logging modules to check that the string format arguments are in logging
# function parameter format.
logging-modules=logging
[MISCELLANEOUS]
# List of note tags to take in consideration, separated by a comma.
notes=FIXME,
XXX,
TODO
[SIMILARITIES]
# Ignore comments when computing similarities.
ignore-comments=yes
# Ignore docstrings when computing similarities.
ignore-docstrings=yes
# Ignore imports when computing similarities.
ignore-imports=no
# Minimum lines number of a similarity.
min-similarity-lines=4
[SPELLING]
# Limits count of emitted suggestions for spelling mistakes.
max-spelling-suggestions=4
# Spelling dictionary name. Available dictionaries: none. To make it work,
# install the python-enchant package.
spelling-dict=
# List of comma separated words that should not be checked.
spelling-ignore-words=
# A path to a file that contains the private dictionary; one word per line.
spelling-private-dict-file=
# Tells whether to store unknown words to the private dictionary (see the
# --spelling-private-dict-file option) instead of raising a message.
spelling-store-unknown-words=no
[STRING]
# This flag controls whether the implicit-str-concat-in-sequence should
# generate a warning on implicit string concatenation in sequences defined over
# several lines.
check-str-concat-over-line-jumps=no
[TYPECHECK]
# List of decorators that produce context managers, such as
# contextlib.contextmanager. Add to this list to register other decorators that
# produce valid context managers.
contextmanager-decorators=contextlib.contextmanager
# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E1101 when accessed. Python regular
# expressions are accepted.
generated-members=
# Tells whether missing members accessed in mixin class should be ignored. A
# mixin class is detected if its name ends with "mixin" (case insensitive).
ignore-mixin-members=yes
# Tells whether to warn about missing members when the owner of the attribute
# is inferred to be None.
ignore-none=yes
# This flag controls whether pylint should warn about no-member and similar
# checks whenever an opaque object is returned when inferring. The inference
# can return multiple potential results while evaluating a Python object, but
# some branches might not be evaluated, which results in partial inference. In
# that case, it might be useful to still emit no-member and other checks for
# the rest of the inferred objects.
ignore-on-opaque-inference=yes
# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
# qualified names.
ignored-classes=optparse.Values,thread._local,_thread._local
# List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis). It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=
# Show a hint with possible names when a member name was not found. The aspect
# of finding the hint is based on edit distance.
missing-member-hint=yes
# The minimum edit distance a name should have in order to be considered a
# similar match for a missing member name.
missing-member-hint-distance=1
# The total number of similar names that should be taken in consideration when
# showing a hint for a missing member.
missing-member-max-choices=1
# List of decorators that change the signature of a decorated function.
signature-mutators=
[VARIABLES]
# List of additional names supposed to be defined in builtins. Remember that
# you should avoid defining new builtins when possible.
additional-builtins=
# Tells whether unused global variables should be treated as a violation.
allow-global-unused-variables=yes
# List of strings which can identify a callback function by name. A callback
# name must start or end with one of those strings.
callbacks=cb_,
_cb
# A regular expression matching the name of dummy variables (i.e. expected to
# not be used).
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
# Argument names that match this expression will be ignored. Default to name
# with leading underscore.
ignored-argument-names=_.*|^ignored_|^unused_
# Tells whether we should check for unused import in __init__ files.
init-import=no
# List of qualified module names which can have objects that can redefine
# builtins.
redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
[CLASSES]
# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,
__new__,
setUp,
__post_init__
# List of member names, which should be excluded from the protected access
# warning.
exclude-protected=_asdict,
_fields,
_replace,
_source,
_make
# List of valid names for the first argument in a class method.
valid-classmethod-first-arg=cls
# List of valid names for the first argument in a metaclass class method.
valid-metaclass-classmethod-first-arg=cls
[DESIGN]
# Maximum number of arguments for function / method.
max-args=5
# Maximum number of attributes for a class (see R0902).
max-attributes=7
# Maximum number of boolean expressions in an if statement (see R0916).
max-bool-expr=5
# Maximum number of branch for function / method body.
max-branches=12
# Maximum number of locals for function / method body.
max-locals=15
# Maximum number of parents for a class (see R0901).
max-parents=7
# Maximum number of public methods for a class (see R0904).
max-public-methods=20
# Maximum number of return / yield for function / method body.
max-returns=6
# Maximum number of statements in function / method body.
max-statements=50
# Minimum number of public methods for a class (see R0903).
min-public-methods=2
[IMPORTS]
# List of modules that can be imported at any level, not just the top level
# one.
allow-any-import-level=
# Allow wildcard imports from modules that define __all__.
allow-wildcard-with-all=no
# Analyse import fallback blocks. This can be used to support both Python 2 and
# 3 compatible code, which means that the block might have code that exists
# only in one or another interpreter, leading to false positives when analysed.
analyse-fallback-blocks=no
# Deprecated modules which should not be used, separated by a comma.
deprecated-modules=optparse,tkinter.tix
# Create a graph of external dependencies in the given file (report RP0402 must
# not be disabled).
ext-import-graph=
# Create a graph of every (i.e. internal and external) dependencies in the
# given file (report RP0402 must not be disabled).
import-graph=
# Create a graph of internal dependencies in the given file (report RP0402 must
# not be disabled).
int-import-graph=
# Force import order to recognize a module as part of the standard
# compatibility libraries.
known-standard-library=
# Force import order to recognize a module as part of a third party library.
known-third-party=enchant
# Couples of modules and preferred modules, separated by a comma.
preferred-modules=
[EXCEPTIONS]
# Exceptions that will emit a warning when being caught. Defaults to
# "BaseException, Exception".
overgeneral-exceptions=BaseException,
Exception
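The `load-plugins=pylint_junit` setting above registers the JUnit reporter, which is how the `pylint-results.xml` file excluded by .gitignore gets produced. A minimal sketch of invoking it; the `notebooks` target and output file name are assumptions, not taken from this commit:

```python
# Hedged sketch: run pylint with this rcfile and the pylint_junit
# reporter it loads. Target path and output name are assumptions.
import subprocess

with open("pylint-results.xml", "w") as results:
    # pylint exits non-zero when it finds issues, so no check=True here.
    subprocess.run(
        [
            "pylint",
            "--rcfile=.pylintrc",
            "--output-format=pylint_junit.JUnitReporter",
            "notebooks",
        ],
        stdout=results,
    )
```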

9
CODE_OF_CONDUCT.md Normal file
View file

@ -0,0 +1,9 @@
# Microsoft Open Source Code of Conduct
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
Resources:
- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns

View file

@ -6,9 +6,6 @@
In this repository there are a number of tutorials in Jupyter notebooks with step-by-step instructions on (1) how to train a machine learning model using Python; (2) how to deploy a trained machine learning model through Azure Machine Learning (AzureML). The tutorials cover how to deploy models on the following deployment targets:
- [Azure Kubernetes Service (AKS) Cluster](./{{cookiecutter.project_name}}/aks)
- [Azure IoT Edge](./{{cookiecutter.project_name}}/iotedge)
## Overview
This scenario shows how to deploy a Frequently Asked Questions (FAQ) matching model as a web service to provide predictions for user questions. For this scenario, “Input Data” in the [architecture diagram](https://docs.microsoft.com/en-us/azure/architecture/reference-architectures/ai/realtime-scoring-python) refers to text strings containing the user questions to match with a list of FAQs. The scenario is designed for the Scikit-Learn machine learning library for Python but can be generalized to any scenario that uses Python models to make real-time predictions.
@ -25,9 +22,10 @@ An example app that consumes the results is included with the scenario.
## Prerequisites
1. Linux (Ubuntu).
2. [Anaconda Python](https://www.anaconda.com/download)
3. [Docker](https://docs.docker.com/v17.12/install/linux/docker-ee/ubuntu) installed.
4. [Azure account](https://azure.microsoft.com).
1. [Anaconda Python](https://www.anaconda.com/download)
1. [Docker](https://docs.docker.com/v17.12/install/linux/docker-ee/ubuntu) installed.
1. [Azure account](https://azure.microsoft.com).
---
**NOTE**
@ -44,27 +42,46 @@ DSVM](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtu
which addresses the first three prerequisites.
## Setup
To set up your environment to run these notebooks, please follow these steps. They set up the notebooks to use Docker and Azure seamlessly.
1. Create an _Ubuntu_ _Linux_ DSVM and perform the following steps.
2. Install [cookiecutter](https://cookiecutter.readthedocs.io/en/latest/installation.html), a tool that creates projects from project templates.
```bash
pip install cookiecutter
```
To set up your environment to run these notebooks, please follow these steps. They set up the notebooks to use Azure seamlessly.
3. Use cookiecutter to clone this repository. Cookiecutter will prompt a series of questions where you will choose a specific framework, select your deployment settings, and obtain an Azure ML workspace.
```bash
cookiecutter https://github.com/Microsoft/MLAKSDeployAML.git
1. Create a _Linux_ _Ubuntu_ VM.
1. Log in to your VM. We recommend that you use a graphical client
such as
[X2Go](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtual-machine/dsvm-ubuntu-intro#x2go)
to access your VM. The remaining steps are to be done on the VM.
1. Open a terminal emulator.
1. Clone, fork, or download the zip file for this repository:
```
git clone https://github.com/Microsoft/az-ml-realtime-score.git
```
1. Enter the local repository:
```
cd az-ml-realtime-score
```
1. Copy `sample_workspace_conf.yml` to a new file, `workspace_conf.yml`, and fill in each field. This keeps secrets out of the source code, and git will ignore the file; see the sketch after these steps for the expected keys.
1. Create the az-ml-realtime-score conda environment from environment.yml:
```
conda env create -f environment.yml
```
1. Activate the virtual environment:
```
source activate az-ml-realtime-score
```
The remaining steps should be done in this virtual environment.
1. Log in to Azure:
```
az login
```
You can verify that you are logged in to your subscription by executing
the command:
```
az account show -o table
```
1. Start the Jupyter notebook server:
```
jupyter notebook
```
You will be asked to choose or enter information such as *project name*, *subscription id*, *resource group*, etc. interactively. You can press *Enter* to accept the default value or enter a value of your choice. For example, if you want to learn how to deploy a machine learning model on an AKS cluster, choose the value "aks" for the variable *deployment_type*. If instead you want to learn about deploying a machine learning model on IoT Edge, select "iotedge" for the variable *deployment_type*.
Provide a valid value for "subscription_id"; otherwise a `subscription id is missing` error will be generated **after** all the questions are asked, and you will have to perform Step 3 all over again. The full list of questions can be found in the [cookiecutter.json](./cookiecutter.json) file.
Please make sure all entered information is correct, as it is used to customize the content of your repo.
4. On your local machine, you should now have a repo with the *project_name* you specified. Find the README.md file in this repo and proceed with the instructions specified in it.
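As a hedged sketch of the `workspace_conf.yml` step above: the notebooks load this file with `load_configuration`, and the keys below are inferred from the parameter cell in 00_AMLConfiguration.ipynb. PyYAML availability is assumed via the environment's dependencies; the check itself is illustrative, not part of the repo:

```python
# Hedged sketch: verify that workspace_conf.yml carries the keys that
# 00_AMLConfiguration.ipynb reads; assumes PyYAML is importable.
import yaml

with open("workspace_conf.yml") as f:
    cfg = yaml.safe_load(f)

for key in ("subscription_id", "resource_group",
            "workspace_name", "workspace_region"):
    assert key in cfg, f"workspace_conf.yml is missing '{key}'"
```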
# Contributing
This project welcomes contributions and suggestions. Most contributions require you to agree to a

41
SECURITY.md Normal file
View file

@ -0,0 +1,41 @@
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.3 BLOCK -->
## Security
Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.
## Reporting Security Issues
**Please do not report security vulnerabilities through public GitHub issues.**
Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue
This information will help us triage your report more quickly.
If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.
## Preferred Languages
We prefer all communications to be in English.
## Policy
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).
<!-- END MICROSOFT SECURITY.MD BLOCK -->

21
environment.yml Normal file
View file

@ -0,0 +1,21 @@
name: az-ml-realtime-score
channels:
- conda-forge
dependencies:
- python=3.6.2
- pip
- jupyter
- pytest
- pytest-cov
- pylint
- pandas
- pip:
- papermill
- azureml-core==1.0.85.2
- pylint-junit
- pytest-nunit
- nbconvert
- junit-xml
- nbformat
- Microsoft-AI-Azure-Utility-Samples
- python-dotenv
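The notebooks in this commit name an `az-ml-realtime-score` kernelspec. One hedged way to register the activated environment under that name, assuming `ipykernel` is present (it is installed with the `jupyter` dependency above):

```python
# Hedged sketch: register the active conda environment as the Jupyter
# kernel the notebooks reference; assumes ipykernel is installed.
from ipykernel.kernelspec import install as install_kernel

install_kernel(
    user=True,
    kernel_name="az-ml-realtime-score",
    display_name="az-ml-realtime-score",
)
```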

View file

@ -0,0 +1,199 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ai-architecture-template - 00_AMLConfiguration.ipynb\n",
"TODO: Update with new repo name\n",
"\n",
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License.\n",
"\n",
"# Installation and configuration\n",
"This notebook configures the notebooks in this tutorial to connect to an Azure Machine Learning (AML) Workspace. \n",
"You can use an existing workspace or create a new one.\n",
"\n",
"## Prerequisites\n",
"\n",
"If you have already completed the prerequisites and selected the correct Kernel for this notebook, the AML Python SDK \n",
"is already installed. Let's load the imports and check the AML SDK version."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import json\n",
"\n",
"import azureml.core\n",
"from azure_utils.machine_learning.utils import load_configuration, get_or_create_workspace\n",
"\n",
"print(\"AML SDK Version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set up your Azure Machine Learning workspace\n",
"## Load Configurations from file\n",
"\n",
"Configurations are loaded from a file, to prevent accident commits of Azure secerts into source control.\n",
"This file name is included in the .gitignore to also prevent accident commits. A template file is included that should\n",
"be copied, and each parameter filled in."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"cfg = load_configuration(\"../workspace_conf.yml\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## Load Configurations into Notebook.\n",
"\n",
"The following cell loads the configurations from the local file, into the notebook memory. The following cell is also\n",
"marked as a parameter cell. When using this notebook with [papermill](https://github.com/nteract/papermill), these\n",
"parameters can be override. See the tests for examples."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
},
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"subscription_id = cfg['subscription_id']\n",
"resource_group = cfg['resource_group']\n",
"workspace_name = cfg['workspace_name']\n",
"workspace_region = cfg['workspace_region']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create the workspace\n",
"This cell will create an AML workspace for you in a subscription, provided you have the correct permissions.\n",
"\n",
"This will fail when:\n",
"1. You do not have permission to create a workspace in the resource group\n",
"1. You do not have permission to create a resource group if it's non-existing.\n",
"1. You are not a subscription owner or contributor and no Azure ML workspaces have ever been created in this \n",
"subscription\n",
"\n",
"If workspace creation fails, please work with your IT admin to provide you with the appropriate permissions or to \n",
"provision the required resources. If this cell succeeds, you're done configuring AML!\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"ws = get_or_create_workspace(workspace_name, subscription_id, resource_group, workspace_region)\n",
"ws_json = ws.get_details()"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"Let's check the details of the workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"print(json.dumps(ws_json, indent=2))"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"You are now ready to move on to the [AutoML Local](01_DataPrep.ipynb) notebook."
]
}
],
"metadata": {
"celltoolbar": "Tags",
"kernelspec": {
"display_name": "az-ml-realtime-score",
"language": "python",
"name": "az-ml-realtime-score"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"pycharm": {
"stem_cell": {
"cell_type": "raw",
"source": [],
"metadata": {
"collapsed": false
}
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
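The cell tagged `parameters` in this notebook is what papermill overrides when the pipeline templates run it. A hedged sketch of that invocation with placeholder values (the real values come from workspace_conf.yml or pipeline variables):

```python
# Hedged sketch: papermill injects these values after the tagged
# parameter cell; every value below is a placeholder.
import papermill as pm

pm.execute_notebook(
    "notebooks/00_AMLConfiguration.ipynb",
    "notebooks/00_AMLConfiguration_Output.ipynb",
    kernel_name="python3",
    parameters={
        "subscription_id": "<subscription-id>",
        "resource_group": "<resource-group>",
        "workspace_name": "<workspace-name>",
        "workspace_region": "westus2",
    },
)
```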

733
notebooks/01_DataPrep.ipynb Normal file
View file

@ -0,0 +1,733 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data Preparation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this notebook, we use a subset of [Stack Exchange network](https://archive.org/details/stackexchange) question data \n",
"which includes original questions tagged as 'JavaScript', their duplicate questions and their answers. Here, we \n",
"provide the steps to prepare the data to use in model development for training a model that will match a new \n",
"question with an existing original question. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import pandas as pd\n",
"from azure_utils.utilities import read_csv_gz, clean_text, round_sample_strat, random_merge\n",
"from notebooks import DIRECTORY"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Below, we define some parameters that will be used in the data cleaning as well as train and test set preparation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The size of the test set\n",
"test_size = 0.21\n",
"# The minimum length of clean text\n",
"min_text = 150\n",
"# The minimum number of duplicates per question\n",
"min_dupes = 12\n",
"# The maximum number of duplicate matches\n",
"match = 20\n",
"# The output files path\n",
"outputs_path = DIRECTORY + \"/data_folder\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data cleaning"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we download the questions, duplicate questions and answers and load the datasets into pandas dataframes using \n",
"the helper functions."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# URLs to original questions, duplicate questions, and answers.\n",
"data_url = \"https://bostondata.blob.core.windows.net/stackoverflow/{}\"\n",
"questions_url = data_url.format(\"orig-q.tsv.gz\")\n",
"dupes_url = data_url.format(\"dup-q.tsv.gz\")\n",
"answers_url = data_url.format(\"ans.tsv.gz\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load datasets.\n",
"questions = read_csv_gz(questions_url, names=('Id', 'AnswerId', 'Text0', 'CreationDate'))\n",
"dupes = read_csv_gz(dupes_url, names=('Id', 'AnswerId', 'Text0', 'CreationDate'))\n",
"answers = read_csv_gz(answers_url, names=('Id', 'Text0'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's now check the dataframes. Notice that questions and duplicates have \"AnswerID\" column that would help match \n",
"ith the index of answers dataframe."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"questions.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dupes.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"answers.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's check the first original question's text."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(questions.iloc[0, 1])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's now check the duplicates for that question."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(dupes[dupes.AnswerId == questions.iloc[0, 0]])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Below is the answer to the original question."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(answers.at[questions.iloc[0, 0], 'Text0'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we use the helper functions to clean questions, duplicates and answers from unwanted text such as code, html \n",
"tags and links. Notice that we add a new column 'Text' to each dataframe for clean text in lowercase."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Clean up all text, and keep only data with some clean text.\n",
"for df in (questions, dupes, answers):\n",
" df[\"Text\"] = df.Text0.apply(clean_text).str.lower()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"questions = questions[questions.Text.str.len() > 0]\n",
"answers = answers[answers.Text.str.len() > 0]\n",
"dupes = dupes[dupes.Text.str.len() > 0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's compare the first original question and cleaned version as an example."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Original question.\n",
"print(questions.iloc[0, 1])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# After cleaning.\n",
"print(questions.iloc[0, 3])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It turns out that some duplicate questions were also in original questions. Also, some original questions and some \n",
"duplicate questions were duplicated in the datasets. In the following, we remove them from the dataframes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# First, remove dupes that are questions, then remove duplicated questions and dupes.\n",
"dupes = dupes[~dupes.index.isin(questions.index)]\n",
"questions = questions[~questions.index.duplicated(keep='first')]\n",
"dupes = dupes[~dupes.index.duplicated(keep='first')]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We also make sure we keep questions with answers and duplicates."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep only questions with answers and dupes, answers to questions, and dupes of questions.\n",
"questions = questions[\n",
" questions.AnswerId.isin(answers.index) & questions.AnswerId.isin(dupes.AnswerId)\n",
"]\n",
"answers = answers[answers.index.isin(questions.AnswerId)]\n",
"dupes = dupes[dupes.AnswerId.isin(questions.AnswerId)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Verify data integrity.\n",
"assert questions.AnswerId.isin(answers.index).all()\n",
"assert answers.index.isin(questions.AnswerId).all()\n",
"assert questions.AnswerId.isin(dupes.AnswerId).all()\n",
"assert dupes.AnswerId.isin(questions.AnswerId).all()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Below are some statistics on the data. Notice that some questions have very low number of duplicates while others may \n",
"have a large number. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Report on the data.\n",
"print(\"Text statistics:\")\n",
"print(\n",
" pd.DataFrame(\n",
" [\n",
" questions.Text.str.len().describe().rename(\"questions\"),\n",
" answers.Text.str.len().describe().rename(\"answers\"),\n",
" dupes.Text.str.len().describe().rename(\"dupes\"),\n",
" ]\n",
" )\n",
")\n",
"print(\"\\nDuplication statistics:\")\n",
"print(pd.DataFrame([dupes.AnswerId.value_counts().describe().rename(\"duplications\")]))\n",
"print(\n",
" \"\\nLargest class: {:.2%}\".format(\n",
" dupes.AnswerId.value_counts().max() / dupes.shape[0]\n",
" )\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, we reset all indexes to use them as columns in the rest of the steps."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Reset each dataframe's index.\n",
"questions.reset_index(inplace=True)\n",
"answers.reset_index(inplace=True)\n",
"dupes.reset_index(inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We filter the questions and duplicates to have at least min_text number of characters."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Apply the minimum text length to questions and dupes.\n",
"questions = questions[questions.Text.str.len() >= min_text]\n",
"dupes = dupes[dupes.Text.str.len() >= min_text]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep only questions with dupes, and dupes of questions.\n",
"label_column = \"AnswerId\"\n",
"questions = questions[questions[label_column].isin(dupes[label_column])]\n",
"dupes = dupes[dupes[label_column].isin(questions[label_column])]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here, we remove questions and their duplicates that are less than min_dupes parameter."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Restrict the questions to those with a minimum number of dupes.\n",
"answerid_count = dupes.groupby(label_column)[label_column].count()\n",
"answerid_min = answerid_count.index[answerid_count >= min_dupes]\n",
"questions = questions[questions[label_column].isin(answerid_min)]\n",
"dupes = dupes[dupes[label_column].isin(answerid_min)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
" # Verify data integrity.\n",
"assert questions[label_column].isin(dupes[label_column]).all()\n",
"assert dupes[label_column].isin(questions[label_column]).all()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here are some statistics on the resulting dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Report on the data.\n",
"print(\"Restrictions: min_text={}, min_dupes={}\".format(min_text, min_dupes))\n",
"print(\"Restricted text statistics:\")\n",
"print(\n",
" pd.DataFrame(\n",
" [\n",
" questions.Text.str.len().describe().rename(\"questions\"),\n",
" dupes.Text.str.len().describe().rename(\"dupes\"),\n",
" ]\n",
" )\n",
")\n",
"print(\"\\nRestricted duplication statistics:\")\n",
"print(\n",
" pd.DataFrame([dupes[label_column].value_counts().describe().rename(\"duplications\")])\n",
")\n",
"print(\n",
" \"\\nRestricted largest class: {:.2%}\".format(\n",
" dupes[label_column].value_counts().max() / dupes.shape[0]\n",
" )\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prepare train and test sets"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this part, we prepare train and test sets. For training a binary classification model, we will need to construct \n",
"match and non-match pairs from duplicates and their questions. Finding matching pairs can be accomplished by joining \n",
"each duplicate with its question. However, non-match examples need to be constructed randomly. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a first step, to make sure we train and test the performance of the model on each question, we will need to have \n",
"examples of match and non-match pairs for each question both in train and test sets. In order to achieve that, \n",
"we split the duplicates in a stratified manner into train and test sets making sure at least 1 or more duplicates per \n",
"question is in the test set depending on test_size parameter and number of duplicates per each question."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Split dupes into train and test ensuring at least one of each label class is in test.\n",
"dupes_test = round_sample_strat(dupes, dupes[label_column], frac=test_size)\n",
"dupes_train = dupes[~dupes.Id.isin(dupes_test.Id)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"assert dupes_test[label_column].unique().shape[0] == dupes[label_column].unique().shape[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The relevant columns for text pairs data.\n",
"balanced_pairs_columns = ['Id_x', 'AnswerId_x', 'Text_x', 'Id_y', 'Text_y', 'AnswerId_y', 'Label', 'n']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we pair each training duplicate in train set with its matching question and N-1 random questions using the \n",
"helper function."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use AnswerId to pair each training dupe with its matching question and also with N-1 questions not its match.\n",
"balanced_pairs_train = random_merge(dupes_train, questions, N=match)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Labeling is done such that matching pairs are labeled as 1 and non-match pairs are labeled as 0."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Label records by matching AnswerIds.\n",
"balanced_pairs_train[\"Label\"] = (\n",
" balanced_pairs_train.AnswerId_x == balanced_pairs_train.AnswerId_y\n",
").astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the relevant data.\n",
"balanced_pairs_train = balanced_pairs_train[balanced_pairs_columns]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"balanced_pairs_train.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sort the data by dupe ID and Label.\n",
"balanced_pairs_train.sort_values(by=['Id_x', 'Label'], ascending=[True, False], inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In testing set, we match each duplicate with all the original questions and label them same way as training set."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use AnswerId to pair each testing dupe with all questions.\n",
"balanced_pairs_test = random_merge(dupes_test, questions, N=questions.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Label records by matching AnswerIds.\n",
"balanced_pairs_test[\"Label\"] = (\n",
" balanced_pairs_test.AnswerId_x == balanced_pairs_test.AnswerId_y\n",
").astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the relevant data.\n",
"balanced_pairs_test = balanced_pairs_test[balanced_pairs_columns]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"balanced_pairs_test.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sort the data by dupe ID and Label.\n",
"balanced_pairs_test.sort_values(\n",
" by=[\"Id_x\", \"Label\"], ascending=[True, False], inplace=True\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Finally, we report the final train and test sets and save as text files to be used by modeling."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Report on the datasets.\n",
"print(\n",
" \"balanced_pairs_train: {:,} rows with {:.2%} matches\".format(\n",
" balanced_pairs_train.shape[0], balanced_pairs_train.Label.mean()\n",
" )\n",
")\n",
"print(\n",
" \"balanced_pairs_test: {:,} rows with {:.2%} matches\".format(\n",
" balanced_pairs_test.shape[0], balanced_pairs_test.Label.mean()\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"os.makedirs(outputs_path, exist_ok=True)\n",
"\n",
"# Save the data.\n",
"balanced_pairs_train_path = os.path.join(outputs_path, \"balanced_pairs_train.tsv\")\n",
"print(\n",
" \"Writing {:,} to {}\".format(\n",
" balanced_pairs_train.shape[0], balanced_pairs_train_path\n",
" )\n",
")\n",
"balanced_pairs_train.to_csv(\n",
" balanced_pairs_train_path, sep=\"\\t\", header=True, index=False\n",
")\n",
"\n",
"balanced_pairs_test_path = os.path.join(outputs_path, \"balanced_pairs_test.tsv\")\n",
"print(\n",
" \"Writing {:,} to {}\".format(balanced_pairs_test.shape[0], balanced_pairs_test_path)\n",
")\n",
"balanced_pairs_test.to_csv(balanced_pairs_test_path, sep=\"\\t\", header=True, index=False)\n",
"\n",
"# Save original questions to be used for scoring later.\n",
"questions_path = os.path.join(outputs_path, \"questions.tsv\")\n",
"print(\"Writing {:,} to {}\".format(questions.shape[0], questions_path))\n",
"questions.to_csv(questions_path, sep=\"\\t\", header=True, index=False)\n",
"\n",
"# Save the test duplicate questions to be used with the scoring function.\n",
"dupes_test_path = os.path.join(outputs_path, \"dupes_test.tsv\")\n",
"print(\"Writing {:,} to {}\".format(dupes_test.shape[0], dupes_test_path))\n",
"dupes_test.to_csv(dupes_test_path, sep=\"\\t\", header=True, index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can now move on to [train on local](02_TrainOnLocal.ipynb) notebook to train our model using Azure Machine \n",
"Learning."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "az-ml-realtime-score",
"language": "python",
"name": "az-ml-realtime-score"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"pycharm": {
"stem_cell": {
"cell_type": "raw",
"source": [],
"metadata": {
"collapsed": false
}
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View file

@ -0,0 +1,664 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Train Locally\n",
"In this notebook, you will perform the following using Azure Machine Learning.\n",
"* Load workspace.\n",
"* Configure & execute a local run in a user-managed Python environment.\n",
"* Configure & execute a local run in a system-managed Python environment.\n",
"* Configure & execute a local run in a Docker environment.\n",
"* Register model for operationalization."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from azure_utils.machine_learning.utils import get_workspace_from_config\n",
"from azureml.core import Experiment\n",
"from azureml.core import ScriptRunConfig\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"from azureml.core.runconfig import RunConfiguration\n",
"from notebooks import DIRECTORY"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Model Hyperparameters"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": [
"parameters"
]
},
"source": [
"This notebook uses a training script that uses \n",
"[lightgbm](https://lightgbm.readthedocs.io/en/latest/Python-API.html#scikit-learn-api). \n",
"Here we set the number of estimators. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"num_estimators = \"10\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = get_workspace_from_config()\n",
"print(ws.name, ws.resource_group, ws.location, sep=\"\\n\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create An Experiment\n",
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics \n",
"and output artifacts from your experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"experiment_name = \"mlaks-train-on-local\"\n",
"exp = Experiment(workspace=ws, name=experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure & Run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this section, we show three different ways of locally training your model through Azure ML SDK for demonstration \n",
"purposes. Only one of these runs is sufficient to register the model."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"### User-managed environment\n",
"Below, we use a user-managed run, which means you are responsible to ensure all the necessary packages that are \n",
"available in the Python environment you choose to run the script. We will use the environment created for this \n",
"tutorial which has Azure ML SDK and other dependencies installed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Editing a run configuration property on-fly.\n",
"run_config_user_managed = RunConfiguration()\n",
"\n",
"run_config_user_managed.environment.python.user_managed_dependencies = True\n",
"\n",
"# Choose the specific Python environment of this tutorial by pointing to the Python path\n",
"run_config_user_managed.environment.python.interpreter_path = (\n",
" \"/anaconda/envs/az-ml-realtime-score/bin/python\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Submit script to run in the user-managed environment\n",
"Note that the whole `scripts` folder is submitted for execution, including the `item_selector.py` and `label_rank.py` \n",
"files. The model will be written to `outputs` directory which is a special directory such that all content in this \n",
"directory is automatically uploaded to your workspace. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if not os.path.isdir(\"script\"):\n",
" os.mkdir(\"script\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile script/create_model.py\n",
"from azure_utils.machine_learning import create_model\n",
"\n",
"if __name__ == '__main__':\n",
" create_model.main()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"scrpt = \"create_model.py\"\n",
"args = [\n",
" \"--inputs\",\n",
" os.path.abspath(DIRECTORY + \"/data_folder\"),\n",
" \"--outputs\",\n",
" \"outputs\",\n",
" \"--estimators\",\n",
" num_estimators,\n",
" \"--match\",\n",
" \"5\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"src = ScriptRunConfig(\n",
" source_directory=\"./script\",\n",
" script=scrpt,\n",
" arguments=args,\n",
" run_config=run_config_user_managed,\n",
")\n",
"#run = exp.submit(src)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get run history details"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Block to wait till run finishes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"outputs": [],
"source": [
"# run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's check that the model is now available in your workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"outputs": [],
"source": [
"# run.get_file_names()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's retrieve the accuracy of the model from run logs by querying the run metrics."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"outputs": [],
"source": [
"# run.get_metrics()"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"source": [
"### System-managed environment\n",
"You can also ask the system to build a new conda environment and execute your scripts in it. The environment is built \n",
"once and will be reused in subsequent executions as long as the conda dependencies remain unchanged. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"outputs": [],
"source": [
"run_config_system_managed = RunConfiguration()\n",
"run_config_system_managed.environment.python.user_managed_dependencies = False\n",
"run_config_system_managed.auto_prepare_environment = True"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's specify the conda and pip dependencies."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Specify conda dependencies with scikit-learn and pandas\n",
"conda_pack = [\"scikit-learn==0.19.1\", \"pandas==0.23.3\"]\n",
"requirements = [\"lightgbm==2.1.2\", \"azureml-defaults==1.0.57\", \"Microsoft-AI-Azure-Utility-Samples\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"outputs": [],
"source": [
"cd = CondaDependencies.create(conda_packages=conda_pack,\n",
" pip_packages=requirements)\n",
"run_config_system_managed.environment.python.conda_dependencies = cd"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"source": [
"#### Submit script to run in the system-managed environment\n",
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, \n",
"this might take up to 5 minutes. But this conda environment is reused so long as you don't change the conda \n",
"dependencies."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"outputs": [],
"source": [
"src = ScriptRunConfig(\n",
" source_directory=\"./script\",\n",
" script=scrpt,\n",
" arguments=args,\n",
" run_config=run_config_system_managed,\n",
")\n",
"run = exp.submit(src)\n",
"run"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"source": [
"Block and wait till run finishes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"outputs": [],
"source": [
"run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"run.get_file_names()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"outputs": [],
"source": [
"run.get_metrics()"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"source": [
"### Docker-based execution\n",
"**IMPORTANT**: You must have Docker engine installed locally in order to use this execution mode. If your kernel is \n",
"already running in a Docker container, such as **Azure Notebooks**, this mode will **NOT** work.\n",
"\n",
"You can also ask the system to pull down a Docker image and execute your scripts in it. We will use the \n",
"`continuumio/miniconda3` image for that purpose."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"outputs": [],
"source": [
"run_config_docker = RunConfiguration()\n",
"run_config_docker.environment.python.user_managed_dependencies = False\n",
"run_config_docker.auto_prepare_environment = True\n",
"run_config_docker.environment.docker.enabled = True\n",
"run_config_docker.environment.docker.base_image = \"continuumio/miniconda3\"\n",
"\n",
"# Specify conda and pip dependencies\n",
"cd = CondaDependencies.create(conda_packages=conda_pack,\n",
" pip_packages=requirements)\n",
"run_config_docker.environment.python.conda_dependencies = cd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here, we map the local `data_folder` that includes the training and testing data to the docker container using `-v` \n",
"flag."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"outputs": [],
"source": [
"host_dir = os.path.abspath(DIRECTORY + \"/data_folder\")\n",
"container_dir = \"/data_folder\"\n",
"docker_arg = \"{}:{}\".format(host_dir, container_dir)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This time the run will use the mapped `data_folder` inside the docker container to find the data files."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"args = [\n",
" \"--inputs\",\n",
" \"/data_folder\",\n",
" \"--outputs\",\n",
" \"outputs\",\n",
" \"--estimators\",\n",
" num_estimators,\n",
" \"--match\",\n",
" \"5\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run_config_docker.environment.docker.arguments.append(\"-v\")\n",
"run_config_docker.environment.docker.arguments.append(docker_arg)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"src = ScriptRunConfig(\n",
" source_directory=\"./script\",\n",
" script=scrpt,\n",
" arguments=args,\n",
" run_config=run_config_docker,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"run = exp.submit(src)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Register Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.get_metrics()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Register Model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We now register the model with the workspace so that we can later deploy the model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# supply a model name, and the full path to the serialized model file.\n",
"model = run.register_model(model_name=\"question_match_model\",\n",
" model_path=\"./outputs/model.pkl\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(model.name, model.version, model.url, sep=\"\\n\")"
]
}
],
"metadata": {
"authors": [
{
"name": "roastala"
}
],
"kernelspec": {
"display_name": "az-ml-realtime-score",
"language": "python",
"name": "az-ml-realtime-score"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"pycharm": {
"stem_cell": {
"cell_type": "raw",
"metadata": {
"collapsed": false
},
"source": []
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View file

@ -0,0 +1,173 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License.\n",
"\n",
"# Develop Scoring Script\n",
"\n",
"In this notebook, we will develop the scoring script and test it locally. We will use the scoring script to create the \n",
"web service that will call the model for scoring."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import pandas as pd\n",
"\n",
"from azure_utils.machine_learning.utils import get_workspace_from_config\n",
"from azure_utils.utilities import text_to_json\n",
"from azureml.core.model import Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sys.path.append('./scripts/')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's load the workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = get_workspace_from_config()\n",
"print(ws.name, ws.resource_group, ws.location, sep=\"\\n\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's retrieve the model registered earlier and download it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_name = 'question_match_model'\n",
"\n",
"model = Model(ws, name=model_name)\n",
"print(model.name, model.version, model.url, sep=\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.download(target_dir=\".\", exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Scoring Script"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We use the writefile magic to write the contents of the below cell to `score.py` which includes the `init` and `run` \n",
"functions required by AML.\n",
"- The init() function typically loads the model into a global object.\n",
"- The run(input_data) function uses the model to predict a value based on the input_data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile score.py\n",
"\n",
"import pandas as pd\n",
"\n",
"import json\n",
"import logging\n",
"import timeit as t\n",
"from azure_utils.machine_learning.duplicate_model import DuplicateModel\n",
"\n",
"def init():\n",
" logger = logging.getLogger(\"scoring_script\")\n",
" global model\n",
" model_path = \"model.pkl\"\n",
" questions_path = \"./data_folder/questions.tsv\"\n",
" start = t.default_timer()\n",
" model = DuplicateModel(model_path, questions_path)\n",
" end = t.default_timer()\n",
" loadTimeMsg = \"Model loading time: {0} ms\".format(\n",
" round((end - start) * 1000, 2))\n",
" logger.info(loadTimeMsg)\n",
"\n",
"\n",
"def run(body):\n",
" logger = logging.getLogger(\"scoring_script\")\n",
" json_load_text = json.loads(body)\n",
" text_to_score = json_load_text[\"input\"]\n",
" start = t.default_timer()\n",
" resp = model.score(text_to_score)\n",
" end = t.default_timer()\n",
" logger.info(\"Prediction took {0} ms\".format(round((end - start) * 1000,\n",
" 2)))\n",
" return json.dumps(resp)"
]
}
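,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's smoke test the scoring script locally before it is baked into a web service. This is a minimal sketch; it \n",
"assumes `model.pkl` was downloaded above and that the `data_folder` files produced by the data preparation notebook \n",
"are available under the current directory."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal local smoke test of score.py (file locations are assumptions).\n",
"import score\n",
"\n",
"# init() loads model.pkl and ./data_folder/questions.tsv from the current directory.\n",
"score.init()\n",
"\n",
"# Score one of the test duplicate questions; column 4 holds the question text.\n",
"dupes_test = pd.read_csv('./data_folder/dupes_test.tsv', sep='\\t', encoding='latin1')\n",
"score.run(text_to_json(dupes_test.iloc[0, 4]))"
]
}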
],
"metadata": {
"kernelspec": {
"display_name": "az-ml-realtime-score",
"language": "python",
"name": "az-ml-realtime-score"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"pycharm": {
"stem_cell": {
"cell_type": "raw",
"source": [],
"metadata": {
"collapsed": false
}
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View file

@ -0,0 +1,398 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License.\n",
"\n",
"# Create Image\n",
"In this notebook, we show the following steps for deploying a web service using AzureML:\n",
"- Create an image\n",
"- Test image locally"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azure_utils.machine_learning.utils import load_configuration, get_workspace_from_config\n",
"from azure_utils.utilities import text_to_json\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"from azureml.core.model import Model\n",
"from notebooks import DIRECTORY\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"source": [
"AML will use the following information to create an image, provision a cluster and deploy a service. Replace the \n",
"values in the following cell with your information."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cfg = load_configuration(\"../workspace_conf.yml\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
},
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"image_name = cfg['image_name']"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## Get workspace\n",
"Load existing workspace from the config file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"ws = get_workspace_from_config()\n",
"print(ws.name, ws.resource_group, ws.location, sep=\"\\n\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"source": [
"## Load model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_name = 'question_match_model'\n",
"\n",
"model = Model(ws, name=model_name)\n",
"print(model.name, model.version)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create an image\n",
"We will now modify the `score.py` created in the previous notebook for the `init()` function to use the model we \n",
"registered to the workspace earlier."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile score.py\n",
"\n",
"import sys\n",
"import pandas as pd\n",
"import json\n",
"import logging\n",
"import timeit as t\n",
"from sklearn.externals import joblib\n",
"from azureml.core.model import Model\n",
"from azureml.contrib.services.aml_request import rawhttp\n",
"from sklearn.base import BaseEstimator, TransformerMixin\n",
"from azure_utils.machine_learning.duplicate_model import DuplicateModel\n",
"\n",
"sys.path.append('./scripts/')\n",
"\n",
"\n",
"def init():\n",
" logger = logging.getLogger(\"scoring_script\")\n",
" global model\n",
" model_name = 'question_match_model'\n",
" model_path = Model.get_model_path(model_name)\n",
" questions_path = './notebooks/data_folder/questions.tsv'\n",
" start = t.default_timer()\n",
" model = DuplicateModel(model_path, questions_path)\n",
" end = t.default_timer()\n",
" loadTimeMsg = \"Model loading time: {0} ms\".format(\n",
" round((end - start) * 1000, 2))\n",
" logger.info(loadTimeMsg)\n",
"\n",
"\n",
"@rawhttp\n",
"def run(request):\n",
" \"\"\"\n",
" Function runs on each request\n",
" \"\"\"\n",
" body = request.data\n",
" if request.method == 'POST':\n",
" logger = logging.getLogger(\"scoring_script\")\n",
" json_load_text = json.loads(body)\n",
" text_to_score = json_load_text['input']\n",
" start = t.default_timer()\n",
" resp = model.score(text_to_score)\n",
" end = t.default_timer()\n",
" logger.info(\"Prediction took {0} ms\".format(\n",
" round((end - start) * 1000, 2)))\n",
" return (json.dumps(resp))\n",
" if request.method == 'GET':\n",
" resp_body = {\n",
" \"azEnvironment\": \"Azure\",\n",
" \"location\": \"westus2\",\n",
" \"osType\": \"Ubuntu 16.04\",\n",
" \"resourceGroupName\": \"\",\n",
" \"resourceId\": \"\",\n",
" \"sku\": \"\",\n",
" \"subscriptionId\": \"\",\n",
" \"uniqueId\": \"PythonMLRST\",\n",
" \"vmSize\": \"\",\n",
" \"zone\": \"\",\n",
" \"isServer\": False,\n",
" \"version\": \"\"\n",
" }\n",
" return (resp_body)\n",
" return AMLResponse(\"bad request\", 500)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's specifiy the conda and pip dependencies for the image."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"conda_pack = [\"scikit-learn==0.19.1\", \"pandas==0.23.3\"]\n",
"requirements = [\n",
" \"lightgbm==2.1.2\", \"azureml-defaults==1.0.57\", \"azureml-contrib-services\", \n",
" \"Microsoft-AI-Azure-Utility-Samples\"\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lgbmenv = CondaDependencies.create(conda_packages=conda_pack,\n",
" pip_packages=requirements)\n",
"\n",
"with open(\"lgbmenv.yml\", \"w\") as f:\n",
" f.write(lgbmenv.serialize_to_string())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.image import ContainerImage\n",
"\n",
"image_config = ContainerImage.image_configuration(\n",
" execution_script=\"score.py\",\n",
" runtime=\"python\",\n",
" conda_file=\"lgbmenv.yml\",\n",
" description=\"Image with lightgbm model\",\n",
" tags={\n",
" \"area\": \"text\",\n",
" \"type\": \"lightgbm\"\n",
" },\n",
" dependencies=[\n",
" \"./notebooks/data_folder/questions.tsv\"\n",
" ],\n",
")\n",
"\n",
"image = ContainerImage.create(\n",
" name=image_name,\n",
" # this is the model object\n",
" models=[model],\n",
" image_config=image_config,\n",
" workspace=ws,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"image.wait_for_creation(show_output=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(image.name, image.version)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"image_version = str(image.version)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can find the logs of image creation in the following location."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"image.image_build_log_uri"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test image locally"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, let's use one of the duplicate questions to test our image."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dupes_test_path = DIRECTORY + '/data_folder/dupes_test.tsv'\n",
"dupes_test = pd.read_csv(dupes_test_path, sep='\\t', encoding='latin1')\n",
"text_to_score = dupes_test.iloc[0, 4]\n",
"text_to_score"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"json_text = text_to_json(text_to_score)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"image.run(input_data=json_text)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Conclusion\n",
"\n",
"We have created a docker Image using AzureML and registred this image on Azure Container Registry (ACR). This docker \n",
"image encapsulates a trained machine learning model and scoring scripts. In the next step, we can take this image \n",
"and deploy it on the compute target of your choice: Azure Kubernetes Service (AKS) Cluster or Azure IoT Edge."
]
}
],
"metadata": {
"authors": [
{
"name": "raymondl"
}
],
"kernelspec": {
"display_name": "az-ml-realtime-score",
"language": "python",
"name": "az-ml-realtime-score"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"pycharm": {
"stem_cell": {
"cell_type": "raw",
"source": [],
"metadata": {
"collapsed": false
}
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View file

@ -0,0 +1,646 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Deploying a web service to Azure Kubernetes Service (AKS)\n",
"In this notebook, we show the following steps for deploying a web service using AzureML:\n",
"- Provision an AKS cluster (one time action)\n",
"- Deploy the service\n",
"- Test the web service\n",
"- Scale up the service"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import requests\n",
"from azure_utils.machine_learning.utils import get_workspace_from_config\n",
"from azure_utils.machine_learning.utils import load_configuration\n",
"from azure_utils.utilities import text_to_json\n",
"from azureml.core.compute import AksCompute, ComputeTarget\n",
"from azureml.core.webservice import Webservice, AksWebservice\n",
"\n",
"from notebooks import DIRECTORY"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"AML will use the following information to create an image, provision a cluster and deploy a service. Replace the \n",
"values in the following cell with your information."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"cfg = load_configuration(\"../workspace_conf.yml\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"image_name = cfg['image_name']\n",
"aks_service_name = cfg['aks_service_name']\n",
"aks_name = cfg['aks_name']\n",
"aks_location = cfg['workspace_region']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get workspace\n",
"Load existing workspace from the config file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = get_workspace_from_config()\n",
"print(ws.name, ws.resource_group, ws.location, sep=\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"image = ws.images[image_name]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Provision the AKS Cluster\n",
"This is a one time setup. You can reuse this cluster for multiple deployments after it has been created. If you delete \n",
"the cluster or the resource group that contains it, then you would have to recreate it. Let's first check if there are \n",
"enough cores in the subscription for the cluster ."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vm_family = \"Dv2\"\n",
"vm_size = \"Standard_D4_v2\"\n",
"vm_cores = 8\n",
"node_count = 4"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vm_dict = {vm_family: {\"size\": vm_size, \"cores\": vm_cores}}"
]
},
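{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of that check using the sizing variables above; the `az vm list-usage` CLI command assumes you are \n",
"logged in to the right subscription."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cores the cluster will consume under the sizing defined above.\n",
"required_cores = node_count * vm_dict[vm_family]['cores']\n",
"print('The AKS cluster will require {} {} cores'.format(required_cores, vm_family))\n",
"\n",
"# Compare against the regional quota, e.g. (assumes a logged-in Azure CLI):\n",
"# !az vm list-usage --location $aks_location --query \"[?contains(localName, 'Dv2')]\""
]
},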
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"prov_config = AksCompute.provisioning_configuration(agent_count=node_count,\n",
" vm_size=vm_size,\n",
" location=aks_location)\n",
"\n",
"# Create the cluster\n",
"aks_target = ComputeTarget.create(workspace=ws,\n",
" name=aks_name,\n",
" provisioning_configuration=prov_config)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"aks_target.wait_for_completion(show_output=True)\n",
"print(aks_target.provisioning_state)\n",
"print(aks_target.provisioning_errors)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's check that the cluster is created successfully."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"aks_status = aks_target.get_status()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"assert aks_status == 'Succeeded', 'AKS failed to create'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deploy web service to AKS"
]
},
{
"cell_type": "markdown",
"metadata": {
"tags": [
"parameters"
]
},
"source": [
"Next, we deploy the web service. We deploy two pods with 1 CPU core each."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"num_replicas = 2\n",
"cpu_cores = 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Set the web service configuration\n",
"aks_config = AksWebservice.deploy_configuration(num_replicas=num_replicas,\n",
" cpu_cores=cpu_cores)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"aks_service = Webservice.deploy_from_image(\n",
" workspace=ws,\n",
" name=aks_service_name,\n",
" image=image,\n",
" deployment_config=aks_config,\n",
" deployment_target=aks_target,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"aks_service.wait_for_deployment(show_output=True)\n",
"print(aks_service.state)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can check the logs of the web service with the below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"aks_service.get_logs()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test the web service\n",
"We now test the web service."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"num_dupes_to_score = 4"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dupes_test_path = DIRECTORY + '/data_folder/dupes_test.tsv'\n",
"dupes_test = pd.read_csv(dupes_test_path, sep='\\t', encoding='latin1')\n",
"text_to_score = dupes_test.iloc[0, num_dupes_to_score]\n",
"text_to_score"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"json_text = text_to_json(text_to_score)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"prediction = aks_service.run(input_data=json_text)\n",
"print(prediction)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's try a few more duplicate questions and display their top 3 original matches. Let's first get the scoring URL \n",
"and API key for the web service."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"scoring_url = aks_service.scoring_uri\n",
"api_key = aks_service.get_keys()[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Write the URI and key to the statistics tracker."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"headers = {\n",
" 'content-type': 'application/json',\n",
" 'Authorization': ('Bearer ' + api_key)\n",
"}\n",
"r = requests.post(\n",
" scoring_url, data=json_text,\n",
" headers=headers) # Run the request twice since the first time takes a\n",
"%time r = requests.post(scoring_url, data=json_text, headers=headers) # little longer due to the loading of the model\n",
"print(r)\n",
"r.json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dupes_to_score = dupes_test.iloc[:5, num_dupes_to_score]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"results = [\n",
" requests.post(scoring_url, data=text_to_json(text), headers=headers)\n",
" for text in dupes_to_score\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's print top 3 matches for each duplicate question."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"[eval(results[i].json())[0:3] for i in range(0, len(results))]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next let's quickly check what the request response performance is for the deployed model on AKS cluster."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text_data = list(map(text_to_json, dupes_to_score)) # Retrieve the text data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"timer_results = list()\n",
"for text in text_data:\n",
" res=%timeit -r 1 -o -q requests.post(scoring_url, data=text, headers=headers)\n",
" timer_results.append(res.best)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"timer_results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"Average time taken: {0:4.2f} ms\".format(10 ** 3 * np.mean(timer_results)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Scaling"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this part, we scale the number of pods to make sure we fully utilize the AKS cluster. To connect to the Kubernetes \n",
"cluster, we will use kubectl, the Kubernetes command-line client. To install, run the following:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!sudo az aks install-cli"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we will get the credentials to connect to the cluster."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"os.makedirs(os.path.join(os.path.expanduser('~'),'.kube'), exist_ok=True) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"config_path = os.path.join(os.path.expanduser('~'),'.kube/config')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open(config_path, 'a') as f:\n",
" f.write(aks_target.get_credentials()['userKubeConfig'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's check the nodes and pods of the cluster."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!kubectl get nodes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!kubectl get pods --all-namespaces"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!kubectl get events"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can now scale up the number of pods."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"new_num_replicas = 10"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!kubectl get namespaces"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!kubectl scale --current-replicas=$num_replicas \\\n",
" --replicas=$new_num_replicas {\"deployments/\" + aks_service_name} \\\n",
" --namespace azureml-workspace"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!kubectl get pods --all-namespaces"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!kubectl get deployment"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we will test the [throughput of the web service](06_SpeedTestWebApp.ipynb)."
]
}
],
"metadata": {
"authors": [
{
"name": "raymondl"
}
],
"celltoolbar": "Tags",
"kernelspec": {
"display_name": "az-ml-realtime-score",
"language": "python",
"name": "az-ml-realtime-score"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"pycharm": {
"stem_cell": {
"cell_type": "raw",
"source": [],
"metadata": {
"collapsed": false
}
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View file

@ -0,0 +1,250 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load Test deployed web application"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook pulls some images and tests them against the deployed web application. We submit requests asychronously \n",
"which should reduce the contribution of latency."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from urllib.parse import urlparse\n",
"\n",
"from azure_utils.machine_learning.utils import get_workspace_from_config\n",
"from azureml.core.webservice import AksWebservice\n",
"from dotenv import get_key"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = get_workspace_from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep=\"\\n\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's retrieve the web service."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"aks_service_name = get_key(env_path, 'aks_service_name')\n",
"aks_service = AksWebservice(ws, name=aks_service_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We will test our service concurrently but only have 4 concurrent requests at any time. We have only deployed one pod \n",
"on one node and increasing the number of concurrent calls does not really increase throughput. Feel free to try \n",
"different values and see how the service responds."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"CONCURRENT_REQUESTS = 4 # Number of requests at a time"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get the scoring URL and API key of the service."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"scoring_url = aks_service.scoring_uri\n",
"api_key = aks_service.get_keys()[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Below we are going to use [Locust](https://locust.io/) to load test our deployed model. First we need to write the \n",
"locustfile."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile locustfile.py\n",
"from locust import HttpLocust, TaskSet, task\n",
"import os\n",
"import pandas as pd\n",
"from utilities import text_to_json\n",
"from itertools import cycle\n",
"\n",
"_NUMBER_OF_REQUESTS = os.getenv('NUMBER_OF_REQUESTS', 100)\n",
"dupes_test_path = './data_folder/dupes_test.tsv'\n",
"dupes_test = pd.read_csv(dupes_test_path, sep='\\t', encoding='latin1')\n",
"dupes_to_score = dupes_test.iloc[:_NUMBER_OF_REQUESTS, 4]\n",
"_SCORE_PATH = os.getenv('SCORE_PATH', \"/score\")\n",
"_API_KEY = os.getenv('API_KEY')\n",
"\n",
"\n",
"class UserBehavior(TaskSet):\n",
" def on_start(self):\n",
" print('Running setup')\n",
" self._text_generator = cycle(dupes_to_score.apply(text_to_json))\n",
" self._headers = {\n",
" \"content-type\": \"application/json\",\n",
" 'Authorization': ('Bearer {}'.format(_API_KEY))\n",
" }\n",
"\n",
" @task\n",
" def score(self):\n",
" self.client.post(_SCORE_PATH,\n",
" data=next(self._text_generator),\n",
" headers=self._headers)\n",
"\n",
"\n",
"class WebsiteUser(HttpLocust):\n",
" task_set = UserBehavior\n",
" # min and max time to wait before repeating task\n",
" min_wait = 10\n",
" max_wait = 200"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Below we define the locust command we want to run. We are going to run at a hatch rate of 10 and the whole test will \n",
"last 1 minute. Feel free to adjust the parameters below and see how the results differ. The results of the test will \n",
"be saved to two csv files **modeltest_requests.csv** and **modeltest_distribution.csv**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"parsed_url = urlparse(scoring_url)\n",
"cmd = \"locust -H {host} --no-web -c {users} -r {rate} -t {duration} --csv=modeltest --only-summary\".format(\n",
" host=\"{url.scheme}://{url.netloc}\".format(url=parsed_url),\n",
" users=CONCURRENT_REQUESTS, # concurrent users\n",
" rate=10, # hatch rate (users / second)\n",
" duration='1m', # test duration\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"! API_KEY={api_key} SCORE_PATH={parsed_url.path} PYTHONPATH={os.path.abspath('../')} {cmd}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here are the summary results of our test and below that the distribution infromation of those tests. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pd.read_csv(\"modeltest_requests.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pd.read_csv(\"modeltest_distribution.csv\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To tear down the cluster and all related resources go to the [tear down the cluster](07_TearDown.ipynb) notebook."
]
}
],
"metadata": {
"jupytext": {
"formats": "ipynb"
},
"kernelspec": {
"display_name": "az-ml-realtime-score",
"language": "python",
"name": "az-ml-realtime-score"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"pycharm": {
"stem_cell": {
"cell_type": "raw",
"metadata": {
"collapsed": false
},
"source": []
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View file

@ -0,0 +1,741 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": false
}
}
}
}
},
"source": [
"# Explore Duplicate Question Matches\n",
"Use this dashboard to explore the relationship between duplicate and original questions."
]
},
{
"cell_type": "markdown",
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"source": [
"## Setup\n",
"This section loads needed packages, and defines useful functions."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [],
"source": [
"from __future__ import print_function\n",
"\n",
"import math\n",
"\n",
"import ipywidgets as widgets\n",
"import pandas as pd\n",
"import requests\n",
"from azureml.core.webservice import AksWebservice\n",
"from azureml.core.workspace import Workspace\n",
"from dotenv import get_key, find_dotenv\n",
"from azure_utils.machine_learning.utils import get_workspace_from_config\n",
"from azure_utils.utilities import read_questions, text_to_json, get_auth\n",
"from notebooks import DIRECTORY"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = get_workspace_from_config()\n",
"print(ws.name, ws.resource_group, ws.location, sep=\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"aks_service_name = get_key(env_path, 'aks_service_name')\n",
"aks_service = AksWebservice(ws, name=aks_service_name)\n",
"aks_service.name"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the duplicate questions scoring app's URL."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"scoring_url = aks_service.scoring_uri\n",
"api_key = aks_service.get_keys()[0]"
]
},
{
"cell_type": "markdown",
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"source": [
"A constructor function for ID-text contents. Constructs buttons and text areas for each text ID and text passage.\n",
"* Each buttons's description is set to a text's ID, and its click action is set to the handler.\n",
"* Each text area's content is set to a text.\n",
"* A dictionary is created to map IDs to text areas."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [],
"source": [
"def buttons_and_texts(data,\n",
" id,\n",
" answerid,\n",
" text,\n",
" handle_click,\n",
" layout=widgets.Layout(width=\"100%\"),\n",
" n=15):\n",
" \"\"\"Construct buttons, text areas, and a mapping from IDs to text areas.\"\"\"\n",
" items = []\n",
" text_map = {}\n",
" for i in range(min(n, len(data))):\n",
" button = widgets.Button(description=data.iloc[i][id])\n",
" button.answerid = data.iloc[i][answerid] if answerid in data else None\n",
" button.open = False\n",
" button.on_click(handle_click)\n",
" items.append(button)\n",
" text_area = widgets.Textarea(data.iloc[i][text],\n",
" placeholder=data.iloc[i][id],\n",
" layout=layout)\n",
" items.append(text_area)\n",
" text_map[data.iloc[i][id]] = text_area\n",
" return items, text_map"
]
},
{
"cell_type": "markdown",
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"source": [
"A constructor function for the duplicates and questions explorer widget. This builds a box containing duplicates and \n",
"question tabs, each in turn containing boxes that contain the buttons and text areas."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [],
"source": [
"def duplicates_questions_widget(duplicates,\n",
" questions,\n",
" layout=widgets.Layout(width=\"100%\")):\n",
" \"\"\"Construct a duplicates and questions exploration widget.\"\"\"\n",
" # Construct the duplicates Tab of buttons and text areas.\n",
" duplicates_items, duplicates_map = buttons_and_texts(\n",
" duplicates,\n",
" duplicates_id,\n",
" duplicates_answerid,\n",
" duplicates_text,\n",
" duplicates_click,\n",
" n=duplicates.shape[0],\n",
" )\n",
" duplicates_tab = widgets.Tab(\n",
" [widgets.VBox(duplicates_items, layout=layout)],\n",
" layout=widgets.Layout(width=\"100%\", height=\"500px\", overflow_y=\"auto\"),\n",
" )\n",
" duplicates_tab.set_title(0, duplicates_title)\n",
" # Construct the questions Tab of buttons and text areas.\n",
" questions_items, questions_map = buttons_and_texts(\n",
" questions,\n",
" questions_id,\n",
" questions_answerid,\n",
" questions_text,\n",
" questions_click,\n",
" n=questions.shape[0],\n",
" )\n",
" questions_tab = widgets.Tab(\n",
" [widgets.VBox(questions_items, layout=layout)],\n",
" layout=widgets.Layout(width=\"100%\", height=\"500px\", overflow_y=\"auto\"),\n",
" )\n",
" questions_tab.set_title(0, questions_title)\n",
" # Put both tabs in an HBox.\n",
" duplicates_questions = widgets.HBox([duplicates_tab, questions_tab],\n",
" layout=layout)\n",
" return duplicates_map, questions_map, duplicates_questions"
]
},
{
"cell_type": "markdown",
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"source": [
"A handler function for a question passage button press. If the passage's text window is open, it is collapsed. \n",
"Otherwise, it is opened."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [],
"source": [
"def questions_click(button):\n",
" \"\"\"Respond to a click on a question button.\"\"\"\n",
" global questions_map\n",
" if button.open:\n",
" questions_map[button.description].rows = None\n",
" button.open = False\n",
" else:\n",
" questions_map[button.description].rows = 10\n",
" button.open = True"
]
},
{
"cell_type": "markdown",
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"source": [
"A handler function for a duplicate obligation button press. If the obligation is not selected, select it and update \n",
"the questions tab with its top 15 question passages ordered by match score. Otherwise, if the duplicate's text window \n",
"is open, it is collapsed, else it is opened."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [],
"source": [
"def duplicates_click(button):\n",
" \"\"\"Respond to a click on a duplicate button.\"\"\"\n",
" global duplicates_map\n",
" if select_duplicate(button):\n",
" duplicates_map[button.description].rows = 10\n",
" button.open = True\n",
" else:\n",
" if button.open:\n",
" duplicates_map[button.description].rows = None\n",
" button.open = False\n",
" else:\n",
" duplicates_map[button.description].rows = 10\n",
" button.open = True\n",
"\n",
"\n",
"def select_duplicate(button):\n",
" \"\"\"Update the displayed questions to correspond to the button's duplicate\n",
" selections. Returns whether or not the selected duplicate changed.\n",
" \"\"\"\n",
" global selected_button, questions_map, duplicates_questions\n",
" if \"selected_button\" not in globals() or button != selected_button:\n",
" if \"selected_button\" in globals():\n",
" selected_button.style.button_color = None\n",
" selected_button.style.font_weight = \"\"\n",
" selected_button = button\n",
" selected_button.style.button_color = \"yellow\"\n",
" selected_button.style.font_weight = \"bold\"\n",
" duplicates_text = duplicates_map[selected_button.description].value\n",
" questions_scores = score_text(duplicates_text)\n",
" ordered_questions = questions.loc[questions_scores[questions_id]]\n",
" questions_items, questions_map = buttons_and_texts(\n",
" ordered_questions,\n",
" questions_id,\n",
" questions_answerid,\n",
" questions_text,\n",
" questions_click,\n",
" n=questions_display,\n",
" )\n",
" if questions_button_color is True and selected_button.answerid is not None:\n",
" set_button_color(questions_items[::2], selected_button.answerid)\n",
" if questions_button_score is True:\n",
" questions_items = [\n",
" item for button, text_area in zip(*[iter(questions_items)] * 2)\n",
" for item in (add_button_prob(button, questions_scores),\n",
" text_area)\n",
" ]\n",
" duplicates_questions.children[1].children[0].children = questions_items\n",
" duplicates_questions.children[1].set_title(0,\n",
" selected_button.description)\n",
" return True\n",
" else:\n",
" return False\n",
"\n",
"\n",
"def add_button_prob(button, questions_scores):\n",
" \"\"\"Return an HBox containing button and its probability.\"\"\"\n",
" id = button.description\n",
" prob = widgets.Label(score_label + \": \" + str(\n",
" int(\n",
" math.ceil(score_scale *\n",
" questions_scores.loc[id][questions_probability]))))\n",
" return widgets.HBox([button, prob])\n",
"\n",
"\n",
"def set_button_color(button, answerid):\n",
" \"\"\"Set each button's color according to its label.\"\"\"\n",
" for i in range(len(button)):\n",
" button[i].style.button_color = (\n",
" \"lightgreen\" if button[i].answerid == answerid else None)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Functions for interacting with the web service."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def score_text(text):\n",
" \"\"\"Return a data frame with the original question scores for the text.\"\"\"\n",
" headers = {\n",
" \"content-type\": \"application/json\",\n",
" \"Authorization\": (\"Bearer \" + api_key),\n",
" }\n",
" # jsontext = json.dumps({'input':'{0}'.format(text)})\n",
" jsontext = text_to_json(text)\n",
" result = requests.post(scoring_url, data=jsontext, headers=headers)\n",
" # scores = result.json()['result'][0]\n",
" scores = eval(result.json())\n",
" scores_df = pd.DataFrame(\n",
" scores,\n",
" columns=[questions_id, questions_answerid, questions_probability])\n",
" scores_df[questions_id] = scores_df[questions_id].astype(str)\n",
" scores_df[questions_answerid] = scores_df[questions_answerid].astype(str)\n",
" scores_df = scores_df.set_index(questions_id, drop=False)\n",
" return scores_df"
]
},
{
"cell_type": "markdown",
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"source": [
"Control the appearance of cell output boxes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [],
"source": [
"%%html\n",
"<style>\n",
".output_wrapper, .output {\n",
" height:auto !important;\n",
" max-height:1000px; /* your desired max-height here */\n",
"}\n",
".output_scroll {\n",
" box-shadow:none !important;\n",
" webkit-box-shadow:none !important;\n",
"}\n",
"</style>"
]
},
{
"cell_type": "markdown",
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"source": [
"## Load data\n",
"\n",
"Load the pre-formatted text of questions."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [],
"source": [
"questions_title = 'Questions'\n",
"questions_id = 'Id'\n",
"questions_answerid = 'AnswerId'\n",
"questions_text = 'Text'\n",
"questions_probability = 'Probability'\n",
"questions_path = DIRECTORY + '/data_folder/questions.tsv'\n",
"questions = read_questions(questions_path, questions_id, questions_answerid)"
]
},
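{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, peek at the first few questions to confirm the load (an illustrative check only)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"questions.head()"
]
},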
{
"cell_type": "markdown",
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"source": [
"Load the pre-formatted text of duplicates."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [],
"source": [
"duplicates_title = 'Duplicates'\n",
"duplicates_id = 'Id'\n",
"duplicates_answerid = 'AnswerId'\n",
"duplicates_text = 'Text'\n",
"duplicates_path = DIRECTORY + '/data_folder/dupes_test.tsv'\n",
"duplicates = read_questions(duplicates_path, duplicates_id, duplicates_answerid)"
]
},
{
"cell_type": "markdown",
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": false
}
}
}
}
},
"source": [
"## Explore original questions matched up with duplicate questions\n",
"\n",
"Define other variables and settings used in creating the interface."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"outputs": [],
"source": [
"questions_display = 15\n",
"questions_button_color = True\n",
"questions_button_score = True\n",
"score_label = 'Score'\n",
"score_scale = 100"
]
},
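{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick smoke test (a sketch only: it assumes `scoring_url` and `api_key` were set earlier in this notebook and that the service is up), score an illustrative sample question and inspect the top matches."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sample text; any question-like string works here.\n",
"sample_scores = score_text(\"How do I convert a string to a number?\")\n",
"sample_scores.head()"
]
},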
{
"cell_type": "markdown",
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": true
}
}
}
}
},
"source": [
"This builds the exploration widget as a box containing duplicates and question tabs, each in turn containing boxes \n",
"that have for each ID-text pair a button and a text area."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"extensions": {
"jupyter_dashboards": {
"version": 1,
"views": {
"grid_default": {},
"report_default": {
"hidden": false
}
}
}
}
},
"outputs": [],
"source": [
"duplicates_map, questions_map, duplicates_questions = duplicates_questions_widget(duplicates, questions)\n",
"duplicates_questions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To tear down the cluster and related resources go to the [last notebook](08_TearDown.ipynb)."
]
}
],
"metadata": {
"extensions": {
"jupyter_dashboards": {
"activeView": "report_default",
"version": 1,
"views": {
"grid_default": {
"name": "grid",
"type": "grid"
},
"report_default": {
"name": "report",
"type": "report"
}
}
}
},
"kernelspec": {
"display_name": "az-ml-realtime-score",
"language": "python",
"name": "az-ml-realtime-score"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"pycharm": {
"stem_cell": {
"cell_type": "raw",
"metadata": {
"collapsed": false
},
"source": []
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

246
notebooks/08_TearDown.ipynb Normal file

@ -0,0 +1,246 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tear it all down\n",
"Use this notebook to clean up the web service, image, model and the AKS cluster created by the tutorial."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import AksCompute\n",
"from azureml.core.image import Image\n",
"from azureml.core.model import Model\n",
"from azureml.core.webservice import AksWebservice\n",
"from azureml.core.workspace import Workspace\n",
"from dotenv import get_key, find_dotenv\n",
"from azure_utils.utilities import get_auth"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"env_path = find_dotenv(raise_error_if_not_found=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's get the workspace information."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = Workspace.from_config(auth=get_auth(env_path))\n",
"print(ws.name, ws.resource_group, ws.location, sep=\"\\n\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's retrieve the web service to delete."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"aks_service_name = get_key(env_path, 'aks_service_name')\n",
"aks_service = AksWebservice(ws, name=aks_service_name)\n",
"print(aks_service.name, aks_service.tags)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's retrieve the image to delete."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"image_name = get_key(env_path, 'image_name')\n",
"image_version = int(get_key(env_path, 'image_version'))\n",
"image = Image(ws, name=image_name, version=image_version)\n",
"print(image.name, image.version)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's retrieve the model to delete."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_name = 'question_match_model'\n",
"model_version = int(get_key(env_path, 'model_version'))\n",
"model = Model(ws, name=model_name, version=model_version)\n",
"print(model.name, model.version)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's retrieve the AKS compute to delete."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"aks_name = get_key(env_path, 'aks_name')\n",
"aks_target = AksCompute(ws, name=aks_name)\n",
"print(aks_target.name, aks_target.get_status())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Delete the service, image and model. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"aks_service.delete()\n",
"image.delete()\n",
"model.delete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's delete the AKS compute from the workspace. Since we created the cluster through AML, the corresponding cloud \n",
"based objects will also be deleted. If the custer was created externally and attached to the workspace, the below \n",
"would raise an exception and nothing will be changed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"aks_target.delete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you also would like to delete the workspace and all experiments in it, you can use the following."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"ws.delete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Finally, you can delete the resource group with the following."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"resource_group = get_key(env_path, 'resource_group')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!az group delete --yes --name $resource_group"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "az-ml-realtime-score",
"language": "python",
"name": "az-ml-realtime-score"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"pycharm": {
"stem_cell": {
"cell_type": "raw",
"metadata": {
"collapsed": false
},
"source": []
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

194
notebooks/Makefile Normal file

@ -0,0 +1,194 @@
.ONESHELL:
SHELL=/bin/bash
define PROJECT_HELP_MSG
Makefile for testing notebooks
Make sure you have edited the dev_env_template file and renamed it to .dev_env
All the variables loaded in this Makefile must come from the .dev_env file
Usage:
    make test                  run all notebooks
    make clean                 delete the env and remove files
endef
export PROJECT_HELP_MSG
env_location=.dev_env
PWD:=$(shell pwd)
include ${env_location}
help:
	echo "$$PROJECT_HELP_MSG" | less

test: setup test-notebook1 test-notebook2 test-notebook3 test-notebook4 test-notebook5 test-notebook6 test-notebook7 \
	test-notebook8 test-notebook-iot1 test-notebook9 test-notebook-iot2
	@echo All Notebooks Passed

setup:
	conda env create -f environment.yml
ifndef TENANT_ID
	@echo starting interactive login
	az login -o table
	az account set --subscription ${SUBSCRIPTION_ID}
else
	@echo using service principal login
	az login -t ${TENANT_ID} --service-principal -u ${SP_USERNAME} --password ${SP_PASSWORD}
endif

test-notebook1:
	source activate MLAKSDeployAML
	@echo Testing 00_AMLConfiguration.ipynb
	papermill 00_AMLConfiguration.ipynb test.ipynb \
		--log-output \
		--no-progress-bar \
		-k python3 \
		-p subscription_id ${SUBSCRIPTION_ID} \
		-p resource_group ${RESOURCE_GROUP} \
		-p workspace_name ${WORKSPACE_NAME} \
		-p workspace_region ${WORKSPACE_REGION} \
		-p image_name ${IMAGE_NAME}

test-notebook2:
	source activate MLAKSDeployAML
	@echo Testing 01_DataPrep.ipynb
	papermill 01_DataPrep.ipynb test.ipynb \
		--log-output \
		--no-progress-bar \
		-k python3

test-notebook3:
	source activate MLAKSDeployAML
	@echo Testing 02_TrainOnLocal.ipynb
	papermill 02_TrainOnLocal.ipynb test.ipynb \
		--log-output \
		--no-progress-bar \
		-k python3

test-notebook4:
	source activate MLAKSDeployAML
	@echo Testing 03_DevelopScoringScript.ipynb
	papermill 03_DevelopScoringScript.ipynb test.ipynb \
		--log-output \
		--no-progress-bar \
		-k python3
	sleep 1m

test-notebook5:
	source activate MLAKSDeployAML
	@echo Testing 04_CreateImage.ipynb
	papermill 04_CreateImage.ipynb test.ipynb \
		--log-output \
		--no-progress-bar \
		-k python3
	sleep 30

test-notebook6:
	source activate MLAKSDeployAML
	@echo Testing 05_DeployOnAKS.ipynb
	papermill aks/05_DeployOnAKS.ipynb test.ipynb \
		--log-output \
		--no-progress-bar \
		-k python3 \
		-p aks_name ${AKS_NAME} \
		-p aks_location ${WORKSPACE_REGION} \
		-p aks_service_name ${AKS_SERVICE_NAME}

test-notebook7:
	source activate MLAKSDeployAML
	@echo Testing 06_SpeedTestWebApp.ipynb
	papermill aks/06_SpeedTestWebApp.ipynb test.ipynb \
		--log-output \
		--no-progress-bar \
		-k python3

test-notebook8:
	source activate MLAKSDeployAML
	@echo Testing 07_RealTimeScoring.ipynb
	papermill aks/07_RealTimeScoring.ipynb test.ipynb \
		--log-output \
		--no-progress-bar \
		-k python3

test-notebook-iot1:
	source activate MLAKSDeployAML
	@echo Testing 05_DeployOnIOTedge.ipynb
	export PYTHONPATH=${PWD}:${PYTHONPATH}
	cd iotedge
	mkdir ./data_folder
	cp ../data_folder/dupes_test.tsv ./data_folder
	papermill 05_DeployOnIOTedge.ipynb test.ipynb \
		--log-output \
		--no-progress-bar \
		-k python3 \
		-p iot_hub_name fstlstnameiothub \
		-p device_id mydevice \
		-p module_name mymodule

test-notebook9:
	source activate MLAKSDeployAML
	@echo Testing 08_TearDown.ipynb
	papermill aks/08_TearDown.ipynb test.ipynb \
		--log-output \
		--no-progress-bar \
		-k python3

test-notebook-iot2:
	source activate MLAKSDeployAML
	@echo Testing 06_TearDown.ipynb
	export PYTHONPATH=${PWD}:${PYTHONPATH}
	papermill iotedge/06_TearDown.ipynb test.ipynb \
		--log-output \
		--no-progress-bar \
		-k python3

test-cookiecutter-aks:
	cookiecutter --no-input https://github.com/Microsoft/MLAKSDeployAML.git --checkout yzhang \
		subscription_id="${SUBSCRIPTION_ID}" \
		workspace_region=${WORKSPACE_REGION} \
		deployment_type="aks"

test-cookiecutter-iot:
	cookiecutter --no-input https://github.com/Microsoft/MLAKSDeployAML.git --checkout yzhang \
		subscription_id=${SUBSCRIPTION_ID} \
		workspace_region=${WORKSPACE_REGION} \
		deployment_type="iotedge"

remove-notebook:
	rm -f test.ipynb

clean: remove-notebook
	conda remove --name MLAKSDeployAML -y --all
	rm -rf aml_config
	rm -rf __pycache__
	rm -rf .ipynb_checkpoints
	rm -rf data_folder
	rm -rf azureml-models
	rm -rf score.py lgbmenv.yml model.pkl
	rm -rf iotedge/deployment.json iotedge/deviceconfig.sh
	rm -rf iotedge/data_folder

notebook:
	source activate MLAKSDeployAML
	jupyter notebook --port 9999 --ip 0.0.0.0 --no-browser

install-jupytext:
	source activate MLAKSDeployAML
	conda install -c conda-forge jupytext

convert-to-py:
	jupytext --set-formats ipynb,py_scripts//py --sync *.ipynb

sync:
	jupytext --sync *.ipynb

convert-to-ipynb:
	jupytext --set-formats ipynb *.ipynb

remove-py:
	rm -r py_scripts

.PHONY: help test setup clean remove-notebook test-notebook1 test-notebook2 test-notebook3 test-notebook4 \
	test-notebook5 test-notebook6 test-notebook7 test-notebook8 test-notebook-iot1 test-notebook9 test-notebook-iot2
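For reference, each test-notebookN target above boils down to one papermill call. A minimal sketch using papermill's Python API instead of the CLI (the parameter values are placeholders taken from the dev_env_template defaults):

import papermill as pm

# Run the configuration notebook headlessly, as "make test-notebook1" does.
pm.execute_notebook(
    "00_AMLConfiguration.ipynb",
    "test.ipynb",
    kernel_name="python3",
    log_output=True,
    progress_bar=False,
    parameters={
        "subscription_id": "<subscription-id>",  # placeholder
        "resource_group": "deployrg",
        "workspace_name": "workspace",
        "workspace_region": "eastus",
        "image_name": "deployimg",
    },
)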

9
notebooks/__init__.py Normal file

@ -0,0 +1,9 @@
"""
az-ml-realtime-score - __init__.py
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
import os

DIRECTORY = os.path.dirname(os.path.realpath(__file__))
WORKING_DIRECTORY = os.getcwd()

12
dev_env_template Normal file

@ -0,0 +1,12 @@
# Fill in the fields below and rename to .dev_env
# TENANT_ID, SP_USERNAME, and SP_PASSWORD are optional. If they are not supplied, the Azure CLI will default to interactive login
TENANT_ID=
SP_USERNAME=
SP_PASSWORD=
SUBSCRIPTION_ID=
RESOURCE_GROUP="deployrg"
WORKSPACE_NAME="workspace"
WORKSPACE_REGION="eastus"
IMAGE_NAME="deployimg"
AKS_NAME="deployaks"
AKS_SERVICE_NAME="deployservice"
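The Makefile includes this file directly; from Python, the same values can be read with python-dotenv. A minimal sketch (assuming the renamed .dev_env sits in the working directory):

from dotenv import find_dotenv, get_key

# Locate the renamed file and read one variable from it.
env_path = find_dotenv(filename=".dev_env", raise_error_if_not_found=True)
subscription_id = get_key(env_path, "SUBSCRIPTION_ID")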

2
pytest.ini Normal file

@ -0,0 +1,2 @@
[pytest]
junit_family=xunit1

11
sample_workspace_conf.yml Normal file

@ -0,0 +1,11 @@
subscription_id: "<>"
resource_group: "<>"
workspace_name: "<>"
workspace_region: "<>"
image_name: "<>"
aks_service_name: "<>"
aks_name: "<>"
aks_location: "<>"
storage_conn_string: "<>"

6
tests/__init__.py Normal file

@ -0,0 +1,6 @@
"""
az-ml-realtime-score - __init__.py
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""

28
tests/test_notebooks.py Normal file

@ -0,0 +1,28 @@
"""
az-ml-realtime-score - test_notebooks.py
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
import pytest

from azure_utils.dev_ops.testing_utilities import run_notebook
from notebooks import DIRECTORY


@pytest.mark.parametrize(
"notebook",
[
DIRECTORY + "/00_AMLConfiguration.ipynb",
DIRECTORY + "/01_DataPrep.ipynb",
DIRECTORY + "/02_TrainOnLocal.ipynb",
DIRECTORY + "/03_DevelopScoringScript.ipynb",
DIRECTORY + "/04_CreateImage.ipynb",
DIRECTORY + "/05_DeployOnAKS.ipynb"
]
)
def test_notebook(notebook, add_nunit_attachment):
run_notebook(notebook, add_nunit_attachment, kernel_name="az-ml-realtime-score", root=DIRECTORY)
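A minimal sketch for invoking these notebook tests from Python rather than the pytest CLI (the report path is a placeholder; junit_family=xunit1 in pytest.ini controls the report schema):

import pytest

# Equivalent to: pytest tests/test_notebooks.py --junitxml=test-results.xml
pytest.main(["tests/test_notebooks.py", "--junitxml=test-results.xml"])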