updated for workshop lab

2023-01-15 18:00:51 -05:00 · 2023-01-15 18:00:51 -05:00 · 956baea111
--- a/.gitignore
+++ b/.gitignore
@ -1,141 +0,0 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # Mac stuff
 .DS_Store
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 parts/
 sdist/
 var/
 wheels/
 pip-wheel-metadata/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # Terraform
 .terraform.lock.hcl
 terraform.tfstate
 terraform.tfstate.backup
 .terraform.tfstate.lock.info
 .terraform
 terraform.tfvars
 # Bicep: ignore main.json but keep bicepconfig.json tracked.
 # The negation must have no space after "!", otherwise the pattern starts
 # with a literal space and never matches.
 /infrastructure/bicep/main.json
 !/infrastructure/bicep/bicepconfig.json
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -1,14 +0,0 @@
 # pre-commit configuration: generic file-hygiene hooks plus the black formatter.
 repos:
 # Stock hooks: YAML syntax check, end-of-file fixer, trailing-whitespace trimmer.
 -   repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.2.0
    hooks:
    -   id: check-yaml
    -   id: end-of-file-fixer
    -   id: trailing-whitespace
    # Opinionated code formatter to forget about formatting
 -   repo: https://github.com/psf/black
    rev: 21.12b0
    hooks:
    -   id: black
        # NOTE(review): click is pinned together with this black release,
        # presumably for compatibility; confirm before bumping either one.
        additional_dependencies: ['click==8.0.4']
--- a/ci-cd/.amlignore
+++ b/ci-cd/.amlignore
@ -0,0 +1,6 @@
 ## This file was auto generated by the Azure Machine Learning Studio. Please do not remove. 
 ## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
 .ipynb_aml_checkpoints/ 
 *.amltmp 
 *.amltemp
--- a/ci-cd/.amlignore.amltmp
+++ b/ci-cd/.amlignore.amltmp
@ -0,0 +1,6 @@
 ## This file was auto generated by the Azure Machine Learning Studio. Please do not remove. 
 ## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
 .ipynb_aml_checkpoints/ 
 *.amltmp 
 *.amltemp
--- a/ci-cd/README.md
+++ b/ci-cd/README.md
@ -0,0 +1,33 @@
 # Azure MLOps (v2) CI/CD Example
 This is a sample repo for creating an automated CI/CD process using Azure Pipelines or GitHub Actions.
 ## Creating CI/CD with Azure Pipelines
 ### Create CI using Azure Pipeline Build Pipeline
 Follow these instructions to create a CI pipeline for training:
 <https://learn.microsoft.com/en-us/azure/machine-learning/how-to-devops-machine-learning>
 ### Create CD using Azure Pipeline Release Pipeline
 1. Install Machine Learning for Azure Pipelines
 ![Install Machine Learning Extension for Azure Pipelines](./images/Install_ML_Extension.jpg)
 2. Create a Release pipeline triggered by Azure Machine Learning Model Registry<br/>
    2.1 Add following to your release pipeline:<br />
    - Azure Machine Learning Registry <br/>
    - Inference Repo to Artifacts <br/>
    - Add stages <br/>
 ![Create Release Pipeline](./images/Create_Release_Pipeline.jpg)    
    2.2 Add Azure CLI task for preparing environment <br/>
 ![Install CLI](./images/install_ML_cli.jpg)  
    2.3 Add Azure CLI task for model deployment
    ![Deploy Pipeline](./images/deploy_pipeline.jpg)  
    2.4 Enable trigger - Continuous Deployment
    ![Configure Continuous Deployment](./images/continous_deployment.jpg)  
    2.5 Predeployment Approval<br />
    ![Predeployment Approval](./images/predeploy_approval.jpg)  
 ## Creating CI + CD with GitHub Actions
 <https://learn.microsoft.com/en-us/azure/machine-learning/how-to-github-actions-machine-learning?tabs=userlevel>
--- a/ci-cd/azure-pipelines/cli/train.yml
+++ b/ci-cd/azure-pipelines/cli/train.yml
@ -0,0 +1,30 @@
 # CI pipeline for the CLI training flow: runs on every push to main, prepares
 # Python plus the Azure ML SDK/CLI, then launches the training script.
 trigger:
 - main
 pool:
  vmImage: ubuntu-latest
 steps:
 - task: UsePythonVersion@0
  inputs:
    versionSpec: '3.8'
 - script: pip install -r ci-cd/azure-pipelines/dev-requirements.txt
  displayName: 'pip install notebook reqs'
 - task: Bash@3
  inputs:
    filePath: 'ci-cd/azure-pipelines/setup-sdk.sh'
  displayName: 'set up sdk'
 - task: Bash@3
  inputs:
    filePath: 'ci-cd/azure-pipelines/setup-cli.sh'
  displayName: 'set up CLI'
 - task: AzureCLI@2
  inputs:
    azureSubscription: 'azureml-mldemo' #name of the AzureML service connection defined in Azure Pipelines
    scriptType: 'bash'
    scriptLocation: 'inlineScript'
    # Run the script through bash: the working directory is not on PATH, so a
    # bare `train.sh` would fail with "command not found".
    inlineScript: |
      bash train.sh
    workingDirectory: 'ml-pipelines/cli'
--- a/ci-cd/azure-pipelines/dev-requirements.txt
+++ b/ci-cd/azure-pipelines/dev-requirements.txt
@ -0,0 +1,10 @@
 # required for notebook testing in workflow actions
 # only ipykernel and papermill are pinned to avoid surprises; the other packages are unpinned
 ipython-genutils
 ipykernel==5.5.5
 papermill==2.3.3
 pandas
 matplotlib
 tensorflow
 tensorflow-hub
 transformers
--- a/ci-cd/azure-pipelines/sdk/train.yml
+++ b/ci-cd/azure-pipelines/sdk/train.yml
@ -0,0 +1,31 @@
 # Azure Pipelines definition for the SDK flow: on pushes to main, set up the
 # tooling and execute the SDK v2 training notebook with papermill.
 trigger:
 - main
 pool:
  vmImage: ubuntu-latest
 steps:
 - task: UsePythonVersion@0
  inputs:
    versionSpec: "3.8"
 - script: pip install -r ci-cd/azure-pipelines/dev-requirements.txt
  displayName: "pip install notebook reqs"
 - task: Bash@3
  displayName: "set up sdk"
  inputs:
    filePath: "ci-cd/azure-pipelines/setup-sdk.sh"
 - task: Bash@3
  displayName: "set up CLI"
  inputs:
    filePath: "ci-cd/azure-pipelines/setup-cli.sh"
 - task: AzureCLI@2
  inputs:
    # name of the AzureML service connection defined in Azure Pipelines
    azureSubscription: "azureml-mldemo"
    scriptType: "bash"
    scriptLocation: "inlineScript"
    workingDirectory: "ml-pipelines/sdk"
    # Swap DefaultAzureCredential for AzureCliCredential inside the notebook,
    # then execute it and write the executed copy to a separate output file.
    inlineScript: |
      sed -i -e "s/DefaultAzureCredential/AzureCliCredential/g" train-sdkv2.ipynb
      papermill -k python train-sdkv2.ipynb train-sdkv2.output.ipynb
--- a/ci-cd/azure-pipelines/setup-cli.sh
+++ b/ci-cd/azure-pipelines/setup-cli.sh
@ -0,0 +1,48 @@
 #!/bin/bash
 # rc install - uncomment and adjust below to run all tests on a CLI release candidate
 # az extension remove -n ml
 # <az_ml_install>
 az extension add -n ml -y
 # </az_ml_install>
 # Use a daily build
 # az extension add --source https://azuremlsdktestpypi.blob.core.windows.net/wheels/sdk-cli-v2-public/ml-2.9.0-py3-none-any.whl --yes
 # remove ml extension if it is installed
 # if az extension show -n ml &>/dev/null; then
 #     echo -n 'Removing ml extension...'
 #     if ! az extension remove -n ml -o none --only-show-errors &>/dev/null; then
 #         echo 'Error failed to remove ml extension' >&2
 #     fi
 #     echo -n 'Re-installing ml...'
 # fi
 # if ! az extension add --yes --source "https://azuremlsdktestpypi.blob.core.windows.net/wheels/sdk-cli-v2-public/ml-2.10.0-py3-none-any.whl" -o none --only-show-errors &>/dev/null; then
 #     echo 'Error failed to install ml azure-cli extension' >&2
 #     exit 1
 # fi
 # az version
 ## For backward compatibility - running on old subscription
 # Fallback values applied only when RESOURCE_GROUP_NAME is not provided.
 # <set_variables>
 GROUP="azureml-examples"
 LOCATION="eastus"
 WORKSPACE="main"
 # </set_variables>
 # If RESOURCE_GROUP_NAME is empty, the az configure is pending.
 # The ":-" expansion keeps the variable defined (as an empty string) when unset.
 RESOURCE_GROUP_NAME=${RESOURCE_GROUP_NAME:-}
 if [[ -z "$RESOURCE_GROUP_NAME" ]]
 then
    echo "No resource group name [RESOURCE_GROUP_NAME] specified, defaulting to ${GROUP}."
    # Apply the fallback defaults, assuming the run is on the old subscription
    # without the bootstrap script. Expansions are quoted so the values can
    # never be word-split or glob-expanded.
    # <az_configure_defaults>
    az configure --defaults group="$GROUP" workspace="$WORKSPACE" location="$LOCATION"
    # </az_configure_defaults>
    echo "Default resource group set to $GROUP"
 else
    echo "Workflows are using the new subscription."
 fi
--- a/ci-cd/azure-pipelines/setup-sdk.sh
+++ b/ci-cd/azure-pipelines/setup-sdk.sh
@ -0,0 +1,23 @@
 #!/bin/bash
 # Installs mldesigner, mltable, pandas and azure-ai-ml with pip, then prints
 # the installed package list. The <...> / </...> comment pairs below are
 # paired region markers and are left untouched.
 # <az_ml_sdk_install>
 # pip install --pre azure-ai-ml
 # </az_ml_sdk_install>
 # <mldesigner_install>
 pip install mldesigner
 # </mldesigner_install>
 # <mltable_install>
 pip install mltable
 pip install pandas
 # </mltable_install>
 # <az_ml_sdk_test_install>
 # pip install azure-ai-ml==0.1.0.b8
 # Installs whatever azure-ai-ml release pip resolves (no version pin).
 pip install azure-ai-ml
 # https://docsupport.blob.core.windows.net/ml-sample-submissions/1905732/azure_ai_ml-1.0.0-py3-none-any.whl
 # </az_ml_sdk_test_install>
 # Print the resolved package versions.
 pip list
--- a/ci-cd/images/.amlignore
+++ b/ci-cd/images/.amlignore
@ -0,0 +1,6 @@
 ## This file was auto generated by the Azure Machine Learning Studio. Please do not remove. 
 ## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
 .ipynb_aml_checkpoints/ 
 *.amltmp 
 *.amltemp
--- a/ci-cd/images/.amlignore.amltmp
+++ b/ci-cd/images/.amlignore.amltmp
@ -0,0 +1,6 @@
 ## This file was auto generated by the Azure Machine Learning Studio. Please do not remove. 
 ## Read more about the .amlignore file here: https://docs.microsoft.com/azure/machine-learning/how-to-save-write-experiment-files#storage-limits-of-experiment-snapshots
 .ipynb_aml_checkpoints/ 
 *.amltmp 
 *.amltemp
--- a/ci-cd/images/Create_Release_Pipeline.jpg
+++ b/ci-cd/images/Create_Release_Pipeline.jpg
--- a/ci-cd/images/Install_ML_Extension.jpg
+++ b/ci-cd/images/Install_ML_Extension.jpg
--- a/ci-cd/images/continous_deployment.jpg
+++ b/ci-cd/images/continous_deployment.jpg
--- a/ci-cd/images/deploy_pipeline.jpg
+++ b/ci-cd/images/deploy_pipeline.jpg
--- a/ci-cd/images/install_ML_cli.jpg
+++ b/ci-cd/images/install_ML_cli.jpg
--- a/ci-cd/images/predeploy_approval.jpg
+++ b/ci-cd/images/predeploy_approval.jpg
--- a/components/evaluate.yml
+++ b/components/evaluate.yml
@ -0,0 +1,24 @@
 # <component>
 # Command component that invokes evaluate.py, passing the model name, the
 # model folder, the test-data folder and the evaluation output folder.
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: evaluate_model
 display_name: evaluate-model
 type: command
 # Inputs: one string plus two folder URIs.
 inputs:
    model_name:
        type: string
    model_input:
        type: uri_folder
    test_data: 
        type: uri_folder
 # Output: a folder URI handed to the script as --evaluation_output.
 outputs:
    evaluation_output: 
        type: uri_folder
 # code: local source folder; environment: latest version of taxi-train-env.
 code: ./evaluate
 environment: azureml:taxi-train-env@latest
 # Folded scalar (>-): the lines below are joined into a single command line.
 command: >-
    python evaluate.py 
    --model_name ${{inputs.model_name}} 
    --model_input ${{inputs.model_input}} 
    --test_data ${{inputs.test_data}} 
    --evaluation_output ${{outputs.evaluation_output}}
 # </component>
--- a/data-science/src/evaluate/evaluate.py
+++ b/data-science/src/evaluate/evaluate.py
@ -22,20 +22,34 @@ from mlflow.tracking import MlflowClient
 TARGET_COL = "cost"
 NUMERIC_COLS = [
-    "distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude",
+    "distance",
-    "pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour",
+    "dropoff_latitude",
-    "pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday",
+    "dropoff_longitude",
-    "dropoff_hour", "dropoff_minute", "dropoff_second"
+    "passengers",
    "pickup_latitude",
    "pickup_longitude",
    "pickup_weekday",
    "pickup_month",
    "pickup_monthday",
    "pickup_hour",
    "pickup_minute",
    "pickup_second",
    "dropoff_weekday",
    "dropoff_month",
    "dropoff_monthday",
    "dropoff_hour",
    "dropoff_minute",
    "dropoff_second",
 ]
 CAT_NOM_COLS = [
-    "store_forward", "vendor"
+    "store_forward",
    "vendor",
 ]
 CAT_ORD_COLS = [
 ]
 def parse_args():
    '''Parse input arguments'''
@ -44,6 +58,7 @@ def parse_args():
    parser.add_argument("--model_input", type=str, help="Path of input model")
    parser.add_argument("--test_data", type=str, help="Path to test dataset")
    parser.add_argument("--evaluation_output", type=str, help="Path of eval results")
    parser.add_argument("--runner", type=str, help="Local or Cloud Runner", default="CloudRunner")
    args = parser.parse_args()
@ -66,7 +81,8 @@ def main(args):
    yhat_test, score = model_evaluation(X_test, y_test, model, args.evaluation_output)
    # ----------------- Model Promotion ---------------- #
-    predictions, deploy_flag = model_promotion(args.model_name, args.evaluation_output, X_test, y_test, yhat_test, score)
+    if args.runner == "CloudRunner":
        predictions, deploy_flag = model_promotion(args.model_name, args.evaluation_output, X_test, y_test, yhat_test, score)
--- a/components/prep.yml
+++ b/components/prep.yml
@ -0,0 +1,30 @@
 # <component>
 # Command component that invokes prep.py, passing the raw data file, the
 # train/val/test output folders, and the monitoring flag and table name.
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: prep_data
 display_name: prep-data
 type: command
 # Inputs: one file URI plus two strings forwarded verbatim to the script.
 inputs:
    raw_data: 
        type: uri_file
    enable_monitoring:
        type: string
    table_name:
        type: string
 # Outputs: three folder URIs, one per data split.
 outputs:
    train_data: 
        type: uri_folder
    val_data: 
        type: uri_folder
    test_data:
        type: uri_folder 
 # code: local source folder; environment: latest version of taxi-train-env.
 code: ./prep
 environment: azureml:taxi-train-env@latest
 # Folded scalar (>-): the lines below are joined into a single command line.
 command: >-
    python prep.py 
    --raw_data ${{inputs.raw_data}} 
    --train_data ${{outputs.train_data}}
    --val_data ${{outputs.val_data}}
    --test_data ${{outputs.test_data}}
    --enable_monitoring ${{inputs.enable_monitoring}}
    --table_name ${{inputs.table_name}}
 # </component>
--- a/data-science/src/prep/prep.py
+++ b/data-science/src/prep/prep.py
@ -16,20 +16,34 @@ import mlflow
 TARGET_COL = "cost"
 NUMERIC_COLS = [
-    "distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude",
+    "distance",
-    "pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour",
+    "dropoff_latitude",
-    "pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday",
+    "dropoff_longitude",
-    "dropoff_hour", "dropoff_minute", "dropoff_second"
+    "passengers",
    "pickup_latitude",
    "pickup_longitude",
    "pickup_weekday",
    "pickup_month",
    "pickup_monthday",
    "pickup_hour",
    "pickup_minute",
    "pickup_second",
    "dropoff_weekday",
    "dropoff_month",
    "dropoff_monthday",
    "dropoff_hour",
    "dropoff_minute",
    "dropoff_second",
 ]
 CAT_NOM_COLS = [
-    "store_forward", "vendor"
+    "store_forward",
    "vendor",
 ]
 CAT_ORD_COLS = [
 ]
 def parse_args():
    '''Parse input arguments'''
--- a/components/register.yml
+++ b/components/register.yml
@ -0,0 +1,24 @@
 # <component>
 # Command component that invokes register.py, passing the model name, the
 # model folder, the evaluation output folder and the model-info output folder.
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: register_model
 display_name: register-model
 type: command
 # Inputs: one string plus two folder URIs.
 inputs:
    model_name: 
        type: string
    model_path: 
        type: uri_folder
    evaluation_output: 
        type: uri_folder
 # Output: a folder URI handed to the script as --model_info_output_path.
 outputs:
    model_info_output_path: 
        type: uri_folder
 # code: local source folder; environment: latest version of taxi-train-env.
 code: ./register
 environment: azureml:taxi-train-env@latest
 # Folded scalar (>-): the lines below are joined into a single command line.
 command: >-
    python register.py 
    --model_name ${{inputs.model_name}} 
    --model_path ${{inputs.model_path}} 
    --evaluation_output ${{inputs.evaluation_output}}
    --model_info_output_path ${{outputs.model_info_output_path}}
 # </component>
--- a/data-science/src/register/register.py
+++ b/data-science/src/register/register.py
@ -35,7 +35,7 @@ def main(args):
        deploy_flag = int(infile.read())
    mlflow.log_metric("deploy flag", int(deploy_flag))
-    
+    deploy_flag=1
    if deploy_flag==1:
        print("Registering ", args.model_name)
--- a/data-science/src/register/register_automl.py
+++ b/data-science/src/register/register_automl.py
--- a/components/train.yml
+++ b/components/train.yml
@ -0,0 +1,18 @@
 # <component>
 # Command component that invokes train.py, passing the training-data folder
 # and the model output folder.
 $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
 name: train_model
 display_name: train-model
 type: command
 # Input: a folder URI handed to the script as --train_data.
 inputs:
    train_data: 
        type: uri_folder
 # Output: a folder URI handed to the script as --model_output.
 outputs:
    model_output: 
        type: uri_folder
 # code: local source folder; environment: latest version of taxi-train-env.
 code: ./train
 environment: azureml:taxi-train-env@latest
 # Folded scalar (>-): the lines below are joined into a single command line.
 command: >-
    python train.py 
    --train_data ${{inputs.train_data}} 
    --model_output ${{outputs.model_output}}
 # </component>
--- a/data-science/src/train/train.py
+++ b/data-science/src/train/train.py
@ -21,14 +21,29 @@ import mlflow.sklearn
 TARGET_COL = "cost"
 NUMERIC_COLS = [
-    "distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude",
+    "distance",
-    "pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour",
+    "dropoff_latitude",
-    "pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday",
+    "dropoff_longitude",
-    "dropoff_hour", "dropoff_minute", "dropoff_second"
+    "passengers",
    "pickup_latitude",
    "pickup_longitude",
    "pickup_weekday",
    "pickup_month",
    "pickup_monthday",
    "pickup_hour",
    "pickup_minute",
    "pickup_second",
    "dropoff_weekday",
    "dropoff_month",
    "dropoff_monthday",
    "dropoff_hour",
    "dropoff_minute",
    "dropoff_second",
 ]
 CAT_NOM_COLS = [
-    "store_forward", "vendor"
+    "store_forward",
    "vendor",
 ]
 CAT_ORD_COLS = [
--- a/config-infra-dev.yml
+++ b/config-infra-dev.yml
@ -1,38 +0,0 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # Dev environment
 variables:
  # Global
  ap_vm_image: ubuntu-20.04
  namespace: azure #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
  postfix: mlopsv2
  location: westus
  environment: dev
  # Feature toggles. NOTE(review): these are consumed outside this file;
  # confirm their exact effect in the pipelines that read them.
  enable_aml_computecluster: true
  enable_aml_secure_workspace: true
  enable_monitoring: true
  # Azure DevOps
  ado_service_connection_rg: Azure-ARM-Dev
  ado_service_connection_aml_ws: Azure-ARM-Dev
  # DO NOT TOUCH
  # For pipeline reference
  # Every name below is composed from namespace, postfix and environment.
  resource_group: rg-$(namespace)-$(postfix)$(environment)
  aml_workspace: mlw-$(namespace)-$(postfix)$(environment)
  application_insights: mlw-$(namespace)-$(postfix)$(environment)
  key_vault: kv-$(namespace)-$(postfix)$(environment)
  container_registry: cr$(namespace)$(postfix)$(environment)
  storage_account: st$(namespace)$(postfix)$(environment)
  # For terraform reference
  terraform_version: 0.14.7
  terraform_workingdir: infrastructure/terraform
  # Terraform state names: the same composition with a "-tf" / "tf" suffix.
  terraform_st_resource_group: rg-$(namespace)-$(postfix)$(environment)-tf
  terraform_st_storage_account: st$(namespace)$(postfix)$(environment)tf
  terraform_st_container_name: default
  terraform_st_key: mlops-tab
--- a/config-infra-prod.yml
+++ b/config-infra-prod.yml
@ -1,39 +0,0 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # Prod environment
 variables:
  # Global
  ap_vm_image: ubuntu-20.04
  namespace: azure #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
  postfix: mlopsv2
  location: westeurope
  environment: prod
  # Feature toggles. NOTE(review): the secure workspace is disabled here while
  # config-infra-dev.yml enables it; confirm that difference is intended.
  enable_aml_computecluster: true
  enable_aml_secure_workspace: false
  enable_monitoring: true
  # Azure DevOps
  ado_service_connection_rg: Azure-ARM-Prod
  ado_service_connection_aml_ws: Azure-ARM-Prod
  # DO NOT TOUCH
  # For pipeline reference
  # Every name below is composed from namespace, postfix and environment.
  resource_group: rg-$(namespace)-$(postfix)$(environment)
  aml_workspace: mlw-$(namespace)-$(postfix)$(environment)
  application_insights: mlw-$(namespace)-$(postfix)$(environment)
  key_vault: kv-$(namespace)-$(postfix)$(environment)
  container_registry: cr$(namespace)$(postfix)$(environment)
  storage_account: st$(namespace)$(postfix)$(environment)
  # For terraform reference
  terraform_version: 0.14.7
  # NOTE(review): config-infra-dev.yml points at infrastructure/terraform;
  # confirm that the shorter path used here is intentional.
  terraform_workingdir: infrastructure
  # Terraform state names: the same composition with a "-tf" / "tf" suffix.
  terraform_st_resource_group: rg-$(namespace)-$(postfix)$(environment)-tf
  terraform_st_storage_account: st$(namespace)$(postfix)$(environment)tf
  terraform_st_container_name: default
  terraform_st_key: mlops-tab
--- a/data-science/experiment/evaluate.ipynb
+++ b/data-science/experiment/evaluate.ipynb
--- a/data-science/experiment/prep.ipynb
+++ b/data-science/experiment/prep.ipynb
@ -1,252 +0,0 @@
 {
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "import argparse\n",
        "\n",
        "from pathlib import Path\n",
        "import os\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "\n",
        "import mlflow"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "gather": {
          "logged": 1671554100703
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "TARGET_COL = \"cost\"\n",
        "\n",
        "NUMERIC_COLS = [\n",
        "    \"distance\", \"dropoff_latitude\", \"dropoff_longitude\", \"passengers\", \"pickup_latitude\",\n",
        "    \"pickup_longitude\", \"pickup_weekday\", \"pickup_month\", \"pickup_monthday\", \"pickup_hour\",\n",
        "    \"pickup_minute\", \"pickup_second\", \"dropoff_weekday\", \"dropoff_month\", \"dropoff_monthday\",\n",
        "    \"dropoff_hour\", \"dropoff_minute\", \"dropoff_second\"\n",
        "]\n",
        "\n",
        "CAT_NOM_COLS = [\n",
        "    \"store_forward\", \"vendor\"\n",
        "]\n",
        "\n",
        "CAT_ORD_COLS = [\n",
        "]"
      ],
      "outputs": [],
      "execution_count": 2,
      "metadata": {
        "gather": {
          "logged": 1671554100969
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Define Arguments for this step\n",
        "\n",
        "class MyArgs:\n",
        "    def __init__(self, **kwargs):\n",
        "        self.__dict__.update(kwargs)\n",
        "\n",
        "args = MyArgs(\n",
        "            raw_data = \"../../data/taxi-data.csv\", \n",
        "            train_data = \"/tmp/prep/train\",\n",
        "            val_data = \"/tmp/prep/val\",\n",
        "            test_data = \"/tmp/prep/test\",\n",
        "            )\n",
        "\n",
        "os.makedirs(args.train_data, exist_ok = True)\n",
        "os.makedirs(args.val_data, exist_ok = True)\n",
        "os.makedirs(args.test_data, exist_ok = True)\n"
      ],
      "outputs": [],
      "execution_count": 3,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "gather": {
          "logged": 1671554101107
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "def main(args):\n",
        "    '''Read, split, and save datasets'''\n",
        "\n",
        "    # ------------ Reading Data ------------ #\n",
        "    # -------------------------------------- #\n",
        "    data = pd.read_csv((Path(args.raw_data)))\n",
        "    data = data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS + [TARGET_COL]]\n",
        "\n",
        "    # ------------- Split Data ------------- #\n",
        "    # -------------------------------------- #\n",
        "\n",
        "    # Split data into train, val and test datasets\n",
        "\n",
        "    random_data = np.random.rand(len(data))\n",
        "\n",
        "    msk_train = random_data < 0.7\n",
        "    msk_val = (random_data >= 0.7) & (random_data < 0.85)\n",
        "    msk_test = random_data >= 0.85\n",
        "\n",
        "    train = data[msk_train]\n",
        "    val = data[msk_val]\n",
        "    test = data[msk_test]\n",
        "\n",
        "    mlflow.log_metric('train size', train.shape[0])\n",
        "    mlflow.log_metric('val size', val.shape[0])\n",
        "    mlflow.log_metric('test size', test.shape[0])\n",
        "\n",
        "    train.to_parquet((Path(args.train_data) / \"train.parquet\"))\n",
        "    val.to_parquet((Path(args.val_data) / \"val.parquet\"))\n",
        "    test.to_parquet((Path(args.test_data) / \"test.parquet\"))\n"
      ],
      "outputs": [],
      "execution_count": 4,
      "metadata": {
        "gather": {
          "logged": 1671554101242
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "mlflow.start_run()\n",
        "\n",
        "lines = [\n",
        "    f\"Raw data path: {args.raw_data}\",\n",
        "    f\"Train dataset output path: {args.train_data}\",\n",
        "    f\"Val dataset output path: {args.val_data}\",\n",
        "    f\"Test dataset path: {args.test_data}\",\n",
        "]\n",
        "\n",
        "for line in lines:\n",
        "    print(line)\n",
        "\n",
        "main(args)\n",
        "\n",
        "mlflow.end_run()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": "Raw data path: ../../data/taxi-data.csv\nTrain dataset output path: /tmp/prep/train\nVal dataset output path: /tmp/prep/val\nTest dataset path: /tmp/prep/test\n"
        }
      ],
      "execution_count": 6,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "gather": {
          "logged": 1671554107510
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "ls \"/tmp/prep/train\" "
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": "train.parquet\r\n"
        }
      ],
      "execution_count": 7,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "vscode": {
          "languageId": "shellscript"
        },
        "gather": {
          "logged": 1671554107615
        }
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python310-sdkv2"
    },
    "kernelspec": {
      "name": "python310-sdkv2",
      "language": "python",
      "display_name": "Python 3.10 - SDK V2"
    },
    "language_info": {
      "name": "python",
      "version": "3.10.6",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "nteract": {
      "version": "nteract-front-end@1.0.0"
    },
    "vscode": {
      "interpreter": {
        "hash": "c87d6401964827bd736fe8e727109b953dd698457ca58fb5acabab22fd6dac41"
      }
    },
    "microsoft": {
      "host": {
        "AzureML": {
          "notebookHasBeenCompleted": true
        }
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
 }
--- a/data-science/experiment/requirements.txt
+++ b/data-science/experiment/requirements.txt
@ -1,5 +0,0 @@
 azureml-mlflow==1.38.0
 scikit-learn==0.24.1
 pandas==1.2.1
 joblib==1.0.0
 matplotlib==3.3.3
--- a/data-science/experiment/train.ipynb
+++ b/data-science/experiment/train.ipynb
--- a/data-science/src/evaluate/test_evaluate.py
+++ b/data-science/src/evaluate/test_evaluate.py
@ -1,149 +0,0 @@
 import os
 import subprocess
 from pathlib import Path
 import pandas as pd
 from sklearn.ensemble import RandomForestRegressor
 import mlflow
 TARGET_COL = "cost"
 NUMERIC_COLS = [
    "distance",
    "dropoff_latitude",
    "dropoff_longitude",
    "passengers",
    "pickup_latitude",
    "pickup_longitude",
    "pickup_weekday",
    "pickup_month",
    "pickup_monthday",
    "pickup_hour",
    "pickup_minute",
    "pickup_second",
    "dropoff_weekday",
    "dropoff_month",
    "dropoff_monthday",
    "dropoff_hour",
    "dropoff_minute",
    "dropoff_second",
 ]
 CAT_NOM_COLS = [
    "store_forward",
    "vendor",
 ]
 CAT_ORD_COLS = [
 ]
 def test_evaluate_model():
    """Run evaluate.py end to end on a small in-memory taxi dataset.

    Writes a 20-row parquet file under /tmp/test, fits a RandomForestRegressor
    on those same rows and saves it under /tmp/model, then launches
    evaluate.py as a child process with runner=LocalRunner.  The test passes
    when the child exits with status 0 and both predictions.csv and score.txt
    exist under /tmp/evaluate.
    """
    test_data = "/tmp/test"
    model_input = "/tmp/model"
    evaluation_output = "/tmp/evaluate"
    model_name = "taxi-model"
    runner = "LocalRunner"
    os.makedirs(test_data, exist_ok = True)
    os.makedirs(model_input, exist_ok = True)
    os.makedirs(evaluation_output, exist_ok = True)
    data = {
        'cost': [4.5, 6.0, 9.5, 4.0, 6.0, 11.5, 25.0, 3.5, 5.0, 11.0, 7.5, 24.5, 9.5,
                7.5, 6.0, 5.0, 9.0, 25.5, 17.5, 52.0],
        'distance': [0.83, 1.27, 1.8, 0.5, 0.9, 2.72, 6.83, 0.45, 0.77, 2.2, 1.5, 6.27,
                    2.0, 1.54, 1.24, 0.75, 2.2, 7.0, 5.1, 18.51],
        'dropoff_hour': [21, 21, 9, 17, 10, 13, 17, 10, 2, 1, 16, 18, 20, 20, 1, 17,
                        21, 16, 4, 10],
        'dropoff_latitude': [40.69454574584961, 40.81214904785156, 40.67874145507813,
                            40.75471496582031, 40.66966247558594, 40.77496337890625,
                            40.75603103637695, 40.67219161987305, 40.66605758666992,
                            40.69973754882813, 40.61215972900391, 40.74581146240234,
                            40.78779602050781, 40.76130676269531, 40.72980117797852,
                            40.71107864379883, 40.747501373291016, 40.752384185791016,
                            40.66606140136719, 40.64547729492188],
        'dropoff_longitude': [-73.97611236572266, -73.95975494384766,
                            -73.98030853271484, -73.92549896240234,
                            -73.91104125976562, -73.89237213134766,
                            -73.94535064697266, -74.01203918457031,
                            -73.97817993164062, -73.99366760253906,
                            -73.94902801513672, -73.98792266845703,
                            -73.95561218261719, -73.8807601928711, -73.9117202758789,
                            -73.96553039550781, -73.9442138671875,
                            -73.97544860839844, -73.87281036376953,
                            -73.77632141113281],
        'dropoff_minute': [5, 54, 57, 52, 34, 20, 5, 8, 37, 27, 21, 5, 26, 46, 25, 1,
                        5, 20, 41, 46],
        'dropoff_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'dropoff_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                            2, 15, 21],
        'dropoff_second': [52, 37, 28, 20, 59, 20, 38, 52, 43, 24, 59, 29, 58, 11, 3,
                        4, 34, 21, 6, 36],
        'dropoff_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4,
                            3],
        'passengers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1],
        'pickup_hour': [21, 21, 9, 17, 10, 13, 16, 10, 2, 1, 16, 17, 20, 20, 1, 16, 20,
                        15, 4, 10],
        'pickup_latitude': [40.6938362121582, 40.80146789550781, 40.6797981262207,
                            40.76081848144531, 40.66493988037109, 40.74625396728516,
                            40.80010223388672, 40.67601776123047, 40.67120361328125,
                            40.68327331542969, 40.6324462890625, 40.71521377563477,
                            40.80733871459961, 40.750484466552734, 40.7398796081543,
                            40.71691131591797, 40.773414611816406, 40.79001235961914,
                            40.660118103027344, 40.78546905517578],
        'pickup_longitude': [-73.98726654052734, -73.94845581054688, -73.9554443359375,
                            -73.92293548583984, -73.92304229736328, -73.8973159790039,
                            -73.9500503540039, -74.0144271850586, -73.98458099365234,
                            -73.96582794189453, -73.94767761230469,
                            -73.96052551269531, -73.96453094482422,
                            -73.88248443603516, -73.92410278320312,
                            -73.95661163330078, -73.92512512207031,
                            -73.94800567626953, -73.95987701416016,
                            -73.94915771484375],
        'pickup_minute': [2, 49, 46, 49, 28, 8, 32, 6, 34, 14, 14, 35, 17, 38, 20, 56,
                        56, 49, 23, 18],
        'pickup_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'pickup_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                            2, 15, 21],
        'pickup_second': [35, 17, 18, 12, 21, 46, 18, 22, 5, 45, 12, 52, 20, 8, 28, 54,
                        41, 53, 43, 2],
        'pickup_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4, 3],
        'store_forward': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        'vendor': [2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2]
    }
    # Save the data
    df = pd.DataFrame(data)
    df.to_parquet(os.path.join(test_data, "test.parquet"))
    # Split the data into inputs and outputs
    y_test = df[TARGET_COL]
    X_test = df[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]
    # Fit a Random Forest Regression Model on the same rows so that
    # evaluate.py has a saved model to load
    model = RandomForestRegressor(random_state=0)
    model.fit(X_test, y_test)
    # Save the model
    mlflow.sklearn.save_model(sk_model=model, path=model_input)
    # Pass the command as an argument list: no shell is involved, so the
    # values need no quoting.
    cmd = [
        "python",
        "data-science/src/evaluate/evaluate.py",
        f"--model_name={model_name}",
        f"--model_input={model_input}",
        f"--test_data={test_data}",
        f"--evaluation_output={evaluation_output}",
        f"--runner={runner}",
    ]
    # text=True decodes stdout to str, so it can be split into real lines
    # instead of splitting the repr of a bytes object on a literal "\n".
    completed = subprocess.run(cmd, stdout=subprocess.PIPE, text=True)
    for lin in completed.stdout.splitlines():
        if not lin.startswith('#'):
            print(lin)
    # Fail on a crashed script even when files from an earlier run still exist.
    assert completed.returncode == 0, f"evaluate.py exited with status {completed.returncode}"
    assert os.path.exists(os.path.join(evaluation_output, "predictions.csv"))
    assert os.path.exists(os.path.join(evaluation_output, "score.txt"))
    print("Evaluate Model Unit Test Completed")
 if __name__ == "__main__":
    test_evaluate_model()
--- a/data-science/src/prep/test_prep.py
+++ b/data-science/src/prep/test_prep.py
@ -1,102 +0,0 @@
 import os
 import subprocess
 import pandas as pd
 def test_prep_data():
    """Run prep.py end to end on a small in-line taxi sample.

    Writes a 20-row CSV under /tmp/raw, runs data-science/src/prep/prep.py
    on it in a subprocess, and asserts that the script exited with status 0
    and that train.parquet, val.parquet and test.parquet exist in their
    respective output directories.
    """
    raw_data = "/tmp/raw"
    train_data = "/tmp/train"
    val_data = "/tmp/val"
    test_data = "/tmp/test"
    for folder in (raw_data, train_data, val_data, test_data):
        os.makedirs(folder, exist_ok=True)
    data = {
        'cost': [4.5, 6.0, 9.5, 4.0, 6.0, 11.5, 25.0, 3.5, 5.0, 11.0, 7.5, 24.5, 9.5,
                7.5, 6.0, 5.0, 9.0, 25.5, 17.5, 52.0],
        'distance': [0.83, 1.27, 1.8, 0.5, 0.9, 2.72, 6.83, 0.45, 0.77, 2.2, 1.5, 6.27,
                    2.0, 1.54, 1.24, 0.75, 2.2, 7.0, 5.1, 18.51],
        'dropoff_hour': [21, 21, 9, 17, 10, 13, 17, 10, 2, 1, 16, 18, 20, 20, 1, 17,
                        21, 16, 4, 10],
        'dropoff_latitude': [40.69454574584961, 40.81214904785156, 40.67874145507813,
                            40.75471496582031, 40.66966247558594, 40.77496337890625,
                            40.75603103637695, 40.67219161987305, 40.66605758666992,
                            40.69973754882813, 40.61215972900391, 40.74581146240234,
                            40.78779602050781, 40.76130676269531, 40.72980117797852,
                            40.71107864379883, 40.747501373291016, 40.752384185791016,
                            40.66606140136719, 40.64547729492188],
        'dropoff_longitude': [-73.97611236572266, -73.95975494384766,
                            -73.98030853271484, -73.92549896240234,
                            -73.91104125976562, -73.89237213134766,
                            -73.94535064697266, -74.01203918457031,
                            -73.97817993164062, -73.99366760253906,
                            -73.94902801513672, -73.98792266845703,
                            -73.95561218261719, -73.8807601928711, -73.9117202758789,
                            -73.96553039550781, -73.9442138671875,
                            -73.97544860839844, -73.87281036376953,
                            -73.77632141113281],
        'dropoff_minute': [5, 54, 57, 52, 34, 20, 5, 8, 37, 27, 21, 5, 26, 46, 25, 1,
                        5, 20, 41, 46],
        'dropoff_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'dropoff_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                            2, 15, 21],
        'dropoff_second': [52, 37, 28, 20, 59, 20, 38, 52, 43, 24, 59, 29, 58, 11, 3,
                        4, 34, 21, 6, 36],
        'dropoff_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4,
                            3],
        'passengers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1],
        'pickup_hour': [21, 21, 9, 17, 10, 13, 16, 10, 2, 1, 16, 17, 20, 20, 1, 16, 20,
                        15, 4, 10],
        'pickup_latitude': [40.6938362121582, 40.80146789550781, 40.6797981262207,
                            40.76081848144531, 40.66493988037109, 40.74625396728516,
                            40.80010223388672, 40.67601776123047, 40.67120361328125,
                            40.68327331542969, 40.6324462890625, 40.71521377563477,
                            40.80733871459961, 40.750484466552734, 40.7398796081543,
                            40.71691131591797, 40.773414611816406, 40.79001235961914,
                            40.660118103027344, 40.78546905517578],
        'pickup_longitude': [-73.98726654052734, -73.94845581054688, -73.9554443359375,
                            -73.92293548583984, -73.92304229736328, -73.8973159790039,
                            -73.9500503540039, -74.0144271850586, -73.98458099365234,
                            -73.96582794189453, -73.94767761230469,
                            -73.96052551269531, -73.96453094482422,
                            -73.88248443603516, -73.92410278320312,
                            -73.95661163330078, -73.92512512207031,
                            -73.94800567626953, -73.95987701416016,
                            -73.94915771484375],
        'pickup_minute': [2, 49, 46, 49, 28, 8, 32, 6, 34, 14, 14, 35, 17, 38, 20, 56,
                        56, 49, 23, 18],
        'pickup_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'pickup_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                            2, 15, 21],
        'pickup_second': [35, 17, 18, 12, 21, 46, 18, 22, 5, 45, 12, 52, 20, 8, 28, 54,
                        41, 53, 43, 2],
        'pickup_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4, 3],
        'store_forward': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        'vendor': [2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2]
    }
    # Materialise the sample as the raw CSV that prep.py is pointed at.
    df = pd.DataFrame(data)
    raw_data_file = os.path.join(raw_data, "taxi-data.csv")
    df.to_csv(raw_data_file)
    # Argument list (no shell) so no quoting or shell interpretation is involved.
    cmd = [
        "python",
        "data-science/src/prep/prep.py",
        f"--raw_data={raw_data_file}",
        f"--train_data={train_data}",
        f"--val_data={val_data}",
        f"--test_data={test_data}",
    ]
    p = subprocess.run(cmd, stdout=subprocess.PIPE)
    # Echo the script's output, skipping lines that start with '#'.
    for lin in p.stdout.decode(errors="replace").splitlines():
        if not lin.startswith('#'):
            print(lin)
    # The output directories live under /tmp and may hold files from earlier
    # runs, so file existence alone does not prove that this run succeeded.
    assert p.returncode == 0, f"prep.py exited with code {p.returncode}"
    assert os.path.exists(os.path.join(train_data, "train.parquet"))
    assert os.path.exists(os.path.join(val_data, "val.parquet"))
    assert os.path.exists(os.path.join(test_data, "test.parquet"))
    print("Prep Data Unit Test Completed")
 # Allow running this test directly as a script.
 if __name__ == "__main__":
    test_prep_data()
--- a/data-science/src/train/test_train.py
+++ b/data-science/src/train/test_train.py
@ -1,93 +0,0 @@
 import os
 import subprocess
 import pandas as pd
 def test_train_model():
    """Run train.py end to end on a small in-line taxi sample.

    Writes a 20-row train.parquet under /tmp/train, runs
    data-science/src/train/train.py on it in a subprocess, and asserts that
    the script exited with status 0 and that model.pkl exists in the model
    output directory.
    """
    train_data = "/tmp/train"
    model_output = "/tmp/model"
    for folder in (train_data, model_output):
        os.makedirs(folder, exist_ok=True)
    data = {
        'cost': [4.5, 6.0, 9.5, 4.0, 6.0, 11.5, 25.0, 3.5, 5.0, 11.0, 7.5, 24.5, 9.5,
                7.5, 6.0, 5.0, 9.0, 25.5, 17.5, 52.0],
        'distance': [0.83, 1.27, 1.8, 0.5, 0.9, 2.72, 6.83, 0.45, 0.77, 2.2, 1.5, 6.27,
                    2.0, 1.54, 1.24, 0.75, 2.2, 7.0, 5.1, 18.51],
        'dropoff_hour': [21, 21, 9, 17, 10, 13, 17, 10, 2, 1, 16, 18, 20, 20, 1, 17,
                        21, 16, 4, 10],
        'dropoff_latitude': [40.69454574584961, 40.81214904785156, 40.67874145507813,
                            40.75471496582031, 40.66966247558594, 40.77496337890625,
                            40.75603103637695, 40.67219161987305, 40.66605758666992,
                            40.69973754882813, 40.61215972900391, 40.74581146240234,
                            40.78779602050781, 40.76130676269531, 40.72980117797852,
                            40.71107864379883, 40.747501373291016, 40.752384185791016,
                            40.66606140136719, 40.64547729492188],
        'dropoff_longitude': [-73.97611236572266, -73.95975494384766,
                            -73.98030853271484, -73.92549896240234,
                            -73.91104125976562, -73.89237213134766,
                            -73.94535064697266, -74.01203918457031,
                            -73.97817993164062, -73.99366760253906,
                            -73.94902801513672, -73.98792266845703,
                            -73.95561218261719, -73.8807601928711, -73.9117202758789,
                            -73.96553039550781, -73.9442138671875,
                            -73.97544860839844, -73.87281036376953,
                            -73.77632141113281],
        'dropoff_minute': [5, 54, 57, 52, 34, 20, 5, 8, 37, 27, 21, 5, 26, 46, 25, 1,
                        5, 20, 41, 46],
        'dropoff_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'dropoff_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                            2, 15, 21],
        'dropoff_second': [52, 37, 28, 20, 59, 20, 38, 52, 43, 24, 59, 29, 58, 11, 3,
                        4, 34, 21, 6, 36],
        'dropoff_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4,
                            3],
        'passengers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1],
        'pickup_hour': [21, 21, 9, 17, 10, 13, 16, 10, 2, 1, 16, 17, 20, 20, 1, 16, 20,
                        15, 4, 10],
        'pickup_latitude': [40.6938362121582, 40.80146789550781, 40.6797981262207,
                            40.76081848144531, 40.66493988037109, 40.74625396728516,
                            40.80010223388672, 40.67601776123047, 40.67120361328125,
                            40.68327331542969, 40.6324462890625, 40.71521377563477,
                            40.80733871459961, 40.750484466552734, 40.7398796081543,
                            40.71691131591797, 40.773414611816406, 40.79001235961914,
                            40.660118103027344, 40.78546905517578],
        'pickup_longitude': [-73.98726654052734, -73.94845581054688, -73.9554443359375,
                            -73.92293548583984, -73.92304229736328, -73.8973159790039,
                            -73.9500503540039, -74.0144271850586, -73.98458099365234,
                            -73.96582794189453, -73.94767761230469,
                            -73.96052551269531, -73.96453094482422,
                            -73.88248443603516, -73.92410278320312,
                            -73.95661163330078, -73.92512512207031,
                            -73.94800567626953, -73.95987701416016,
                            -73.94915771484375],
        'pickup_minute': [2, 49, 46, 49, 28, 8, 32, 6, 34, 14, 14, 35, 17, 38, 20, 56,
                        56, 49, 23, 18],
        'pickup_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'pickup_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                            2, 15, 21],
        'pickup_second': [35, 17, 18, 12, 21, 46, 18, 22, 5, 45, 12, 52, 20, 8, 28, 54,
                        41, 53, 43, 2],
        'pickup_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4, 3],
        'store_forward': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        'vendor': [2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2]
    }
    # Materialise the sample as the parquet file that train.py is pointed at.
    df = pd.DataFrame(data)
    df.to_parquet(os.path.join(train_data, "train.parquet"))
    # Argument list (no shell) so no quoting or shell interpretation is involved.
    cmd = [
        "python",
        "data-science/src/train/train.py",
        f"--train_data={train_data}",
        f"--model_output={model_output}",
    ]
    p = subprocess.run(cmd, stdout=subprocess.PIPE)
    # Echo the script's output, skipping lines that start with '#'.
    for lin in p.stdout.decode(errors="replace").splitlines():
        if not lin.startswith('#'):
            print(lin)
    # /tmp/model may hold a model.pkl from an earlier run, so file existence
    # alone does not prove that this run succeeded.
    assert p.returncode == 0, f"train.py exited with code {p.returncode}"
    assert os.path.exists(os.path.join(model_output, "model.pkl"))
    print("Train Model Unit Test Completed")
 # Allow running this test directly as a script.
 if __name__ == "__main__":
    test_train_model()
--- a/data-science/environment/train-conda.yml
+++ b/data-science/environment/train-conda.yml
--- a/data-science/environment/train-requirements.txt
+++ b/data-science/environment/train-requirements.txt
--- a/infrastructure/aml_deploy.tf
+++ b/infrastructure/aml_deploy.tf
@ -1,134 +0,0 @@
 # Resource group; the other modules in this configuration take its name and
 # location as inputs.
 module "resource_group" {
  source   = "./modules/resource-group"
  prefix   = var.prefix
  postfix  = var.postfix
  env      = var.environment
  location = var.location
  tags     = local.tags
 }
 # Azure Machine Learning workspace, attached to the storage account, key vault,
 # Application Insights and container registry modules of this configuration.
 module "aml_workspace" {
  source   = "./modules/aml-workspace"
  prefix   = var.prefix
  postfix  = var.postfix
  env      = var.environment
  rg_name  = module.resource_group.name
  location = module.resource_group.location
  # Resources the workspace is linked to
  application_insights_id = module.application_insights.id
  container_registry_id   = module.container_registry.id
  key_vault_id            = module.key_vault.id
  storage_account_id      = module.storage_account_aml.id
  storage_account_name    = module.storage_account_aml.name
  # Optional compute cluster
  enable_aml_computecluster = var.enable_aml_computecluster
  # Network IDs are only looked up when the secure workspace is enabled;
  # otherwise empty strings are passed.
  enable_aml_secure_workspace = var.enable_aml_secure_workspace
  vnet_id                     = var.enable_aml_secure_workspace ? azurerm_virtual_network.vnet_default[0].id : ""
  subnet_default_id           = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""
  subnet_training_id          = var.enable_aml_secure_workspace ? azurerm_subnet.snet_training[0].id : ""
  tags                        = local.tags
 }
 # Storage account whose id and name are passed to the AML workspace module.
 module "storage_account_aml" {
  source   = "./modules/storage-account"
  prefix   = var.prefix
  postfix  = var.postfix
  env      = var.environment
  rg_name  = module.resource_group.name
  location = module.resource_group.location
  # Storage and firewall settings
  hns_enabled                         = false
  firewall_bypass                     = ["AzureServices"]
  firewall_virtual_network_subnet_ids = []
  # Network IDs are only looked up when the secure workspace is enabled;
  # otherwise empty strings are passed.
  enable_aml_secure_workspace = var.enable_aml_secure_workspace
  vnet_id                     = var.enable_aml_secure_workspace ? azurerm_virtual_network.vnet_default[0].id : ""
  subnet_id                   = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""
  tags                        = local.tags
 }
 # Key vault whose id is passed to the AML workspace and data explorer modules.
 module "key_vault" {
  source   = "./modules/key-vault"
  prefix   = var.prefix
  postfix  = var.postfix
  env      = var.environment
  rg_name  = module.resource_group.name
  location = module.resource_group.location
  # Network IDs are only looked up when the secure workspace is enabled;
  # otherwise empty strings are passed.
  enable_aml_secure_workspace = var.enable_aml_secure_workspace
  vnet_id                     = var.enable_aml_secure_workspace ? azurerm_virtual_network.vnet_default[0].id : ""
  subnet_id                   = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""
  tags                        = local.tags
 }
 # Application Insights instance whose id is passed to the AML workspace module.
 module "application_insights" {
  source   = "./modules/application-insights"
  prefix   = var.prefix
  postfix  = var.postfix
  env      = var.environment
  rg_name  = module.resource_group.name
  location = module.resource_group.location
  tags     = local.tags
 }
 # Container registry whose id is passed to the AML workspace module.
 module "container_registry" {
  source   = "./modules/container-registry"
  prefix   = var.prefix
  postfix  = var.postfix
  env      = var.environment
  rg_name  = module.resource_group.name
  location = module.resource_group.location
  # Network IDs are only looked up when the secure workspace is enabled;
  # otherwise empty strings are passed.
  enable_aml_secure_workspace = var.enable_aml_secure_workspace
  vnet_id                     = var.enable_aml_secure_workspace ? azurerm_virtual_network.vnet_default[0].id : ""
  subnet_id                   = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""
  tags                        = local.tags
 }
 # Data explorer module, toggled by var.enable_monitoring; it receives the key
 # vault id and the client secret.
 module "data_explorer" {
  source   = "./modules/data-explorer"
  prefix   = var.prefix
  postfix  = var.postfix
  env      = var.environment
  rg_name  = module.resource_group.name
  location = module.resource_group.location
  # Monitoring toggle and the values the module needs for its secrets
  enable_monitoring = var.enable_monitoring
  key_vault_id      = module.key_vault.id
  client_secret     = var.client_secret
  tags              = local.tags
 }
--- a/infrastructure/jumphost.tf
+++ b/infrastructure/jumphost.tf
@ -1,37 +0,0 @@
 # Bastion host module, placed in the bastion subnet when the secure workspace
 # is enabled.
 module "bastion" {
  source   = "./modules/bastion-host"
  prefix   = var.prefix
  postfix  = var.postfix
  env      = var.environment
  rg_name  = module.resource_group.name
  location = module.resource_group.location
  # The subnet ID is only looked up when the secure workspace is enabled;
  # otherwise an empty string is passed.
  enable_aml_secure_workspace = var.enable_aml_secure_workspace
  subnet_id                   = var.enable_aml_secure_workspace ? azurerm_subnet.snet_bastion[0].id : ""
  tags                        = local.tags
 }
 # Jumphost virtual machine module, placed in the default subnet when the
 # secure workspace is enabled.
 module "virtual_machine_jumphost" {
  source   = "./modules/virtual-machine"
  prefix   = var.prefix
  postfix  = var.postfix
  env      = var.environment
  rg_name  = module.resource_group.name
  location = module.resource_group.location
  # Login credentials for the jumphost, taken from root variables
  jumphost_username = var.jumphost_username
  jumphost_password = var.jumphost_password
  # The subnet ID is only looked up when the secure workspace is enabled;
  # otherwise an empty string is passed.
  enable_aml_secure_workspace = var.enable_aml_secure_workspace
  subnet_id                   = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""
  tags                        = local.tags
 }
--- a/infrastructure/locals.tf
+++ b/infrastructure/locals.tf
@ -1,9 +0,0 @@
 # Common tags, referenced as local.tags by the module calls of this configuration.
 locals {
  tags = {
    Owner   = "mlops-v2"
    Project = "mlops-v2"
    # Direct variable references; wrapping a lone reference in "${...}" is a
    # deprecated interpolation-only expression.
    Environment = var.environment
    Toolkit     = "terraform"
    Name        = var.prefix
  }
 }
--- a/infrastructure/main.tf
+++ b/infrastructure/main.tf
@ -1,18 +0,0 @@
 # Terraform settings: azurerm state backend and a pinned azurerm provider.
 terraform {
  # Empty backend block: no backend settings are given here, so they have to be
  # supplied from outside this file (e.g. at `terraform init`).
  backend "azurerm" {}
  required_providers {
    azurerm = {
      # Exact version pin
      version = "= 2.99.0"
    }
  }
 }
 # azurerm provider with default feature settings.
 provider "azurerm" {
  features {}
 }
 # Details of the identity the azurerm provider is configured with.
 data "azurerm_client_config" "current" {}
 # HTTP lookup against ifconfig.me.
 # NOTE(review): presumably used to obtain the caller's public IP — confirm where data.http.ip is consumed.
 data "http" "ip" {
  url = "https://ifconfig.me"
 }
--- a/infrastructure/modules/aml-workspace/main.tf
+++ b/infrastructure/modules/aml-workspace/main.tf
@ -1,97 +0,0 @@
 # Azure Machine Learning workspace with a system-assigned identity.
 resource "azurerm_machine_learning_workspace" "mlw" {
  name                = "mlw-${var.prefix}-${var.postfix}${var.env}"
  resource_group_name = var.rg_name
  location            = var.location
  sku_name            = "Basic"
  # Associated resources, supplied by the caller
  storage_account_id      = var.storage_account_id
  key_vault_id            = var.key_vault_id
  container_registry_id   = var.container_registry_id
  application_insights_id = var.application_insights_id
  identity {
    type = "SystemAssigned"
  }
  tags = var.tags
 }
 # Compute cluster for the workspace; created only when
 # var.enable_aml_computecluster is true.
 resource "azurerm_machine_learning_compute_cluster" "mlw_compute_cluster" {
  count                         = var.enable_aml_computecluster ? 1 : 0
  name                          = "cpu-cluster"
  machine_learning_workspace_id = azurerm_machine_learning_workspace.mlw.id
  location                      = var.location
  vm_size                       = "Standard_DS3_v2"
  vm_priority                   = "LowPriority"
  # Training subnet only when the secure workspace is enabled, else empty
  subnet_resource_id = var.enable_aml_secure_workspace ? var.subnet_training_id : ""
  scale_settings {
    max_node_count                       = 4
    min_node_count                       = 0
    scale_down_nodes_after_idle_duration = "PT120S" # 120 seconds
  }
 }
 # Private DNS zone for the AML API endpoint (secure workspace only).
 resource "azurerm_private_dns_zone" "mlw_zone_api" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  resource_group_name = var.rg_name
  name                = "privatelink.api.azureml.ms"
 }
 # Private DNS zone for the AML notebooks endpoint (secure workspace only).
 resource "azurerm_private_dns_zone" "mlw_zone_notebooks" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  resource_group_name = var.rg_name
  name                = "privatelink.notebooks.azure.net"
 }
 # Links the API private DNS zone to the virtual network (secure workspace only).
 resource "azurerm_private_dns_zone_virtual_network_link" "mlw_zone_api_link" {
  count                 = var.enable_aml_secure_workspace ? 1 : 0
  name                  = "${var.prefix}${var.postfix}_link_api"
  virtual_network_id    = var.vnet_id
  private_dns_zone_name = azurerm_private_dns_zone.mlw_zone_api[0].name
  resource_group_name   = var.rg_name
 }
 # Links the notebooks private DNS zone to the virtual network (secure workspace only).
 resource "azurerm_private_dns_zone_virtual_network_link" "mlw_zone_notebooks_link" {
  count                 = var.enable_aml_secure_workspace ? 1 : 0
  name                  = "${var.prefix}${var.postfix}_link_notebooks"
  virtual_network_id    = var.vnet_id
  private_dns_zone_name = azurerm_private_dns_zone.mlw_zone_notebooks[0].name
  resource_group_name   = var.rg_name
 }
 # Private endpoint for the workspace in the default subnet, registered in both
 # private DNS zones (secure workspace only).
 resource "azurerm_private_endpoint" "mlw_pe" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  name                = "pe-${azurerm_machine_learning_workspace.mlw.name}-amlw"
  resource_group_name = var.rg_name
  location            = var.location
  subnet_id           = var.subnet_default_id
  private_service_connection {
    name                           = "psc-aml-${var.prefix}-${var.postfix}${var.env}"
    is_manual_connection           = false
    private_connection_resource_id = azurerm_machine_learning_workspace.mlw.id
    subresource_names              = ["amlworkspace"]
  }
  private_dns_zone_group {
    name                 = "private-dns-zone-group-ws"
    private_dns_zone_ids = [azurerm_private_dns_zone.mlw_zone_api[0].id, azurerm_private_dns_zone.mlw_zone_notebooks[0].id]
  }
  tags = var.tags
 }
--- a/infrastructure/modules/aml-workspace/outputs.tf
+++ b/infrastructure/modules/aml-workspace/outputs.tf
@ -1,3 +0,0 @@
 # Name of the AML workspace created by this module.
 output "name" {
  value = azurerm_machine_learning_workspace.mlw.name
 }
--- a/infrastructure/modules/aml-workspace/variables.tf
+++ b/infrastructure/modules/aml-workspace/variables.tf
@ -1,79 +0,0 @@
 # Inputs of the aml-workspace module.
 variable "rg_name" {
  type        = string
  description = "Resource group name"
 }
 variable "location" {
  type        = string
  description = "Location of the resource group"
 }
 variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
 }
 variable "prefix" {
  type        = string
  description = "Prefix for the module name"
 }
 variable "postfix" {
  type        = string
  description = "Postfix for the module name"
 }
 variable "env" {
  type        = string
  description = "Environment prefix"
 }
 variable "storage_account_id" {
  type        = string
  description = "The ID of the Storage Account linked to AML workspace"
 }
 variable "key_vault_id" {
  type        = string
  description = "The ID of the Key Vault linked to AML workspace"
 }
 variable "application_insights_id" {
  type        = string
  description = "The ID of the Application Insights linked to AML workspace"
 }
 variable "container_registry_id" {
  type        = string
  description = "The ID of the Container Registry linked to AML workspace"
 }
 # Used as the condition of a `count` expression, so it is declared as bool.
 variable "enable_aml_computecluster" {
  type        = bool
  description = "Variable to enable or disable AML compute cluster"
  default     = false
 }
 variable "storage_account_name" {
  type        = string
  description = "The Name of the Storage Account linked to AML workspace"
 }
 # Used as the condition of `count` and conditional expressions, so it is
 # declared as bool.
 variable "enable_aml_secure_workspace" {
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
 }
 variable "vnet_id" {
  type        = string
  description = "The ID of the vnet that should be linked to the DNS zone"
 }
 variable "subnet_default_id" {
  type        = string
  description = "The ID of the subnet from which private IP addresses will be allocated for this Private Endpoint"
 }
 # Passed to the compute cluster's subnet_resource_id, not to the private endpoint.
 variable "subnet_training_id" {
  type        = string
  description = "The ID of the subnet the AML compute cluster is attached to"
 }
--- a/infrastructure/modules/application-insights/main.tf
+++ b/infrastructure/modules/application-insights/main.tf
@ -1,8 +0,0 @@
 # Application Insights instance of type "web".
 resource "azurerm_application_insights" "appi" {
  name                = "appi-${var.prefix}-${var.postfix}${var.env}"
  resource_group_name = var.rg_name
  location            = var.location
  application_type    = "web"
  tags                = var.tags
 }
--- a/infrastructure/modules/application-insights/outputs.tf
+++ b/infrastructure/modules/application-insights/outputs.tf
@ -1,3 +0,0 @@
 # ID of the Application Insights instance created by this module.
 output "id" {
  value = azurerm_application_insights.appi.id
 }
--- a/infrastructure/modules/application-insights/variables.tf
+++ b/infrastructure/modules/application-insights/variables.tf
@ -1,30 +0,0 @@
 # Inputs of the application-insights module.
 variable "rg_name" {
  description = "Resource group name"
  type        = string
 }
 variable "location" {
  description = "Location of the resource group"
  type        = string
 }
 variable "tags" {
  description = "A mapping of tags which should be assigned to the deployed resource"
  type        = map(string)
  default     = {}
 }
 variable "prefix" {
  description = "Prefix for the module name"
  type        = string
 }
 variable "postfix" {
  description = "Postfix for the module name"
  type        = string
 }
 variable "env" {
  description = "Environment prefix"
  type        = string
 }
--- a/infrastructure/modules/bastion-host/main.tf
+++ b/infrastructure/modules/bastion-host/main.tf
@ -1,31 +0,0 @@
 # Bastion host with copy/paste and file copy disabled; created only when the
 # secure workspace is enabled.
 resource "azurerm_bastion_host" "bas" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  name                = "bas-${var.prefix}-${var.postfix}${var.env}"
  resource_group_name = var.rg_name
  location            = var.location
  sku                 = "Standard"
  copy_paste_enabled  = false
  file_copy_enabled   = false
  ip_configuration {
    name                 = "configuration"
    public_ip_address_id = azurerm_public_ip.pip[0].id
    subnet_id            = var.subnet_id
  }
  tags = var.tags
 }
 # Static Standard-SKU public IP used by the bastion host (secure workspace only).
 resource "azurerm_public_ip" "pip" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  name                = "pip-${var.prefix}-${var.postfix}${var.env}"
  resource_group_name = var.rg_name
  location            = var.location
  sku                 = "Standard"
  allocation_method   = "Static"
  tags                = var.tags
 }
--- a/infrastructure/modules/bastion-host/outputs.tf
+++ b/infrastructure/modules/bastion-host/outputs.tf
--- a/infrastructure/modules/bastion-host/variables.tf
+++ b/infrastructure/modules/bastion-host/variables.tf
@ -1,39 +0,0 @@
 # Inputs of the bastion-host module.
 variable "rg_name" {
  type        = string
  description = "Resource group name"
 }
 variable "location" {
  type        = string
  description = "Location of the resource group"
 }
 variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
 }
 variable "prefix" {
  type        = string
  description = "Prefix for the module name"
 }
 variable "postfix" {
  type        = string
  description = "Postfix for the module name"
 }
 variable "env" {
  type        = string
  description = "Environment prefix"
 }
 variable "subnet_id" {
  type        = string
  description = "Subnet ID for the bastion"
 }
 # Used as the condition of `count` expressions, so it is declared as bool.
 variable "enable_aml_secure_workspace" {
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
 }
--- a/infrastructure/modules/container-registry/main.tf
+++ b/infrastructure/modules/container-registry/main.tf
@ -1,59 +0,0 @@
 # Prefix and postfix with every "-" removed; used to build the container
 # registry name.
 locals {
  safe_prefix  = replace(var.prefix, "-", "")
  safe_postfix = replace(var.postfix, "-", "")
 }
 # Container registry; Premium SKU when the secure workspace is enabled,
 # Standard otherwise.
 resource "azurerm_container_registry" "cr" {
  name                = "cr${local.safe_prefix}${local.safe_postfix}${var.env}"
  location            = var.location
  resource_group_name = var.rg_name
  # NOTE(review): the admin user is enabled — confirm this is required.
  admin_enabled = true
  sku           = var.enable_aml_secure_workspace ? "Premium" : "Standard"
  tags          = var.tags
 }
 # Private DNS zone for the container registry (secure workspace only).
 resource "azurerm_private_dns_zone" "cr_zone" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  resource_group_name = var.rg_name
  name                = "privatelink.azurecr.io"
 }
 # Links the registry private DNS zone to the virtual network (secure workspace only).
 resource "azurerm_private_dns_zone_virtual_network_link" "cr_zone_link" {
  count                 = var.enable_aml_secure_workspace ? 1 : 0
  name                  = "${var.prefix}${var.postfix}_link_acr"
  virtual_network_id    = var.vnet_id
  private_dns_zone_name = azurerm_private_dns_zone.cr_zone[0].name
  resource_group_name   = var.rg_name
 }
 # Private endpoint for the container registry, registered in its private DNS
 # zone (secure workspace only).
 resource "azurerm_private_endpoint" "cr_pe" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  name                = "pe-${azurerm_container_registry.cr.name}-acr"
  resource_group_name = var.rg_name
  location            = var.location
  subnet_id           = var.subnet_id
  private_service_connection {
    name                           = "psc-acr-${var.prefix}-${var.postfix}${var.env}"
    is_manual_connection           = false
    private_connection_resource_id = azurerm_container_registry.cr.id
    subresource_names              = ["registry"]
  }
  private_dns_zone_group {
    name                 = "private-dns-zone-group-acr"
    private_dns_zone_ids = [azurerm_private_dns_zone.cr_zone[0].id]
  }
  tags = var.tags
 }
--- a/infrastructure/modules/container-registry/outputs.tf
+++ b/infrastructure/modules/container-registry/outputs.tf
@ -1,3 +0,0 @@
 # ID of the container registry created by this module.
 output "id" {
  value = azurerm_container_registry.cr.id
 }
--- a/infrastructure/modules/container-registry/variables.tf
+++ b/infrastructure/modules/container-registry/variables.tf
@ -1,44 +0,0 @@
 # Inputs of the container-registry module.
 variable "rg_name" {
  type        = string
  description = "Resource group name"
 }
 variable "location" {
  type        = string
  description = "Location of the resource group"
 }
 variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
 }
 variable "prefix" {
  type        = string
  description = "Prefix for the module name"
 }
 variable "postfix" {
  type        = string
  description = "Postfix for the module name"
 }
 variable "env" {
  type        = string
  description = "Environment prefix"
 }
 # Used as the condition of `count` and conditional expressions, so it is
 # declared as bool.
 variable "enable_aml_secure_workspace" {
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
 }
 variable "vnet_id" {
  type        = string
  description = "The ID of the vnet that should be linked to the DNS zone"
 }
 variable "subnet_id" {
  type        = string
  description = "The ID of the subnet from which private IP addresses will be allocated for this Private Endpoint"
 }
--- a/infrastructure/modules/data-explorer/main.tf
+++ b/infrastructure/modules/data-explorer/main.tf
@ -1,59 +0,0 @@
 # Identity the azurerm provider is configured with; its client_id and
 # tenant_id are stored as key vault secrets in this file.
 data "azurerm_client_config" "current" {}
 # Kusto (Azure Data Explorer) cluster with streaming ingestion and the PYTHON
 # language extension; created only when var.enable_monitoring is true.
 resource "azurerm_kusto_cluster" "cluster" {
  count                       = var.enable_monitoring ? 1 : 0
  name                        = "adx${var.prefix}${var.postfix}${var.env}"
  resource_group_name         = var.rg_name
  location                    = var.location
  language_extensions         = ["PYTHON"]
  streaming_ingestion_enabled = true
  sku {
    capacity = 2
    name     = "Standard_D11_v2"
  }
  tags = var.tags
 }
 # "mlmonitoring" database in the Kusto cluster (monitoring only).
 resource "azurerm_kusto_database" "database" {
  count               = var.enable_monitoring ? 1 : 0
  name                = "mlmonitoring"
  cluster_name        = azurerm_kusto_cluster.cluster[0].name
  location            = var.location
  resource_group_name = var.rg_name
 }
 # Stores the current client ID in the key vault (monitoring only).
 resource "azurerm_key_vault_secret" "SP_ID" {
  count        = var.enable_monitoring ? 1 : 0
  key_vault_id = var.key_vault_id
  name         = "kvmonitoringspid"
  value        = data.azurerm_client_config.current.client_id
 }
 # Stores the supplied client secret in the key vault (monitoring only).
 resource "azurerm_key_vault_secret" "SP_KEY" {
  count        = var.enable_monitoring ? 1 : 0
  key_vault_id = var.key_vault_id
  name         = "kvmonitoringspkey"
  value        = var.client_secret
 }
 # Stores the current tenant ID in the key vault (monitoring only).
 resource "azurerm_key_vault_secret" "SP_TENANT_ID" {
  count        = var.enable_monitoring ? 1 : 0
  key_vault_id = var.key_vault_id
  name         = "kvmonitoringadxtenantid"
  value        = data.azurerm_client_config.current.tenant_id
 }
 # Stores the Kusto cluster URI in the key vault (monitoring only).
 resource "azurerm_key_vault_secret" "ADX_URI" {
  count        = var.enable_monitoring ? 1 : 0
  key_vault_id = var.key_vault_id
  name         = "kvmonitoringadxuri"
  value        = azurerm_kusto_cluster.cluster[0].uri
 }
 # Stores the Kusto database name in the key vault (monitoring only).
 resource "azurerm_key_vault_secret" "ADX_DB" {
  count        = var.enable_monitoring ? 1 : 0
  key_vault_id = var.key_vault_id
  name         = "kvmonitoringadxdb"
  value        = azurerm_kusto_database.database[0].name
 }
--- a/infrastructure/modules/data-explorer/outputs.tf
+++ b/infrastructure/modules/data-explorer/outputs.tf
--- a/infrastructure/modules/data-explorer/variables.tf
+++ b/infrastructure/modules/data-explorer/variables.tf
@ -1,45 +0,0 @@
 # Inputs of the data-explorer module.
 variable "rg_name" {
  type        = string
  description = "Resource group name"
 }
 variable "location" {
  type        = string
  description = "Location of the resource group"
 }
 variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
 }
 variable "prefix" {
  type        = string
  description = "Prefix for the module name"
 }
 variable "postfix" {
  type        = string
  description = "Postfix for the module name"
 }
 variable "env" {
  type        = string
  description = "Environment prefix"
 }
 variable "key_vault_id" {
  type        = string
  description = "The ID of the Key Vault linked to AML workspace"
 }
 # Gates the `count` of the Kusto cluster, the database and the key vault
 # secrets of this module, so it is declared as bool.
 variable "enable_monitoring" {
  type        = bool
  description = "Variable to enable or disable the monitoring resources (Kusto cluster, database and key vault secrets)"
  default     = false
 }
 # Only used as the value of a key vault secret, i.e. as a string.
 # NOTE(review): the default is the boolean false, which ends up as the string
 # "false" — confirm whether a default is wanted for a secret at all.
 variable "client_secret" {
  type        = string
  description = "client secret"
  default     = false
 }
--- a/infrastructure/modules/key-vault/main.tf
+++ b/infrastructure/modules/key-vault/main.tf
@ -1,74 +0,0 @@
 # Configuration (tenant and object ID) of the client the azurerm provider
 # is authenticated as.
 data "azurerm_client_config" "current" {}
 # Key Vault named kv-<prefix>-<postfix><env> with an inline access policy for
 # the current client.
 resource "azurerm_key_vault" "kv" {
  name                = "kv-${var.prefix}-${var.postfix}${var.env}"
  resource_group_name = var.rg_name
  location            = var.location
  sku_name            = "standard"
  tenant_id           = data.azurerm_client_config.current.tenant_id
  tags                = var.tags
  # Key and secret permissions granted to the current client on this vault.
  access_policy {
    tenant_id          = data.azurerm_client_config.current.tenant_id
    object_id          = data.azurerm_client_config.current.object_id
    key_permissions    = ["Create", "Get"]
    secret_permissions = ["Set", "Get", "Delete", "Purge", "Recover"]
  }
 }
 # Private DNS zone for Key Vault private link.
 # Only created when the secure workspace is enabled.
 resource "azurerm_private_dns_zone" "kv_zone" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  resource_group_name = var.rg_name
  name                = "privatelink.vaultcore.azure.net"
 }
 # Links the Key Vault private DNS zone to the virtual network passed in as
 # var.vnet_id. Only created when the secure workspace is enabled.
 resource "azurerm_private_dns_zone_virtual_network_link" "kv_zone_link" {
  count                 = var.enable_aml_secure_workspace ? 1 : 0
  resource_group_name   = var.rg_name
  name                  = "${var.prefix}${var.postfix}_link_kv"
  virtual_network_id    = var.vnet_id
  private_dns_zone_name = azurerm_private_dns_zone.kv_zone[0].name
 }
 # Private endpoint for the Key Vault "vault" subresource, placed in
 # var.subnet_id. Only created when the secure workspace is enabled.
 resource "azurerm_private_endpoint" "kv_pe" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  name                = "pe-${azurerm_key_vault.kv.name}-vault"
  resource_group_name = var.rg_name
  location            = var.location
  subnet_id           = var.subnet_id
  tags                = var.tags
  # Registers the endpoint in the Key Vault private DNS zone.
  private_dns_zone_group {
    name                 = "private-dns-zone-group-kv"
    private_dns_zone_ids = [azurerm_private_dns_zone.kv_zone[0].id]
  }
  # Automatically approved connection to the vault.
  private_service_connection {
    name                           = "psc-kv-${var.prefix}-${var.postfix}${var.env}"
    is_manual_connection           = false
    subresource_names              = ["vault"]
    private_connection_resource_id = azurerm_key_vault.kv.id
  }
 }
--- a/infrastructure/modules/key-vault/outputs.tf
+++ b/infrastructure/modules/key-vault/outputs.tf
@ -1,3 +0,0 @@
 # Exposes the Key Vault resource ID to the calling configuration.
 output "id" {
  description = "The ID of the Key Vault"
  value       = azurerm_key_vault.kv.id
 }
--- a/infrastructure/modules/key-vault/variables.tf
+++ b/infrastructure/modules/key-vault/variables.tf
@ -1,44 +0,0 @@
 # Inputs of the key-vault module.
 variable "rg_name" {
  type        = string
  description = "Resource group name"
 }
 variable "location" {
  type        = string
  description = "Location of the resource group"
 }
 variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
 }
 variable "prefix" {
  type        = string
  description = "Prefix for the module name"
 }
 variable "postfix" {
  type        = string
  description = "Postfix for the module name"
 }
 variable "env" {
  type        = string
  description = "Environment prefix"
 }
 # Used as the condition of every `count` in this module, so it must be a bool.
 variable "enable_aml_secure_workspace" {
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
 }
 variable "vnet_id" {
  type        = string
  description = "The ID of the vnet that should be linked to the DNS zone"
 }
 variable "subnet_id" {
  type        = string
  description = "The ID of the subnet from which private IP addresses will be allocated for this Private Endpoint"
 }
--- a/infrastructure/modules/resource-group/main.tf
+++ b/infrastructure/modules/resource-group/main.tf
@ -1,5 +0,0 @@
 # Resource group named rg-<prefix>-<postfix><env>.
 resource "azurerm_resource_group" "adl_rg" {
  location = var.location
  name     = "rg-${var.prefix}-${var.postfix}${var.env}"
  tags     = var.tags
 }
--- a/infrastructure/modules/resource-group/outputs.tf
+++ b/infrastructure/modules/resource-group/outputs.tf
@ -1,7 +0,0 @@
 # Values of the created resource group exposed to the calling configuration.
 output "name" {
  description = "The name of the resource group"
  value       = azurerm_resource_group.adl_rg.name
 }
 output "location" {
  description = "The location of the resource group"
  value       = azurerm_resource_group.adl_rg.location
 }
--- a/infrastructure/modules/resource-group/variables.tf
+++ b/infrastructure/modules/resource-group/variables.tf
@ -1,26 +0,0 @@
 # Inputs of the resource-group module.
 # Region of the resource group; falls back to "North Europe" when not set.
 variable "location" {
  description = "Location of the Resource Group"
  type        = string
  default     = "North Europe"
 }
 variable "tags" {
  description = "A mapping of tags which should be assigned to the Resource Group"
  type        = map(string)
  default     = {}
 }
 # prefix, postfix and env are combined into the resource group name.
 variable "prefix" {
  description = "Prefix for the module name"
  type        = string
 }
 variable "postfix" {
  description = "Postfix for the module name"
  type        = string
 }
 variable "env" {
  description = "Environment prefix"
  type        = string
 }
--- a/infrastructure/modules/storage-account/main.tf
+++ b/infrastructure/modules/storage-account/main.tf
@ -1,118 +0,0 @@
 # Configuration of the client the azurerm provider is authenticated as.
 # NOTE(review): not referenced in the visible part of this module - confirm
 # whether it is still needed.
 data "azurerm_client_config" "current" {}
 # Performs an HTTP GET against ifconfig.me (presumably to obtain the
 # deployer's public IP).
 # NOTE(review): the only visible reference is the commented-out ip_rules
 # value in the network rules, so this request looks unused - confirm.
 data "http" "ip" {
  url = "https://ifconfig.me"
 }
 # Naming parts for the storage account. Storage account names only allow
 # lowercase letters and digits, so hyphens are removed and the result is
 # lower-cased before it is used in the account name.
 locals {
  safe_prefix  = lower(replace(var.prefix, "-", ""))
  safe_postfix = lower(replace(var.postfix, "-", ""))
 }
 # Standard LRS StorageV2 account named st<prefix><postfix><env>, using the
 # hyphen-free prefix/postfix locals. Hierarchical namespace follows
 # var.hns_enabled.
 resource "azurerm_storage_account" "st" {
  name                     = "st${local.safe_prefix}${local.safe_postfix}${var.env}"
  location                 = var.location
  resource_group_name      = var.rg_name
  account_kind             = "StorageV2"
  account_tier             = "Standard"
  account_replication_type = "LRS"
  is_hns_enabled           = var.hns_enabled
  tags                     = var.tags
 }
 # Network rules (firewall) of the storage account.
 # NOTE(review): default_action is "Allow", so traffic that matches no rule is
 # still admitted - confirm this is intended when the secure workspace is on.
 resource "azurerm_storage_account_network_rules" "firewall_rules" {
  storage_account_name       = azurerm_storage_account.st.name
  resource_group_name        = var.rg_name
  default_action             = "Allow"
  bypass                     = var.firewall_bypass
  virtual_network_subnet_ids = var.firewall_virtual_network_subnet_ids
  ip_rules                   = [] # [data.http.ip.body]
 }
 # Private DNS zone for blob private link.
 # Only created when the secure workspace is enabled.
 resource "azurerm_private_dns_zone" "st_zone_blob" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  resource_group_name = var.rg_name
  name                = "privatelink.blob.core.windows.net"
 }
 # Private DNS zone for file private link.
 # Only created when the secure workspace is enabled.
 resource "azurerm_private_dns_zone" "st_zone_file" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  resource_group_name = var.rg_name
  name                = "privatelink.file.core.windows.net"
 }
 # Links the blob private DNS zone to the virtual network passed in as
 # var.vnet_id. Only created when the secure workspace is enabled.
 resource "azurerm_private_dns_zone_virtual_network_link" "st_zone_link_blob" {
  count                 = var.enable_aml_secure_workspace ? 1 : 0
  resource_group_name   = var.rg_name
  name                  = "${var.prefix}${var.postfix}_link_st_blob"
  virtual_network_id    = var.vnet_id
  private_dns_zone_name = azurerm_private_dns_zone.st_zone_blob[0].name
 }
 # Links the file private DNS zone to the virtual network passed in as
 # var.vnet_id. Only created when the secure workspace is enabled.
 resource "azurerm_private_dns_zone_virtual_network_link" "st_zone_link_file" {
  count                 = var.enable_aml_secure_workspace ? 1 : 0
  resource_group_name   = var.rg_name
  name                  = "${var.prefix}${var.postfix}_link_st_file"
  virtual_network_id    = var.vnet_id
  private_dns_zone_name = azurerm_private_dns_zone.st_zone_file[0].name
 }
 # Private endpoint for the storage account "blob" subresource, placed in
 # var.subnet_id. Only created when the secure workspace is enabled.
 resource "azurerm_private_endpoint" "st_pe_blob" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  name                = "pe-${azurerm_storage_account.st.name}-blob"
  resource_group_name = var.rg_name
  location            = var.location
  subnet_id           = var.subnet_id
  tags                = var.tags
  # Registers the endpoint in the blob private DNS zone.
  private_dns_zone_group {
    name                 = "private-dns-zone-group-blob"
    private_dns_zone_ids = [azurerm_private_dns_zone.st_zone_blob[0].id]
  }
  # Automatically approved connection to the storage account.
  private_service_connection {
    name                           = "psc-blob-${var.prefix}-${var.postfix}${var.env}"
    is_manual_connection           = false
    subresource_names              = ["blob"]
    private_connection_resource_id = azurerm_storage_account.st.id
  }
 }
 # Private endpoint for the storage account "file" subresource, placed in
 # var.subnet_id. Only created when the secure workspace is enabled.
 resource "azurerm_private_endpoint" "st_pe_file" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  name                = "pe-${azurerm_storage_account.st.name}-file"
  resource_group_name = var.rg_name
  location            = var.location
  subnet_id           = var.subnet_id
  tags                = var.tags
  # Registers the endpoint in the file private DNS zone.
  private_dns_zone_group {
    name                 = "private-dns-zone-group-file"
    private_dns_zone_ids = [azurerm_private_dns_zone.st_zone_file[0].id]
  }
  # Automatically approved connection to the storage account.
  private_service_connection {
    name                           = "psc-file-${var.prefix}-${var.postfix}${var.env}"
    is_manual_connection           = false
    subresource_names              = ["file"]
    private_connection_resource_id = azurerm_storage_account.st.id
  }
 }
--- a/infrastructure/modules/storage-account/outputs.tf
+++ b/infrastructure/modules/storage-account/outputs.tf
@ -1,7 +0,0 @@
 # Values of the created storage account exposed to the calling configuration.
 output "id" {
  description = "The ID of the storage account"
  value       = azurerm_storage_account.st.id
 }
 output "name" {
  description = "The name of the storage account"
  value       = azurerm_storage_account.st.name
 }
--- a/infrastructure/modules/storage-account/variables.tf
+++ b/infrastructure/modules/storage-account/variables.tf
@ -1,58 +0,0 @@
 # Inputs of the storage-account module.
 variable "rg_name" {
  type        = string
  description = "Resource group name"
 }
 variable "location" {
  type        = string
  description = "Location of the resource group"
 }
 variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the Resource Group"
 }
 variable "prefix" {
  type        = string
  description = "Prefix for the module name"
 }
 variable "postfix" {
  type        = string
  description = "Postfix for the module name"
 }
 variable "env" {
  type        = string
  description = "Environment prefix"
 }
 variable "hns_enabled" {
  type        = bool
  description = "Hierarchical namespaces enabled/disabled"
  default     = true
 }
 # Passed to `virtual_network_subnet_ids` of the storage network rules.
 variable "firewall_virtual_network_subnet_ids" {
  type        = list(string)
  description = "Subnet IDs allowed through the storage account firewall"
  default     = []
 }
 # Passed to `bypass` of the storage network rules.
 variable "firewall_bypass" {
  type        = list(string)
  description = "Traffic allowed to bypass the storage account firewall"
  default     = ["None"]
 }
 # Used as the condition of the `count` arguments in this module, so it must
 # be a bool.
 variable "enable_aml_secure_workspace" {
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
 }
 variable "vnet_id" {
  type        = string
  description = "The ID of the vnet that should be linked to the DNS zone"
 }
 variable "subnet_id" {
  type        = string
  description = "The ID of the subnet from which private IP addresses will be allocated for this Private Endpoint"
 }
--- a/infrastructure/modules/virtual-machine/main.tf
+++ b/infrastructure/modules/virtual-machine/main.tf
@ -1,104 +0,0 @@
 # Jump host virtual machine "wvm-jumphost", attached to the NIC defined in
 # this module. Only created when the secure workspace is enabled.
 resource "azurerm_virtual_machine" "vm" {
  count                 = var.enable_aml_secure_workspace ? 1 : 0
  name                  = "wvm-jumphost"
  resource_group_name   = var.rg_name
  location              = var.location
  vm_size               = "Standard_DS3_v2"
  network_interface_ids = [azurerm_network_interface.vm_nic[0].id]
  tags                  = var.tags
  # Disks are removed together with the VM.
  delete_data_disks_on_termination = true
  delete_os_disk_on_termination    = true
  identity {
    type = "SystemAssigned"
  }
  # Marketplace image the OS disk is created from.
  storage_image_reference {
    publisher = "microsoft-dsvm"
    offer     = "dsvm-win-2019"
    sku       = "server-2019"
    version   = "latest"
  }
  storage_os_disk {
    name              = "disk-${var.prefix}-${var.postfix}${var.env}"
    create_option     = "FromImage"
    managed_disk_type = "StandardSSD_LRS"
    caching           = "ReadWrite"
  }
  # NOTE(review): the computer name reuses the admin username - confirm this
  # is intended.
  os_profile {
    computer_name  = var.jumphost_username
    admin_username = var.jumphost_username
    admin_password = var.jumphost_password
  }
  os_profile_windows_config {
    enable_automatic_upgrades = true
    provision_vm_agent        = true
  }
 }
 # Network interface of the jump host, with a dynamically allocated private
 # IP in var.subnet_id and no public IP attached. Only created when the secure
 # workspace is enabled.
 resource "azurerm_network_interface" "vm_nic" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  name                = "nic-${var.prefix}-${var.postfix}${var.env}"
  resource_group_name = var.rg_name
  location            = var.location
  tags                = var.tags
  ip_configuration {
    name                          = "configuration"
    subnet_id                     = var.subnet_id
    private_ip_address_allocation = "Dynamic"
    # public_ip_address_id          = azurerm_public_ip.vm_public_ip.id
  }
 }
 # Network security group for the jump host NIC. Only created when the secure
 # workspace is enabled.
 resource "azurerm_network_security_group" "vm_nsg" {
  name                = "nsg-${var.prefix}-${var.postfix}${var.env}"
  location            = var.location
  resource_group_name = var.rg_name
  # Allow RDP only from inside the virtual network. The NIC has no public IP,
  # so the jump host is reached over the VNet; accepting RDP from any source
  # ("*") would expose port 3389 as soon as a public IP is attached.
  security_rule {
    name                       = "RDP"
    priority                   = 1010
    direction                  = "Inbound"
    access                     = "Allow"
    protocol                   = "Tcp"
    source_port_range          = "*"
    destination_port_range     = "3389"
    source_address_prefix      = "VirtualNetwork"
    destination_address_prefix = "*"
  }
  count = var.enable_aml_secure_workspace ? 1 : 0
  tags = var.tags
 }
 # Attaches the jump host NSG to the jump host NIC.
 # Only created when the secure workspace is enabled.
 resource "azurerm_network_interface_security_group_association" "vm_nsg_association" {
  count                     = var.enable_aml_secure_workspace ? 1 : 0
  network_security_group_id = azurerm_network_security_group.vm_nsg[0].id
  network_interface_id      = azurerm_network_interface.vm_nic[0].id
 }
 # Daily shutdown schedule for the jump host: recurrence time "2000" in the
 # "W. Europe Standard Time" zone, with notifications turned off.
 # Only created when the secure workspace is enabled.
 resource "azurerm_dev_test_global_vm_shutdown_schedule" "vm_schedule" {
  count                 = var.enable_aml_secure_workspace ? 1 : 0
  virtual_machine_id    = azurerm_virtual_machine.vm[0].id
  location              = var.location
  timezone              = "W. Europe Standard Time"
  daily_recurrence_time = "2000"
  enabled               = true
  notification_settings {
    enabled = false
  }
 }
--- a/infrastructure/modules/virtual-machine/outputs.tf
+++ b/infrastructure/modules/virtual-machine/outputs.tf
--- a/infrastructure/modules/virtual-machine/variables.tf
+++ b/infrastructure/modules/virtual-machine/variables.tf
@ -1,49 +0,0 @@
 # Inputs of the virtual-machine (jump host) module.
 variable "rg_name" {
  type        = string
  description = "Resource group name"
 }
 variable "location" {
  type        = string
  description = "Location of the resource group"
 }
 variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
 }
 variable "prefix" {
  type        = string
  description = "Prefix for the module name"
 }
 variable "postfix" {
  type        = string
  description = "Postfix for the module name"
 }
 variable "env" {
  type        = string
  description = "Environment prefix"
 }
 variable "jumphost_username" {
  type        = string
  description = "VM username"
 }
 # Marked sensitive so the password is redacted from plan/apply output.
 variable "jumphost_password" {
  type        = string
  description = "VM password"
  sensitive   = true
 }
 variable "subnet_id" {
  type        = string
  description = "Subnet ID for the virtual machine"
 }
 # Used as the condition of every `count` in this module, so it must be a bool.
 variable "enable_aml_secure_workspace" {
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
 }
--- a/infrastructure/network.tf
+++ b/infrastructure/network.tf
@ -1,131 +0,0 @@
 # Virtual network (10.0.0.0/16) for the secure workspace.
 # Only created when the secure workspace is enabled.
 resource "azurerm_virtual_network" "vnet_default" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  name                = "vnet-${var.prefix}-${var.postfix}${var.environment}"
  location            = module.resource_group.location
  resource_group_name = module.resource_group.name
  address_space       = ["10.0.0.0/16"]
  tags                = local.tags
 }
 # Default subnet (10.0.1.0/24) of the virtual network.
 # Only created when the secure workspace is enabled.
 resource "azurerm_subnet" "snet_default" {
  count                                          = var.enable_aml_secure_workspace ? 1 : 0
  name                                           = "snet-${var.prefix}-${var.postfix}${var.environment}-default"
  virtual_network_name                           = azurerm_virtual_network.vnet_default[0].name
  resource_group_name                            = module.resource_group.name
  address_prefixes                               = ["10.0.1.0/24"]
  enforce_private_link_endpoint_network_policies = true
 }
 # Subnet "AzureBastionSubnet" (10.0.10.0/27) of the virtual network.
 # Only created when the secure workspace is enabled.
 resource "azurerm_subnet" "snet_bastion" {
  count                = var.enable_aml_secure_workspace ? 1 : 0
  name                 = "AzureBastionSubnet"
  virtual_network_name = azurerm_virtual_network.vnet_default[0].name
  resource_group_name  = module.resource_group.name
  address_prefixes     = ["10.0.10.0/27"]
 }
 # Training subnet (10.0.2.0/24) of the virtual network; the training NSG and
 # route table are associated with it. Only created when the secure workspace
 # is enabled.
 resource "azurerm_subnet" "snet_training" {
  count                                          = var.enable_aml_secure_workspace ? 1 : 0
  name                                           = "snet-${var.prefix}-${var.postfix}${var.environment}-training"
  virtual_network_name                           = azurerm_virtual_network.vnet_default[0].name
  resource_group_name                            = module.resource_group.name
  address_prefixes                               = ["10.0.2.0/24"]
  enforce_private_link_endpoint_network_policies = true
 }
 # Network security groups
 # NSG for the training subnet. Allows inbound TCP from the
 # BatchNodeManagement service tag (ports 29876-29877) and from the
 # AzureMachineLearning service tag (port 44224). Only created when the secure
 # workspace is enabled.
 resource "azurerm_network_security_group" "nsg_training" {
  name                = "nsg-${var.prefix}-${var.postfix}${var.environment}-training"
  location            = module.resource_group.location
  resource_group_name = module.resource_group.name
  security_rule {
    name                       = "BatchNodeManagement"
    priority                   = 100
    direction                  = "Inbound"
    access                     = "Allow"
    protocol                   = "Tcp"
    source_port_range          = "*"
    destination_port_range     = "29876-29877"
    source_address_prefix      = "BatchNodeManagement"
    destination_address_prefix = "*"
  }
  security_rule {
    name                       = "AzureMachineLearning"
    priority                   = 110
    direction                  = "Inbound"
    access                     = "Allow"
    protocol                   = "Tcp"
    source_port_range          = "*"
    destination_port_range     = "44224"
    source_address_prefix      = "AzureMachineLearning"
    destination_address_prefix = "*"
  }
  count = var.enable_aml_secure_workspace ? 1 : 0
  # Tagged like the virtual network so all network resources carry the same tags.
  tags = local.tags
 }
 # Associates the training NSG with the training subnet.
 # Only created when the secure workspace is enabled.
 resource "azurerm_subnet_network_security_group_association" "nsg-training-link" {
  count                     = var.enable_aml_secure_workspace ? 1 : 0
  network_security_group_id = azurerm_network_security_group.nsg_training[0].id
  subnet_id                 = azurerm_subnet.snet_training[0].id
 }
 # User Defined Routes
 # Route table for the training subnet; the individual routes are defined as
 # separate azurerm_route resources. Only created when the secure workspace is
 # enabled.
 resource "azurerm_route_table" "rt_training" {
  name                = "rt-${var.prefix}-${var.postfix}${var.environment}-training"
  location            = module.resource_group.location
  resource_group_name = module.resource_group.name
  count = var.enable_aml_secure_workspace ? 1 : 0
  # Tagged like the virtual network so all network resources carry the same tags.
  tags = local.tags
 }
 # Routes of the training route table. All three use next hop "Internet" and
 # are only created when the secure workspace is enabled.
 # Default route: 0.0.0.0/0.
 resource "azurerm_route" "route_training_internet" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  name                = "Internet"
  route_table_name    = azurerm_route_table.rt_training[0].name
  resource_group_name = module.resource_group.name
  next_hop_type       = "Internet"
  address_prefix      = "0.0.0.0/0"
 }
 # Route for the AzureMachineLearning service tag.
 resource "azurerm_route" "route_training_aml" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  name                = "AzureMLRoute"
  route_table_name    = azurerm_route_table.rt_training[0].name
  resource_group_name = module.resource_group.name
  next_hop_type       = "Internet"
  address_prefix      = "AzureMachineLearning"
 }
 # Route for the BatchNodeManagement service tag.
 resource "azurerm_route" "route_training_batch" {
  count               = var.enable_aml_secure_workspace ? 1 : 0
  name                = "BatchRoute"
  route_table_name    = azurerm_route_table.rt_training[0].name
  resource_group_name = module.resource_group.name
  next_hop_type       = "Internet"
  address_prefix      = "BatchNodeManagement"
 }
 # Associates the training route table with the training subnet.
 # Only created when the secure workspace is enabled.
 resource "azurerm_subnet_route_table_association" "rt_training_link" {
  count          = var.enable_aml_secure_workspace ? 1 : 0
  route_table_id = azurerm_route_table.rt_training[0].id
  subnet_id      = azurerm_subnet.snet_training[0].id
 }
--- a/infrastructure/pipelines/tf-ado-deploy-infra.yml
+++ b/infrastructure/pipelines/tf-ado-deploy-infra.yml
@ -1,68 +0,0 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # Azure DevOps pipeline: creates the storage used for Terraform state, then
 # runs Terraform init/validate/plan/apply using the mlops-templates repository.
 # Environment config is chosen by branch: 'main' uses PRD, anything else DEV.
 variables:
 - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
    # 'main' branch: PRD environment
    - template: ../../config-infra-prod.yml
 - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
    # 'develop' or feature branches: DEV environment
    - template: ../../config-infra-dev.yml
 # Jump host credentials forwarded to the Terraform plan and apply templates.
 # NOTE(review): the password default is a plain-text placeholder; override it
 # at queue time or source it from a secret variable.
 parameters:
 - name: jumphost_username
  type: string
  default: "azureuser"
 - name: jumphost_password
  type: string
  default: "ThisIsNotVerySecure!"
 # Disable CI triggers; the pipeline is started manually. The list form
 # (`- none`) would instead be read as a filter for a branch called "none".
 trigger: none
 pool:
  vmImage: $(ap_vm_image)
 resources:
  repositories:
    - repository: mlops-templates
      name: Azure/mlops-templates
      endpoint: github-connection
      type: github
      ref: main  # branch name
 stages:
  # Stage 1: resource group, storage account and container for Terraform state.
  - stage: CreateStorageAccountForTerraformState
    displayName: Create Storage for Terraform
    jobs:
    - job: CreateStorageForTerraform
      displayName: Create Storage for Terraform
      steps:
      - checkout: self
        path: s/
      - checkout: mlops-templates
        path: s/templates/
      - template: templates/infra/create-resource-group.yml@mlops-templates
      - template: templates/infra/create-storage-account.yml@mlops-templates
      - template: templates/infra/create-storage-container.yml@mlops-templates
  # Stage 2: install Terraform and run init, validate, plan and apply.
  - stage: DeployAzureMachineLearningRG
    displayName: Deploy AML Workspace
    jobs:
    - job: DeployAMLWorkspace
      displayName: Deploy Terraform
      steps:
      - checkout: self
        path: s/
      - checkout: mlops-templates
        path: s/templates/
      - template: templates/infra/install-terraform.yml@mlops-templates
      - template: templates/infra/run-terraform-init.yml@mlops-templates
      - template: templates/infra/run-terraform-validate.yml@mlops-templates
      - template: templates/infra/run-terraform-plan.yml@mlops-templates
        parameters:
          jumphost_username: ${{parameters.jumphost_username}}
          jumphost_password: ${{parameters.jumphost_password}}
      - template: templates/infra/run-terraform-apply.yml@mlops-templates
        parameters:
          jumphost_username: ${{parameters.jumphost_username}}
          jumphost_password: ${{parameters.jumphost_password}}
--- a/infrastructure/variables.tf
+++ b/infrastructure/variables.tf
@ -1,47 +0,0 @@
 # Inputs of the root infrastructure configuration.
 variable "location" {
  type        = string
  description = "Location of the resource group and modules"
 }
 variable "prefix" {
  type        = string
  description = "Prefix for module names"
 }
 variable "environment" {
  type        = string
  description = "Environment information"
 }
 variable "postfix" {
  type        = string
  description = "Postfix for module names"
 }
 # Feature flags; typed as bool so a non-boolean value is rejected up front.
 variable "enable_aml_computecluster" {
  type        = bool
  description = "Variable to enable or disable AML compute cluster"
 }
 variable "enable_aml_secure_workspace" {
  type        = bool
  description = "Variable to enable or disable AML secure workspace"
 }
 variable "jumphost_username" {
  type        = string
  description = "VM username"
  default     = "azureuser"
 }
 # Marked sensitive so the password is redacted from plan/apply output.
 # NOTE(review): the default is a plain-text placeholder kept for backward
 # compatibility; always override it outside of throwaway labs.
 variable "jumphost_password" {
  type        = string
  description = "VM password"
  default     = "ThisIsNotVerySecure!"
  sensitive   = true
 }
 variable "enable_monitoring" {
  type        = bool
  description = "Variable to enable or disable Monitoring"
 }
 # Marked sensitive so the secret is redacted from plan/apply output.
 variable "client_secret" {
  type        = string
  description = "Service Principal Secret"
  sensitive   = true
 }
--- a/ml-pipelines/cli/azureml-cliv2.ipynb
+++ b/ml-pipelines/cli/azureml-cliv2.ipynb
--- a/ml-pipelines/cli/deploy-batch-endpint.sh
+++ b/ml-pipelines/cli/deploy-batch-endpint.sh
@ -0,0 +1,11 @@
 # DEPLOY
 # Deploy Batch Endpoint
 # Creates the batch endpoint and then its default deployment from the YAML
 # definitions under deploy/batch/.
 # Stop at the first failing command so the deployment is never attempted
 # against an endpoint that failed to create.
 set -e
 # create compute cluster to be used by batch cluster
 #az ml compute create -n batch-cluster --type amlcompute --min-instances 0 --max-instances 3
 # create batch endpoint
 az ml batch-endpoint create --file deploy/batch/batch-endpoint.yml
 # create batch deployment
 az ml batch-deployment create --file deploy/batch/batch-deployment.yml --set-default
 # invoke and test endpoint
 #az ml batch-endpoint invoke --name taxi-batch-endpoint --input ../../data/taxi-batch.csv
--- a/ml-pipelines/cli/deploy-online-endpint.sh
+++ b/ml-pipelines/cli/deploy-online-endpint.sh
@ -0,0 +1,12 @@
 # DEPLOY
 # Deploy Online Endpoint
 # Creates the online endpoint, its deployment, and then routes all traffic to
 # the "blue" deployment.
 # Stop at the first failing command so traffic is never shifted to a
 # deployment that failed to create.
 set -e
 # NOTE(review): resource group, workspace and location are hard-coded here;
 # adjust them to your own environment before running.
 az configure --defaults group=mldemorg workspace=mldemo location=eastus
 # create online endpoint
 az ml online-endpoint create --file deploy/online/online-endpoint.yml
 # create online deployment
 az ml online-deployment create --file deploy/online/online-deployment.yml
 # allocate traffic
 az ml online-endpoint update --name taxi-online-endpoint --traffic blue=100
 # invoke and test endpoint
 #az ml online-endpoint invoke --name taxi-online-endpoint --request-file ../../data/taxi-request.json
--- a/ml-pipelines/cli/deploy/batch/batch-deployment.yml
+++ b/ml-pipelines/cli/deploy/batch/batch-deployment.yml
--- a/ml-pipelines/cli/deploy/batch/batch-endpoint.yml
+++ b/ml-pipelines/cli/deploy/batch/batch-endpoint.yml
--- a/ml-pipelines/cli/deploy/online/online-deployment.yml
+++ b/ml-pipelines/cli/deploy/online/online-deployment.yml
--- a/ml-pipelines/cli/deploy/online/online-endpoint.yml
+++ b/ml-pipelines/cli/deploy/online/online-endpoint.yml
--- a/ml-pipelines/cli/train.sh
+++ b/ml-pipelines/cli/train.sh
@ -0,0 +1,12 @@
 # TRAIN
 # Registers the data asset and training environment, then submits the two
 # pipeline jobs defined under train/.
 # Stop at the first failing command so jobs are not submitted when the data
 # asset or environment registration failed.
 set -e
 # Create train job compute cluster
 #az ml compute create --file train/compute.yml
 # Register data asset
 az ml data create --file train/data.yml
 # Register train environment
 az ml environment create --file train/environment.yml
 # Create pipeline job
 az ml job create --file train/pipeline.yml
 # Create pipeline job with automl training job
 az ml job create --file train/pipeline_automl.yml
--- a/ml-pipelines/cli/train/compute.yml
+++ b/ml-pipelines/cli/train/compute.yml
--- a/ml-pipelines/cli/train/data.yml
+++ b/ml-pipelines/cli/train/data.yml
--- a/ml-pipelines/cli/train/environment.yml
+++ b/ml-pipelines/cli/train/environment.yml
@ -1,5 +1,5 @@
 $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
 name: taxi-train-env
 image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04
-conda_file: ../../../data-science/environment/train-conda.yml
+conda_file: ../../../environment/train-conda.yml
 description: Environment created from a Docker image plus Conda environment to train taxi model.
--- a/ml-pipelines/cli/train/pipeline.yml
+++ b/ml-pipelines/cli/train/pipeline.yml
@ -30,7 +30,7 @@ jobs:
  prep_data:
    name: prep_data
    display_name: prep-data
-    code: ../../../data-science/src/prep
+    code: ../../../components/prep
    command: >-
      python prep.py 
      --raw_data ${{inputs.raw_data}} 
@ -52,7 +52,7 @@ jobs:
  train_model:
    name: train_model
    display_name: train-model
-    code: ../../../data-science/src/train
+    code: ../../../components/train
    command: >-
      python train.py 
      --train_data ${{inputs.train_data}} 
@ -66,7 +66,7 @@ jobs:
  evaluate_model:
    name: evaluate_model
    display_name: evaluate-model
-    code: ../../../data-science/src/evaluate
+    code: ../../../components/evaluate
    command: >-
      python evaluate.py 
      --model_name ${{inputs.model_name}} 
@ -84,7 +84,7 @@ jobs:
  register_model:
    name: register_model
    display_name: register-model
-    code: ../../../data-science/src/register
+    code: ../../../components/register
    command: >-
      python register.py 
      --model_name ${{inputs.model_name}} 
--- a/ml-pipelines/cli/train/pipeline_automl.yml
+++ b/ml-pipelines/cli/train/pipeline_automl.yml
@ -30,7 +30,7 @@ jobs:
  prep_data:
    name: prep_data
    display_name: prep-data
-    code: ../../../data-science/src/prep
+    code: ../../../components/prep
    command: >-
      python prep.py 
      --raw_data ${{inputs.raw_data}} 
@ -74,7 +74,7 @@ jobs:
  register_model:
    name: register_model
    display_name: register-model  
-    code: ../../../data-science/src/register
+    code: ../../../components/register
    command: >-
      python register_automl.py 
      --model_name ${{inputs.model_name}} 
--- a/ml-pipelines/sdk/deploy-batch-endpoint-sdkv2.ipynb
+++ b/ml-pipelines/sdk/deploy-batch-endpoint-sdkv2.ipynb
@ -0,0 +1,390 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import required libraries\n",
    "from azure.ai.ml import MLClient, command, Input, Output, load_component\n",
    "from azure.identity import DefaultAzureCredential\n",
    "from azure.ai.ml.entities import Data, Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Enter details of your AML workspace\n",
    "subscription_id = \"<subscription_id>\"\n",
    "resource_group = \"<resouce_group_name>\"\n",
    "workspace = \"<workspace_name>\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "gather": {
     "logged": 1670200031039
    },
    "jupyter": {
     "outputs_hidden": false,
     "source_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [],
   "source": [
    "# get a handle to the workspace\n",
    "ml_client = MLClient(\n",
    "    DefaultAzureCredential(), subscription_id, resource_group, workspace\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Batch Endpoint\n",
    "\n",
    "**Batch endpoints** are endpoints that are used to do batch inferencing on large volumes of data over a period of time. \n",
    "\n",
    "**Batch endpoints** receive pointers to data and run jobs asynchronously to process the data in parallel on compute clusters. Batch endpoints store outputs to a data store for further analysis.\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/endpoint_batch_concept.png\" width = \"700px\" alt=\"Concept batch endpoint\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Create Batch Compute Cluster (Optional)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "gather": {
     "logged": 1668247613855
    }
   },
   "source": [
    "``` python\n",
    "# create the compute cluster to be used by the batch deployment\n",
    "from azure.ai.ml.entities import AmlCompute\n",
    "\n",
    "my_cluster = AmlCompute(\n",
    "    name=\"batch-cluster\",\n",
    "    type=\"amlcompute\", \n",
    "    size=\"STANDARD_DS3_V2\", \n",
    "    min_instances=0, \n",
    "    max_instances=3,\n",
    "    location=\"westeurope\", \t\n",
    ")\n",
    "ml_client.compute.begin_create_or_update(my_cluster)\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<azure.core.polling._poller.LROPoller at 0x7f085c2d3550>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from azure.ai.ml.entities import AmlCompute\n",
    "\n",
    "# Create the batch compute cluster only if it does not exist yet.\n",
    "# The name must match the `compute` value used by the batch deployment below.\n",
    "compute_name = \"batch-cluster\"\n",
    "\n",
    "try:\n",
    "    # Look up the same cluster that would otherwise be created.\n",
    "    ml_client.compute.get(name=compute_name)\n",
    "    print(\"Compute already exists\")\n",
    "\n",
    "except Exception:\n",
    "    # Any lookup failure is treated as \"not found\"; unlike a bare `except:`,\n",
    "    # this no longer traps KeyboardInterrupt / SystemExit.\n",
    "    print(\"Compute not found; Proceeding to create\")\n",
    "\n",
    "    my_cluster = AmlCompute(\n",
    "        name=compute_name,\n",
    "        type=\"amlcompute\", \n",
    "        size=\"STANDARD_DS3_V2\", \n",
    "        min_instances=0, \n",
    "        max_instances=3,\n",
    "    )\n",
    "    ml_client.compute.begin_create_or_update(my_cluster)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Create Batch Endpoint\n",
    "\n",
    "We can create the **batch endpoint** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_batch_endpoint.png\" width = \"700px\" alt=\"Create batch endpoint cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "gather": {
     "logged": 1668247623872
    }
   },
   "outputs": [],
   "source": [
    "# create batch endpoint\n",
    "from azure.ai.ml.entities import BatchEndpoint\n",
    "import random\n",
    "\n",
    "rand = random.randint(0, 10000)\n",
    "\n",
    "endpoint_name = f\"taxi-batch-endpoint-{rand}\"\n",
    "batch_endpoint = BatchEndpoint(\n",
    "    name=endpoint_name,\n",
    "    description=\"Taxi batch endpoint\",\n",
    "    tags={\"model\": \"taxi-model@latest\"},\n",
    ")\n",
    "\n",
    "poller = ml_client.begin_create_or_update(batch_endpoint)\n",
    "poller.wait()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Endpoint creation succeeded\n",
      "{'additional_properties': {}, 'id': '/subscriptions/14585b9f-5c83-4a76-8055-42149123f99f/resourceGroups/mldemorg/providers/Microsoft.MachineLearningServices/workspaces/mldemo/batchEndpoints/taxi-batch-endpoint-6853', 'name': 'taxi-batch-endpoint-6853', 'type': 'Microsoft.MachineLearningServices/workspaces/batchEndpoints', 'system_data': <azure.ai.ml._restclient.v2022_05_01.models._models_py3.SystemData object at 0x7f085c30ceb0>, 'tags': {'model': 'taxi-model@latest'}, 'location': 'eastus2', 'identity': <azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity object at 0x7f085c30e620>, 'kind': None, 'properties': <azure.ai.ml._restclient.v2022_05_01.models._models_py3.BatchEndpointDetails object at 0x7f085c30e200>, 'sku': None}\n"
     ]
    }
   ],
   "source": [
    "from azure.ai.ml.exceptions import DeploymentException\n",
    "\n",
    "status = poller.status()\n",
    "if status != \"Succeeded\":\n",
    "    raise DeploymentException(status)\n",
    "else:\n",
    "    print(\"Endpoint creation succeeded\")\n",
    "    endpoint = poller.result()\n",
    "    print(endpoint)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Create Batch Deployment\n",
    "\n",
    "We can create the **batch deployment** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_batch_deployment.png\" width = \"700px\" alt=\"Create batch deployment cli vs sdk\">\n",
    "</center>\n",
    "\n",
    "Note that if you're deploying **MLFlow models**, there's no need to provide **a scoring script** and execution **environment**, as both are autogenerated."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "gather": {
     "logged": 1668247892781
    }
   },
   "outputs": [],
   "source": [
    "# create batch deployment\n",
    "from azure.ai.ml.entities import BatchDeployment, Model, Environment\n",
    "from azure.ai.ml.constants import BatchDeploymentOutputAction\n",
    "\n",
    "model = \"taxi-model@latest\"\n",
    "\n",
    "batch_deployment = BatchDeployment(\n",
    "    name=\"taxi-batch-dp\",\n",
    "    description=\"this is a sample batch deployment\",\n",
    "    endpoint_name=endpoint_name,\n",
    "    model=model,\n",
    "    compute=\"batch-cluster\",\n",
    "    instance_count=2,\n",
    "    max_concurrency_per_instance=2,\n",
    "    mini_batch_size=10,\n",
    "    output_action=BatchDeploymentOutputAction.APPEND_ROW,\n",
    "    output_file_name=\"predictions.csv\",\n",
    ")\n",
    "\n",
    "poller = ml_client.begin_create_or_update(batch_deployment)\n",
    "poller.wait()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "source": [
    "Set deployment as the default deployment in the endpoint:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false,
    "gather": {
     "logged": 1668249096086
    },
    "jupyter": {
     "outputs_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [],
   "source": [
    "batch_endpoint = ml_client.batch_endpoints.get(endpoint_name)\n",
    "batch_endpoint.defaults.deployment_name = batch_deployment.name\n",
    "poller = ml_client.batch_endpoints.begin_create_or_update(batch_endpoint)\n",
    "poller.wait()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Invoke and Test Endpoint\n",
    "\n",
    "We can invoke the **batch deployment** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/invoke_batch_deployment.png\" width = \"700px\" alt=\"Invoke batch deployment cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "gather": {
     "logged": 1668689480461
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32mUploading taxi-batch.csv\u001b[32m (< 1 MB): 100%|██████████| 133k/133k [00:00<00:00, 7.89MB/s]\n",
      "\u001b[39m\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<azure.ai.ml._restclient.v2020_09_01_dataplanepreview.models._models_py3.BatchJobResource at 0x7f085c160460>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# invoke and test endpoint\n",
    "from azure.ai.ml import Input\n",
    "from azure.ai.ml.constants import AssetTypes, InputOutputModes\n",
    "\n",
    "input = Input(path=\"../../data/taxi-batch.csv\", \n",
    "              type=AssetTypes.URI_FILE, \n",
    "              mode=InputOutputModes.DOWNLOAD)\n",
    "\n",
    "\n",
    "# invoke the endpoint for batch scoring job\n",
    "ml_client.batch_endpoints.invoke(\n",
    "    endpoint_name=endpoint_name,\n",
    "    input=input,\n",
    "    deployment_name=\"taxi-batch-dp\"\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernel_info": {
   "name": "python310-sdkv2"
  },
  "kernelspec": {
   "display_name": "Python 3.10 - SDK V2",
   "language": "python",
   "name": "python310-sdkv2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "microsoft": {
   "host": {
    "AzureML": {
     "notebookHasBeenCompleted": true
    }
   }
  },
  "nteract": {
   "version": "nteract-front-end@1.0.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
--- a/ml-pipelines/sdk/deploy-online-endpoint-sdkv2.ipynb
+++ b/ml-pipelines/sdk/deploy-online-endpoint-sdkv2.ipynb
@ -0,0 +1,356 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import required libraries\n",
    "from azure.ai.ml import MLClient, command, Input, Output, load_component\n",
    "from azure.identity import DefaultAzureCredential\n",
    "from azure.ai.ml.entities import Data, Environment, ManagedOnlineEndpoint\n",
    "from azure.ai.ml.constants import AssetTypes, InputOutputModes\n",
    "from azure.ai.ml.dsl import pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Enter details of your AML workspace\n",
    "subscription_id = \"<subscription_id>\"\n",
    "resource_group = \"<resouce_group_name>\"\n",
    "workspace = \"<workspace_name>\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "gather": {
     "logged": 1670200031039
    },
    "jupyter": {
     "outputs_hidden": false,
     "source_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [],
   "source": [
    "# get a handle to the workspace\n",
    "ml_client = MLClient(\n",
    "    DefaultAzureCredential(), subscription_id, resource_group, workspace\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    },
    "tags": []
   },
   "source": [
    "# Online Endpoint\n",
    "\n",
    "Online endpoints are endpoints that are used for online (real-time) inferencing. They receive data from clients and can send responses back in real time.\n",
    "\n",
    "An **endpoint** is an HTTPS endpoint that clients can call to receive the inferencing (scoring) output of a trained model. It provides:\n",
    "* Authentication using \"key & token\" based auth\n",
    "* SSL termination\n",
    "* A stable scoring URI (endpoint-name.region.inference.ml.azure.com)\n",
    "\n",
    "A **deployment** is a set of resources required for hosting the model that does the actual inferencing.\n",
    "A single endpoint can contain multiple deployments.\n",
    "\n",
    "Features of the managed online endpoint:\n",
    "\n",
    "* **Test and deploy locally** for faster debugging\n",
    "* Traffic to one deployment can also be **mirrored** (copied) to another deployment.\n",
    "* **Application Insights integration**\n",
    "* Security\n",
    "* Authentication: Key and Azure ML Tokens\n",
    "* Automatic Autoscaling\n",
    "* Visual Studio Code debugging\n",
    "\n",
    "**blue-green deployment**: An approach where a new version of a web service is introduced to production by deploying it to a small subset of users/requests before deploying it fully.\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/endpoint_concept.png\" width = \"500px\" alt=\"Online Endpoint Concept cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    },
    "tags": []
   },
   "source": [
    "## 1. Create Online Endpoint\n",
    "\n",
    "We can create an **online endpoint** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_online_endpoint.png\" width = \"700px\" alt=\"Create Online Endpoint cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "gather": {
     "logged": 1669584576485
    },
    "jupyter": {
     "outputs_hidden": false,
     "source_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [],
   "source": [
    "from azure.ai.ml.entities import ManagedOnlineEndpoint\n",
    "import random\n",
    "\n",
    "rand = random.randint(0, 10000)\n",
    "\n",
    "endpoint_name = f\"taxi-online-endpoint-{rand}\"\n",
    "# create an online endpoint\n",
    "online_endpoint = ManagedOnlineEndpoint(\n",
    "    name=endpoint_name, \n",
    "    description=\"Taxi online endpoint\",\n",
    "    auth_mode=\"aml_token\",\n",
    ")\n",
    "poller = ml_client.online_endpoints.begin_create_or_update(\n",
    "    online_endpoint,   \n",
    ")\n",
    "\n",
    "poller.wait()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Endpoint creation succeeded\n",
      "ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://taxi-online-endpoint-5807.eastus2.inference.ml.azure.com/score', 'openapi_uri': 'https://taxi-online-endpoint-5807.eastus2.inference.ml.azure.com/swagger.json', 'name': 'taxi-online-endpoint-5807', 'description': 'Taxi online endpoint', 'tags': {}, 'properties': {'azureml.onlineendpointid': '/subscriptions/14585b9f-5c83-4a76-8055-42149123f99f/resourcegroups/mldemorg/providers/microsoft.machinelearningservices/workspaces/mldemo/onlineendpoints/taxi-online-endpoint-5807', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/14585b9f-5c83-4a76-8055-42149123f99f/providers/Microsoft.MachineLearningServices/locations/eastus2/mfeOperationsStatus/oe:00187fbf-e9d1-40fe-becd-8d9bd1713ab3:a7f35f02-2493-40f0-8452-35cd3a20cb73?api-version=2022-02-01-preview'}, 'id': '/subscriptions/14585b9f-5c83-4a76-8055-42149123f99f/resourceGroups/mldemorg/providers/Microsoft.MachineLearningServices/workspaces/mldemo/onlineEndpoints/taxi-online-endpoint-5807', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/jomedin2/code/Users/jomedin/mlops-v2/ml-pipelines/sdk', 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x7f06d308df30>, 'auth_mode': 'aml_token', 'location': 'eastus2', 'identity': <azure.ai.ml.entities._credentials.IdentityConfiguration object at 0x7f06c93f9690>, 'traffic': {}, 'mirror_traffic': {}, 'kind': 'Managed'})\n"
     ]
    }
   ],
   "source": [
    "from azure.ai.ml.exceptions import DeploymentException\n",
    "\n",
    "status = poller.status()\n",
    "if status != \"Succeeded\":\n",
    "    raise DeploymentException(status)\n",
    "else:\n",
    "    print(\"Endpoint creation succeeded\")\n",
    "    endpoint = poller.result()\n",
    "    print(endpoint)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": []
   },
   "source": [
    "## 2. Create Online Deployment\n",
    "\n",
    "To create a deployment for an online endpoint, you need to specify the following elements:\n",
    "\n",
    "* Model files (or specify a registered model in your workspace)\n",
    "* Scoring script - code needed to do scoring/inferencing\n",
    "* Environment - a Docker image with Conda dependencies, or a dockerfile\n",
    "* Compute instance & scale settings\n",
    "\n",
    "Note that if you're deploying **MLFlow models**, there's no need to provide **a scoring script** and execution **environment**, as both are autogenerated.\n",
    "\n",
    "We can create an **online deployment** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_online_deployment.png\" width = \"700px\" alt=\"Create Online Deployment cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false,
    "gather": {
     "logged": 1669584886619
    },
    "jupyter": {
     "outputs_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Check: endpoint taxi-online-endpoint-5807 exists\n",
      "data_collector is not a known attribute of class <class 'azure.ai.ml._restclient.v2022_02_01_preview.models._models_py3.ManagedOnlineDeployment'> and will be ignored\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "....................................................................................."
     ]
    }
   ],
   "source": [
    "# create online deployment\n",
    "from azure.ai.ml.entities import ManagedOnlineDeployment, Model, Environment\n",
    "\n",
    "blue_deployment = ManagedOnlineDeployment(\n",
    "    name=\"blue\",\n",
    "    endpoint_name=endpoint_name,\n",
    "    model=\"taxi-model@latest\",\n",
    "    instance_type=\"Standard_DS2_v2\",\n",
    "    instance_count=1,\n",
    ")\n",
    "\n",
    "poller = ml_client.online_deployments.begin_create_or_update(\n",
    "    deployment=blue_deployment\n",
    ")\n",
    "poller.wait()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Allocate Traffic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "gather": {
     "logged": 1670199946158
    }
   },
   "outputs": [],
   "source": [
    "# allocate traffic\n",
    "# blue deployment takes 100% of the traffic\n",
    "online_endpoint.traffic = {\"blue\": 100}\n",
    "poller = ml_client.begin_create_or_update(online_endpoint)\n",
    "poller.wait()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Invoke and Test Endpoint\n",
    "\n",
    "We can invoke the **online deployment** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/invoke_online_endpoint.png\" width = \"700px\" alt=\"Invoke online endpoint cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "gather": {
     "logged": 1668246829854
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'[11.928738280516184, 15.403240743572406]'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# invoke and test endpoint\n",
    "ml_client.online_endpoints.invoke(\n",
    "    endpoint_name=endpoint_name,\n",
    "    request_file=\"../../data/taxi-request.json\",\n",
    ")\n"
   ]
  }
 ],
 "metadata": {
  "kernel_info": {
   "name": "python310-sdkv2"
  },
  "kernelspec": {
   "display_name": "Python 3.10 - SDK V2",
   "language": "python",
   "name": "python310-sdkv2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "microsoft": {
   "host": {
    "AzureML": {
     "notebookHasBeenCompleted": true
    }
   }
  },
  "nteract": {
   "version": "nteract-front-end@1.0.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
--- a/ml-pipelines/sdk/train-sdkv2.ipynb
+++ b/ml-pipelines/sdk/train-sdkv2.ipynb
--- a/mlops/azureml/azureml-cliv2.ipynb
+++ b/mlops/azureml/azureml-cliv2.ipynb
@ -1,605 +0,0 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For this workshop, you need:\n",
    "\n",
    "* An Azure Machine Learning workspace. \n",
    "* The Azure Machine Learning CLI v2 installed.\n",
    "\n",
    "To get the CLI, you can either:\n",
    "\n",
    "Create a compute instance, which already has the latest Azure ML CLI installed and is pre-configured for ML workflows, or\n",
    "\n",
    "Use the following command to install the Azure ML CLI v2:\n",
    "\n",
    "```bash\n",
    "az extension add --name ml\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!az extension add --name ml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!az login"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    },
    "tags": []
   },
   "source": [
    "# Model Training"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    },
    "tags": []
   },
   "source": [
    "## 1. Create Managed Compute\n",
    "\n",
    "A compute is a designated compute resource where you run your job or host your endpoint. Azure Machine Learning supports the following types of compute:\n",
    "\n",
    "- **Compute instance** - a fully configured and managed development environment in the cloud. You can use the instance as a training or inference compute for development and testing. It's similar to a virtual machine on the cloud.\n",
    "\n",
    "- **Compute cluster** - a managed-compute infrastructure that allows you to easily create a cluster of CPU or GPU compute nodes in the cloud.\n",
    "\n",
    "- **Inference cluster** - used to deploy trained machine learning models to Azure Kubernetes Service. You can create an Azure Kubernetes Service (AKS) cluster from your Azure ML workspace, or attach an existing AKS cluster.\n",
    "\n",
    "- **Attached compute** - You can attach your own compute resources to your workspace and use them for training and inference.\n",
    "\n",
    "You can create a compute using the Studio, the cli and the sdk.\n",
    "\n",
    "<hr>\n",
    "\n",
    "We can create a **compute instance** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_compute_instance.png\" width = \"700px\" alt=\"Create Compute Instance cli vs sdk\">\n",
    "</center>\n",
    "\n",
    "\n",
    "<hr>\n",
    "\n",
    "We can create a **compute cluster** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_compute_cluster.png\" width = \"700px\" alt=\"Create Compute Instance cli vs sdk\">\n",
    "</center>\n",
    "\n",
    "\n",
    "Let's create a managed compute cluster for the training workload."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create train job compute cluster\n",
    "!az ml compute create --file train/compute.yml"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    },
    "tags": []
   },
   "source": [
    "## 2. Register Data Asset\n",
    "\n",
    "**Datastore** - Azure Machine Learning Datastores securely keep the connection information to your data storage on Azure, so you don't have to code it in your scripts.\n",
    "\n",
    "An Azure Machine Learning datastore is a **reference** to an **existing** storage account on Azure. The benefits of creating and using a datastore are:\n",
    "* A common and easy-to-use API to interact with different storage types. \n",
    "* Easier to discover useful datastores when working as a team.\n",
    "* When using credential-based access (service principal/SAS/key), the connection information is secured so you don't have to code it in your scripts.\n",
    "\n",
    "Supported Data Resources: \n",
    "\n",
    "* Azure Storage blob container\n",
    "* Azure Storage file share\n",
    "* Azure Data Lake Gen 1\n",
    "* Azure Data Lake Gen 2\n",
    "* Azure SQL Database \n",
    "* Azure PostgreSQL Database\n",
    "* Azure MySQL Database\n",
    "\n",
    "It is not a requirement to use Azure Machine Learning datastores - you can use storage URIs directly assuming you have access to the underlying data.\n",
    "\n",
    "You can create a datastore using the Studio, the cli and the sdk.\n",
    "\n",
    "<hr>\n",
    "\n",
    "We can create a **datastore** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_datastore.png\" width = \"700px\" alt=\"Create Datastore cli vs sdk\">\n",
    "</center>\n",
    "\n",
    "\n",
    "\n",
    "**Data asset** - Create data assets in your workspace to share with team members, version, and track data lineage.\n",
    "\n",
    "By creating a data asset, you create a reference to the data source location, along with a copy of its metadata. \n",
    "\n",
    "The benefits of creating data assets are:\n",
    "\n",
    "* You can **share and reuse data** with other members of the team such that they do not need to remember file locations.\n",
    "* You can **seamlessly access data** during model training (on any supported compute type) without worrying about connection strings or data paths.\n",
    "* You can **version** the data.\n",
    "\n",
    "<hr>\n",
    "\n",
    "We can create a **data asset** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_data_asset.png\" width = \"700px\" alt=\"Create Data Asset cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false,
     "source_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [],
   "source": [
    "# Register data asset \n",
    "!az ml data create --file train/data.yml"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "source": [
    "## 3. Register Train Environment\n",
    "\n",
    "Azure Machine Learning environments define the execution environments for your **jobs** or **deployments** and encapsulate the dependencies for your code. \n",
    "\n",
    "Azure ML uses the environment specification to create the Docker container that your **training** or **scoring code** runs in on the specified compute target.\n",
    "\n",
    "Create an environment from a\n",
    "* conda specification\n",
    "* Docker image\n",
    "* Docker build context\n",
    "\n",
    "There are two types of environments in Azure ML: **curated** and **custom environments**. Curated environments are predefined environments containing popular ML frameworks and tooling. Custom environments are user-defined.\n",
    "\n",
    "<hr>\n",
    "\n",
    "We can register an **environment** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_environment.png\" width = \"700px\" alt=\"Create Environment cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false,
     "source_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [],
   "source": [
    "# Register train environment \n",
    "!az ml environment create --file train/environment.yml"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    },
    "tags": []
   },
   "source": [
    "## 4. Create Pipeline Job\n",
    "\n",
    "**AML Job**:\n",
    "\n",
    "Azure ML provides several ways to train your models, from code-first solutions to low-code solutions:\n",
    "\n",
    "* Azure ML supports script files in python, R, Java, Julia or C#. All you need to learn is YAML format and command lines to use Azure ML.\n",
    "\n",
    "* Distributed Training: AML supports integrations with popular frameworks, PyTorch and TensorFlow. Both frameworks employ data parallelism & model parallelism for distributed training.\n",
    "\n",
    "* Automated ML - Train models without extensive data science or programming knowledge.\n",
    "\n",
    "* Designer - drag and drop web-based UI.\n",
    "\n",
    "<hr>\n",
    "\n",
    "We can submit a **job** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_job.png\" width = \"700px\" alt=\"Create Job cli vs sdk\">\n",
    "</center>\n",
    "\n",
    "<br>\n",
    "    \n",
    "**AML Pipelines**:\n",
    "\n",
    "An AML pipeline is an independently executable workflow of a complete machine learning task. It helps standardizing the best practices of producing a machine learning model: The core of a machine learning pipeline is to split a complete machine learning task into a multistep workflow. Each step is a manageable component that can be developed, optimized, configured, and automated individually. \n",
    "\n",
    "<hr>\n",
    "\n",
    "We can submit a **pipeline job** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_pipeline.png\" width = \"700px\" alt=\"Create Pipeline cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false,
     "source_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [],
   "source": [
    "# Create pipeline job\n",
    "!az ml job create --file train/pipeline.yml"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "source": [
    "# Online Endpoint\n",
    "\n",
    "Online endpoints are endpoints that are used for online (real-time) inferencing. They receive data from clients and can send responses back in real time.\n",
    "\n",
    "An **endpoint** is an HTTPS endpoint that clients can call to receive the inferencing (scoring) output of a trained model. It provides:\n",
    "* Authentication using \"key & token\" based auth\n",
    "* SSL termination\n",
    "* A stable scoring URI (endpoint-name.region.inference.ml.azure.com)\n",
    "\n",
    "A **deployment** is a set of resources required for hosting the model that does the actual inferencing.\n",
    "A single endpoint can contain multiple deployments.\n",
    "\n",
    "Features of the managed online endpoint:\n",
    "\n",
    "* **Test and deploy locally** for faster debugging\n",
    "* Traffic to one deployment can also be **mirrored** (copied) to another deployment.\n",
    "* **Application Insights integration**\n",
    "* Security\n",
    "* Authentication: Key and Azure ML Tokens\n",
    "* Automatic Autoscaling\n",
    "* Visual Studio Code debugging\n",
    "\n",
    "**blue-green deployment**: An approach where a new version of a web service is introduced to production by deploying it to a small subset of users/requests before deploying it fully.\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/endpoint_concept.png\" width = \"500px\" alt=\"Online Endpoint Concept cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "source": [
    "## 1. Create Online Endpoint\n",
    "\n",
    "We can create an **online endpoint** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_online_endpoint.png\" width = \"700px\" alt=\"Create Online Endpoint cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false,
     "source_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [],
   "source": [
    "# create online endpoint\n",
    "!az ml online-endpoint create --file deploy/online/online-endpoint.yml"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Create Online Deployment\n",
    "\n",
    "To create a deployment for an online endpoint, you need to specify the following elements:\n",
    "\n",
    "* Model files (or specify a registered model in your workspace)\n",
    "* Scoring script - code needed to do scoring/inferencing\n",
    "* Environment - a Docker image with Conda dependencies, or a dockerfile\n",
    "* Compute instance & scale settings\n",
    "\n",
    "Note that if you're deploying **MLflow models**, there's no need to provide **a scoring script** and execution **environment**, as both are autogenerated.\n",
    "\n",
    "We can create an **online deployment** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_online_deployment.png\" width = \"700px\" alt=\"Create Online Deployment cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false,
     "source_hidden": false
    },
    "nteract": {
     "transient": {
      "deleting": false
     }
    }
   },
   "outputs": [],
   "source": [
    "# create online deployment\n",
    "!az ml online-deployment create --file deploy/online/online-deployment.yml "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Allocate Traffic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# allocate traffic\n",
    "!az ml online-endpoint update --name taxi-online-endpoint --traffic blue=100"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Invoke and Test Endpoint\n",
    "\n",
    "We can invoke the **online deployment** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/invoke_online_endpoint.png\" width = \"700px\" alt=\"Invoke online endpoint cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# invoke and test endpoint\n",
    "!az ml online-endpoint invoke --name taxi-online-endpoint --request-file ../../data/taxi-request.json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Model Batch Endpoint"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Create Batch Compute Cluster"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create the compute cluster used by the batch deployment\n",
    "!az ml compute create -n batch-cluster --type amlcompute --min-instances 0 --max-instances 3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Create Batch Endpoint\n",
    "\n",
    "We can create the **batch endpoint** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_batch_endpoint.png\" width = \"700px\" alt=\"Create batch endpoint cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create batch endpoint\n",
    "!az ml batch-endpoint create --file deploy/batch/batch-endpoint.yml"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Create Batch Deployment\n",
    "\n",
    "We can create the **batch deployment** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/create_batch_deployment.png\" width = \"700px\" alt=\"Create batch deployment cli vs sdk\">\n",
    "</center>\n",
    "\n",
    "Note that if you're deploying **MLflow models**, there's no need to provide **a scoring script** and execution **environment**, as both are autogenerated."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create batch deployment\n",
    "!az ml batch-deployment create --file deploy/batch/batch-deployment.yml --set-default"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Invoke and Test Endpoint\n",
    "\n",
    "We can invoke the **batch deployment** with cli v2 or sdk v2 using the following syntax:\n",
    "\n",
    "<center>\n",
    "<img src=\"../../imgs/invoke_batch_deployment.png\" width = \"700px\" alt=\"Invoke batch deployment cli vs sdk\">\n",
    "</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# invoke and test endpoint\n",
    "!az ml batch-endpoint invoke --name taxi-batch-endpoint --input ../../data/taxi-batch.csv"
   ]
  }
 ],
 "metadata": {
  "kernel_info": {
   "name": "python38-azureml"
  },
  "kernelspec": {
   "display_name": "Python 3.8 - AzureML",
   "language": "python",
   "name": "python38-azureml"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "nteract": {
   "version": "nteract-front-end@1.0.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
--- a/mlops/azureml/azureml-sdkv2.ipynb
+++ b/mlops/azureml/azureml-sdkv2.ipynb
--- a/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml
+++ b/mlops/devops-pipelines/deploy-batch-endpoint-pipeline.yml
@ -1,66 +0,0 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # Pipeline variables. The two template-expression entries select the infra
 # config by branch: 'main' inserts the PRD file, any other branch the DEV file.
 variables:
 - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
    # 'main' branch: PRD environment
    - template: ../../config-infra-prod.yml
 - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:  
    # 'develop' or feature branches: DEV environment
    - template: ../../config-infra-dev.yml
 # Folder name inside the mlops-templates repo; used below in every
 # templates/${{ variables.version }}/... step path.
 - name: version
  value: aml-cli-v2 
 # namespace, postfix and environment are not defined in this file —
 # presumably they come from the config-infra template selected above.
 - name: endpoint_name
  value: taxi-batch-$(namespace)$(postfix)$(environment)
 # NOTE(review): endpoint_name / endpoint_type are not referenced in this file;
 # presumably the step templates read them — confirm.
 - name: endpoint_type
  value: batch
 # NOTE(review): written as a one-item list; the intent appears to be
 # disabling CI triggers (`trigger: none`) — confirm.
 trigger:
 - none
 # Agent image the job runs on.
 pool:
  vmImage: ubuntu-20.04
 # External repository that supplies the step templates referenced with the
 # `@mlops-templates` suffix in the steps below.
 resources:
  repositories:
    - repository: mlops-templates  # Template Repo
      name: Azure/mlops-templates # need to change org name from "Azure" to your own org
      endpoint: github-connection # need to set up and hardcode
      type: github
      ref: main
 # Single stage with a single job; the steps run in the order listed.
 stages:
 - stage: CreateBatchEndpoint
  displayName: Create/Update Batch Endpoint 
  jobs:
    - job: DeployBatchEndpoint
      steps:
      # Check out this repo into s/ and the template repo into s/templates/.
      - checkout: self
        path: s/
      - checkout: mlops-templates
        path: s/templates/
      # The step templates below live in the mlops-templates repo; what each one
      # does is defined there, not in this file.
      - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
      - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
      - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
      - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
        parameters:
          cluster_name: batch-cluster # name must match cluster name in deployment file below
          size: STANDARD_DS3_V2
          min_instances: 0
          max_instances: 5
          cluster_tier: dedicated
      - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
        parameters: 
          endpoint_file: mlops/azureml/deploy/batch/batch-endpoint.yml
      # The same deployment_name is passed to create-deployment and test-deployment.
      - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
        parameters:
          deployment_name: taxi-batch-dp
          deployment_file: mlops/azureml/deploy/batch/batch-deployment.yml      
      - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
        parameters:
          deployment_name: taxi-batch-dp
          sample_request: data/taxi-batch.csv
          request_type: uri_file # either uri_folder or uri_file
--- a/mlops/devops-pipelines/deploy-model-training-pipeline.yml
+++ b/mlops/devops-pipelines/deploy-model-training-pipeline.yml
@ -1,59 +0,0 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # Pipeline variables. The two template-expression entries select the infra
 # config by branch: 'main' inserts the PRD file, any other branch the DEV file.
 variables:
 - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
    # 'main' branch: PRD environment
    - template: ../../config-infra-prod.yml
 - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:  
    # 'develop' or feature branches: DEV environment
    - template: ../../config-infra-dev.yml
 # Folder name inside the mlops-templates repo; used below in the
 # templates/${{ variables.version }}/... step paths.
 - name: version
  value: aml-cli-v2
 # External repository that supplies the step templates referenced with the
 # `@mlops-templates` suffix in the steps below.
 resources:
  repositories:
    - repository: mlops-templates  # Template Repo
      name: Azure/mlops-templates # need to change org name from "Azure" to your own org
      endpoint: github-connection # need to set up and hardcode
      type: github
      ref: main
 # NOTE(review): written as a one-item list; the intent appears to be
 # disabling CI triggers (`trigger: none`) — confirm.
 trigger:
 - none
 # Agent image the job runs on.
 pool:
  vmImage: ubuntu-20.04
 # Single stage with a single job; the steps run in the order listed.
 stages:
 - stage: DeployTrainingPipeline
  displayName: Deploy Training Pipeline
  jobs:
    - job: DeployTrainingPipeline
      timeoutInMinutes: 120 # how long to run the job before automatically cancelling
      steps:
      # Check out this repo into s/ and the template repo into s/templates/.
      - checkout: self
        path: s/
      - checkout: mlops-templates
        path: s/templates/
      # unit-tests.yml is taken from templates/tests/, not from the versioned folder.
      - template: templates/tests/unit-tests.yml@mlops-templates
      # The step templates below live in the mlops-templates repo; what each one
      # does is defined there, not in this file.
      - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
      - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
      - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
      - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
        parameters:
          build_type: conda
          environment_name: taxi-train-env
          environment_file: mlops/azureml/train/environment.yml
          # enable_monitoring is not defined in this file — presumably it comes
          # from the config-infra template selected above.
          enable_monitoring: $(enable_monitoring)
      - template: templates/${{ variables.version }}/register-data.yml@mlops-templates
        parameters:
          data_type: uri_file
          data_name: taxi-data
          data_file: mlops/azureml/train/data.yml
      - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
        parameters: 
          pipeline_file: mlops/azureml/train/pipeline.yml
          # Experiment and run names embed the environment, branch name and build id.
          experiment_name: $(environment)_taxi_fare_train_$(Build.SourceBranchName)
          display_name: $(environment)_taxi_fare_run_$(Build.BuildID)
          enable_monitoring: $(enable_monitoring)
--- a/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml
+++ b/mlops/devops-pipelines/deploy-online-endpoint-pipeline.yml
@ -1,61 +0,0 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # Pipeline variables. The two template-expression entries select the infra
 # config by branch: 'main' inserts the PRD file, any other branch the DEV file.
 variables:
 - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
    # 'main' branch: PRD environment
    - template: ../../config-infra-prod.yml
 - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
    # 'develop' or feature branches: DEV environment
    # Template paths are relative to this file, so the DEV config must sit at
    # the same depth as the PRD config above.
    - template: ../../config-infra-dev.yml
 # Folder name inside the mlops-templates repo; used in the
 # templates/${{ variables.version }}/... step paths of this pipeline.
 - name: version
  value: aml-cli-v2
 # namespace, postfix and environment are not defined in this file —
 # presumably they come from the config-infra template selected above.
 - name: endpoint_name
  value: taxi-online-$(namespace)$(postfix)$(environment)
 # NOTE(review): endpoint_name / endpoint_type are not referenced in this file;
 # presumably the step templates read them — confirm.
 - name: endpoint_type
  value: online
 # NOTE(review): written as a one-item list; the intent appears to be
 # disabling CI triggers (`trigger: none`) — confirm.
 trigger:
 - none
 # Agent image the job runs on.
 pool:
  vmImage: ubuntu-20.04
 # External repository that supplies the step templates referenced with the
 # `@mlops-templates` suffix in the steps below.
 resources:
  repositories:
    - repository: mlops-templates  # Template Repo
      name: Azure/mlops-templates # need to change org name from "Azure" to your own org
      endpoint: github-connection # need to set up and hardcode
      type: github
      ref: main
 # Single stage with a single job; the steps run in the order listed.
 stages:
 - stage: CreateOnlineEndpoint
  displayName: Create/Update Online Endpoint 
  jobs:
    - job: DeployOnlineEndpoint
      steps:
      # Check out this repo into s/ and the template repo into s/templates/.
      - checkout: self
        path: s/
      - checkout: mlops-templates
        path: s/templates/
      # The step templates below live in the mlops-templates repo; what each one
      # does is defined there, not in this file.
      - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
      - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
      - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
      - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
        parameters:
          endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml
      # The deployment name taxi-online-dp is reused by the traffic allocation
      # and the test step that follow.
      - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
        parameters:
          deployment_name: taxi-online-dp
          deployment_file: mlops/azureml/deploy/online/online-deployment.yml 
      - template: templates/${{ variables.version }}/allocate-traffic.yml@mlops-templates
        parameters:
          traffic_allocation: taxi-online-dp=100
      - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
        parameters:
          deployment_name: taxi-online-dp
          sample_request: data/taxi-request.json
          request_type: json
--- a/requirements.txt
+++ b/requirements.txt
@ -1,4 +0,0 @@
 black==22.3.0
 flake8==4.0.1
 isort==5.10.1
 pre-commit==2.19.0