initial commit

This commit is contained in:
Commit 12bd3f85d3
@@ -0,0 +1,141 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Mac stuff
.DS_Store

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Terraform
.terraform.lock.hcl
terraform.tfstate
terraform.tfstate.backup
.terraform.tfstate.lock.info
.terraform
terraform.tfvars

/infrastructure/bicep/main.json
# re-include the Bicep config (no space allowed after "!")
!/infrastructure/bicep/bicepconfig.json
@@ -0,0 +1,14 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.2.0
    hooks:
      - id: check-yaml
      - id: end-of-file-fixer
      - id: trailing-whitespace

  # Opinionated code formatter to forget about formatting
  - repo: https://github.com/psf/black
    rev: 21.12b0
    hooks:
      - id: black
        additional_dependencies: ['click==8.0.4']
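To activate these hooks locally, one would typically run `pre-commit install` once in the cloned repo (standard pre-commit usage; this step is not documented in the commit itself).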
@@ -0,0 +1,9 @@
# Microsoft Open Source Code of Conduct

This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).

Resources:

- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
@@ -0,0 +1,21 @@
MIT License

Copyright (c) Microsoft Corporation.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -0,0 +1,3 @@
# Azure MLOps (v2) solution accelerator

[Main README file](https://github.com/Azure/mlops-v2/blob/main/README.md)
@@ -0,0 +1,41 @@
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.5 BLOCK -->

## Security

Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).

If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.

## Reporting Security Issues

**Please do not report security vulnerabilities through public GitHub issues.**

Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).

If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).

You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).

Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:

* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue

This information will help us triage your report more quickly.

If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.

## Preferred Languages

We prefer all communications to be in English.

## Policy

Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).

<!-- END MICROSOFT SECURITY.MD BLOCK -->
@@ -0,0 +1,25 @@
# TODO: The maintainer of this repo has not yet edited this file

**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project?

- **No CSS support:** Fill out this template with information about how to file issues and get help.
- **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps.
- **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide.

*Then remove this first heading from this SUPPORT.MD file before publishing your repo.*

# Support

## How to file issues and get help

This project uses GitHub Issues to track bugs and feature requests. Please search the existing
issues before filing new issues to avoid duplicates. For new issues, file your bug or
feature request as a new Issue.

For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE
FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER
CHANNEL. WHERE WILL YOU HELP PEOPLE?**.

## Microsoft Support Policy

Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
@@ -0,0 +1,38 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

variables:

  # Global
  ap_vm_image: ubuntu-20.04

  namespace: mlopsv2 # Note: a long namespace will cause storage account creation to fail, since storage account names are limited to 24 characters.
  postfix: 0659
  location: westus

  environment: dev
  enable_aml_computecluster: true
  enable_aml_secure_workspace: true
  enable_monitoring: false

  # Azure DevOps
  ado_service_connection_rg: Azure-ARM-Dev
  ado_service_connection_aml_ws: Azure-ARM-Dev

  # DO NOT TOUCH

  # For pipeline reference
  resource_group: rg-$(namespace)-$(postfix)$(environment)
  aml_workspace: mlw-$(namespace)-$(postfix)$(environment)
  application_insights: mlw-$(namespace)-$(postfix)$(environment)
  key_vault: kv-$(namespace)-$(postfix)$(environment)
  container_registry: cr$(namespace)$(postfix)$(environment)
  storage_account: st$(namespace)$(postfix)$(environment)

  # For terraform reference
  terraform_version: 0.14.7
  terraform_workingdir: infrastructure/terraform
  terraform_st_resource_group: rg-$(namespace)-$(postfix)$(environment)-tf
  terraform_st_storage_account: st$(namespace)$(postfix)$(environment)tf
  terraform_st_container_name: default
  terraform_st_key: mlops-tab
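For reference, once Azure DevOps substitutes the macros above, the dev names expand to concrete values (my expansion, shown for illustration):

resource_group:  rg-mlopsv2-0659dev
aml_workspace:   mlw-mlopsv2-0659dev
storage_account: stmlopsv20659dev   (16 characters, safely under the 24-character limit from the namespace note)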
@@ -0,0 +1,39 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# Prod environment
variables:

  # Global
  ap_vm_image: ubuntu-20.04

  namespace: mlopsv2 # Note: a long namespace will cause storage account creation to fail, since storage account names are limited to 24 characters.
  postfix: 0518
  location: westeurope
  environment: prod
  enable_aml_computecluster: true
  enable_aml_secure_workspace: false
  enable_monitoring: true

  # Azure DevOps
  ado_service_connection_rg: Azure-ARM-Prod
  ado_service_connection_aml_ws: Azure-ARM-Prod

  # DO NOT TOUCH

  # For pipeline reference
  resource_group: rg-$(namespace)-$(postfix)$(environment)
  aml_workspace: mlw-$(namespace)-$(postfix)$(environment)
  application_insights: mlw-$(namespace)-$(postfix)$(environment)
  key_vault: kv-$(namespace)-$(postfix)$(environment)
  container_registry: cr$(namespace)$(postfix)$(environment)
  storage_account: st$(namespace)$(postfix)$(environment)

  # For terraform reference
  terraform_version: 0.14.7
  terraform_workingdir: infrastructure
  terraform_st_resource_group: rg-$(namespace)-$(postfix)$(environment)-tf
  terraform_st_storage_account: st$(namespace)$(postfix)$(environment)tf
  terraform_st_container_name: default
  terraform_st_key: mlops-tab
@@ -0,0 +1,16 @@
channels:
  - defaults
  - anaconda
  - conda-forge
dependencies:
  - python=3.7.5
  - pip
  - pip:
      - azureml-mlflow==1.38.0
      - azureml-sdk==1.38.0
      - scikit-learn==0.24.1
      - pandas==1.2.1
      - joblib==1.0.0
      - matplotlib==3.3.3
      - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
      - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
@@ -0,0 +1,16 @@
channels:
  - defaults
  - anaconda
  - conda-forge
dependencies:
  - python=3.7.5
  - pip
  - pip:
      - azureml-mlflow==1.38.0
      - azureml-sdk==1.38.0
      - scikit-learn==0.24.1
      - pandas==1.2.1
      - joblib==1.0.0
      - matplotlib==3.3.3
      - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
      - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
(One file diff is hidden because one or more of its lines are too long.)
@@ -0,0 +1,264 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      },
      "outputs": [],
      "source": [
        "import argparse\n",
        "\n",
        "from pathlib import Path\n",
        "import os\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "\n",
        "import mlflow"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {},
      "outputs": [],
      "source": [
        "TARGET_COL = \"cost\"\n",
        "\n",
        "NUMERIC_COLS = [\n",
        "    \"distance\",\n",
        "    \"dropoff_latitude\",\n",
        "    \"dropoff_longitude\",\n",
        "    \"passengers\",\n",
        "    \"pickup_latitude\",\n",
        "    \"pickup_longitude\",\n",
        "    \"pickup_weekday\",\n",
        "    \"pickup_month\",\n",
        "    \"pickup_monthday\",\n",
        "    \"pickup_hour\",\n",
        "    \"pickup_minute\",\n",
        "    \"pickup_second\",\n",
        "    \"dropoff_weekday\",\n",
        "    \"dropoff_month\",\n",
        "    \"dropoff_monthday\",\n",
        "    \"dropoff_hour\",\n",
        "    \"dropoff_minute\",\n",
        "    \"dropoff_second\",\n",
        "]\n",
        "\n",
        "CAT_NOM_COLS = [\n",
        "    \"store_forward\",\n",
        "    \"vendor\",\n",
        "]\n",
        "\n",
        "CAT_ORD_COLS = [\n",
        "]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      },
      "outputs": [],
      "source": [
        "# Define Arguments for this step\n",
        "\n",
        "class MyArgs:\n",
        "    def __init__(self, /, **kwargs):\n",
        "        self.__dict__.update(kwargs)\n",
        "\n",
        "args = MyArgs(\n",
        "    raw_data = \"../../data/\",\n",
        "    train_data = \"/tmp/prep/train\",\n",
        "    val_data = \"/tmp/prep/val\",\n",
        "    test_data = \"/tmp/prep/test\",\n",
        ")\n",
        "\n",
        "os.makedirs(args.train_data, exist_ok = True)\n",
        "os.makedirs(args.val_data, exist_ok = True)\n",
        "os.makedirs(args.test_data, exist_ok = True)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {},
      "outputs": [],
      "source": [
        "\n",
        "def main(args):\n",
        "    '''Read, split, and save datasets'''\n",
        "\n",
        "    # ------------ Reading Data ------------ #\n",
        "    # -------------------------------------- #\n",
        "\n",
        "    print(\"mounted_path files: \")\n",
        "    arr = os.listdir(args.raw_data)\n",
        "    print(arr)\n",
        "\n",
        "    data = pd.read_csv((Path(args.raw_data) / 'taxi-data.csv'))\n",
        "    data = data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS + [TARGET_COL]]\n",
        "\n",
        "    # ------------- Split Data ------------- #\n",
        "    # -------------------------------------- #\n",
        "\n",
        "    # Split data into train, val and test datasets\n",
        "\n",
        "    random_data = np.random.rand(len(data))\n",
        "\n",
        "    msk_train = random_data < 0.7\n",
        "    msk_val = (random_data >= 0.7) & (random_data < 0.85)\n",
        "    msk_test = random_data >= 0.85\n",
        "\n",
        "    train = data[msk_train]\n",
        "    val = data[msk_val]\n",
        "    test = data[msk_test]\n",
        "\n",
        "    mlflow.log_metric('train size', train.shape[0])\n",
        "    mlflow.log_metric('val size', val.shape[0])\n",
        "    mlflow.log_metric('test size', test.shape[0])\n",
        "\n",
        "    train.to_parquet((Path(args.train_data) / \"train.parquet\"))\n",
        "    val.to_parquet((Path(args.val_data) / \"val.parquet\"))\n",
        "    test.to_parquet((Path(args.test_data) / \"test.parquet\"))\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Raw data path: ../../data/\n",
            "Train dataset output path: /tmp/prep/train\n",
            "Val dataset output path: /tmp/prep/val\n",
            "Test dataset path: /tmp/prep/test\n",
            "mounted_path files: \n",
            "['taxi-batch.csv', 'taxi-data.csv', 'taxi-request.json']\n"
          ]
        }
      ],
      "source": [
        "mlflow.start_run()\n",
        "\n",
        "lines = [\n",
        "    f\"Raw data path: {args.raw_data}\",\n",
        "    f\"Train dataset output path: {args.train_data}\",\n",
        "    f\"Val dataset output path: {args.val_data}\",\n",
        "    f\"Test dataset path: {args.test_data}\",\n",
        "]\n",
        "\n",
        "for line in lines:\n",
        "    print(line)\n",
        "\n",
        "main(args)\n",
        "\n",
        "mlflow.end_run()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 12,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "vscode": {
          "languageId": "shellscript"
        }
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            " Volume in drive C is Local Disk\n",
            " Volume Serial Number is 583C-74B4\n",
            "\n",
            " Directory of c:\\tmp\\prep\\train\n",
            "\n",
            "10/07/2022 12:08 AM <DIR> .\n",
            "10/07/2022 12:08 AM <DIR> ..\n",
            "10/07/2022 12:08 AM 277,190 train.parquet\n",
            " 1 File(s) 277,190 bytes\n",
            " 2 Dir(s) 788,218,421,248 bytes free\n"
          ]
        }
      ],
      "source": [
        "ls \"/tmp/prep/train\""
      ]
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "local-env"
    },
    "kernelspec": {
      "display_name": "Python 3.9.6 64-bit",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.6"
    },
    "nteract": {
      "version": "nteract-front-end@1.0.0"
    },
    "vscode": {
      "interpreter": {
        "hash": "c87d6401964827bd736fe8e727109b953dd698457ca58fb5acabab22fd6dac41"
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
@@ -0,0 +1,164 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import argparse\n",
        "from pathlib import Path\n",
        "import pickle\n",
        "import mlflow\n",
        "\n",
        "import os\n",
        "import json"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      },
      "outputs": [],
      "source": [
        "# Define Arguments for this step\n",
        "\n",
        "class MyArgs:\n",
        "    def __init__(self, /, **kwargs):\n",
        "        self.__dict__.update(kwargs)\n",
        "\n",
        "args = MyArgs(\n",
        "    model_name = \"taxi-model\",\n",
        "    model_path = \"/tmp/train\",\n",
        "    evaluation_output = \"/tmp/evaluate\",\n",
        "    model_info_output_path = \"/tmp/model_info_output_path\"\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      },
      "outputs": [],
      "source": [
        "def main(args):\n",
        "    '''Loads model, registers it if deploy flag is True'''\n",
        "\n",
        "    with open((Path(args.evaluation_output) / \"deploy_flag\"), 'rb') as infile:\n",
        "        deploy_flag = int(infile.read())\n",
        "\n",
        "    mlflow.log_metric(\"deploy flag\", int(deploy_flag))\n",
        "\n",
        "    if deploy_flag == 1:\n",
        "\n",
        "        print(\"Registering \", args.model_name)\n",
        "\n",
        "        # load model\n",
        "        model = mlflow.sklearn.load_model(args.model_path)\n",
        "\n",
        "        # log model using mlflow\n",
        "        mlflow.sklearn.log_model(model, args.model_name)\n",
        "\n",
        "        # register logged model using mlflow\n",
        "        run_id = mlflow.active_run().info.run_id\n",
        "        model_uri = f'runs:/{run_id}/{args.model_name}'\n",
        "        mlflow_model = mlflow.register_model(model_uri, args.model_name)\n",
        "        model_version = mlflow_model.version\n",
        "\n",
        "        # write model info (\"model_info\" avoids shadowing the dict builtin)\n",
        "        print(\"Writing JSON\")\n",
        "        model_info = {\"id\": \"{0}:{1}\".format(args.model_name, model_version)}\n",
        "        output_path = os.path.join(args.model_info_output_path, \"model_info.json\")\n",
        "        with open(output_path, \"w\") as of:\n",
        "            json.dump(model_info, fp=of)\n",
        "\n",
        "    else:\n",
        "        print(\"Model will not be registered!\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      },
      "outputs": [],
      "source": [
        "mlflow.start_run()\n",
        "\n",
        "lines = [\n",
        "    f\"Model name: {args.model_name}\",\n",
        "    f\"Model path: {args.model_path}\",\n",
        "    f\"Evaluation output path: {args.evaluation_output}\",\n",
        "]\n",
        "\n",
        "for line in lines:\n",
        "    print(line)\n",
        "\n",
        "main(args)\n",
        "\n",
        "mlflow.end_run()"
      ]
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "local-env"
    },
    "kernelspec": {
      "display_name": "Python 3.9.6 64-bit",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.6"
    },
    "nteract": {
      "version": "nteract-front-end@1.0.0"
    },
    "vscode": {
      "interpreter": {
        "hash": "c87d6401964827bd736fe8e727109b953dd698457ca58fb5acabab22fd6dac41"
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
@@ -0,0 +1,5 @@
azureml-mlflow==1.38.0
scikit-learn==0.24.1
pandas==1.2.1
joblib==1.0.0
matplotlib==3.3.3
(One file diff is hidden because one or more of its lines are too long.)
@@ -0,0 +1,191 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
Evaluates trained ML model using test dataset.
Saves predictions, evaluation results and deploy flag.
"""

import argparse
from pathlib import Path

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

import mlflow
import mlflow.sklearn
import mlflow.pyfunc
from mlflow.tracking import MlflowClient

TARGET_COL = "cost"

NUMERIC_COLS = [
    "distance",
    "dropoff_latitude",
    "dropoff_longitude",
    "passengers",
    "pickup_latitude",
    "pickup_longitude",
    "pickup_weekday",
    "pickup_month",
    "pickup_monthday",
    "pickup_hour",
    "pickup_minute",
    "pickup_second",
    "dropoff_weekday",
    "dropoff_month",
    "dropoff_monthday",
    "dropoff_hour",
    "dropoff_minute",
    "dropoff_second",
]

CAT_NOM_COLS = [
    "store_forward",
    "vendor",
]

CAT_ORD_COLS = [
]

def parse_args():
    '''Parse input arguments'''

    parser = argparse.ArgumentParser("predict")
    parser.add_argument("--model_name", type=str, help="Name of registered model")
    parser.add_argument("--model_input", type=str, help="Path of input model")
    parser.add_argument("--test_data", type=str, help="Path to test dataset")
    parser.add_argument("--evaluation_output", type=str, help="Path of eval results")
    parser.add_argument("--runner", type=str, help="Local or Cloud Runner", default="CloudRunner")

    args = parser.parse_args()

    return args

def main(args):
    '''Read trained model and test dataset, evaluate model and save result'''

    # Load the test data
    test_data = pd.read_parquet(Path(args.test_data))

    # Split the data into inputs and outputs
    y_test = test_data[TARGET_COL]
    X_test = test_data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]

    # Load the model from input port
    model = mlflow.sklearn.load_model(args.model_input)

    # ---------------- Model Evaluation ---------------- #
    yhat_test, score = model_evaluation(X_test, y_test, model, args.evaluation_output)

    # ----------------- Model Promotion ---------------- #
    if args.runner == "CloudRunner":
        predictions, deploy_flag = model_promotion(args.model_name, args.evaluation_output, X_test, y_test, yhat_test, score)

def model_evaluation(X_test, y_test, model, evaluation_output):

    # Get predictions for the test set
    yhat_test = model.predict(X_test)

    # Save the output data with feature columns, predicted cost, and actual cost in csv file
    output_data = X_test.copy()
    output_data["real_label"] = y_test
    output_data["predicted_label"] = yhat_test
    output_data.to_csv((Path(evaluation_output) / "predictions.csv"))

    # Evaluate Model performance with the test set
    r2 = r2_score(y_test, yhat_test)
    mse = mean_squared_error(y_test, yhat_test)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, yhat_test)

    # Print score report to a text file
    (Path(evaluation_output) / "score.txt").write_text(
        f"Scored with the following model:\n{format(model)}"
    )
    with open((Path(evaluation_output) / "score.txt"), "a") as outfile:
        outfile.write(f"Mean squared error: {mse:.2f} \n")
        outfile.write(f"Root mean squared error: {rmse:.2f} \n")
        outfile.write(f"Mean absolute error: {mae:.2f} \n")
        outfile.write(f"Coefficient of determination: {r2:.2f} \n")

    mlflow.log_metric("test r2", r2)
    mlflow.log_metric("test mse", mse)
    mlflow.log_metric("test rmse", rmse)
    mlflow.log_metric("test mae", mae)

    # Visualize results
    plt.scatter(y_test, yhat_test, color='black')
    plt.plot(y_test, y_test, color='blue', linewidth=3)
    plt.xlabel("Real value")
    plt.ylabel("Predicted value")
    plt.title("Comparing Model Predictions to Real values - Test Data")
    plt.savefig("predictions.png")
    mlflow.log_artifact("predictions.png")

    return yhat_test, r2

def model_promotion(model_name, evaluation_output, X_test, y_test, yhat_test, score):

    scores = {}
    predictions = {}

    client = MlflowClient()

    # Score every registered version of this model on the same test set
    for model_run in client.search_model_versions(f"name='{model_name}'"):
        model_version = model_run.version
        mdl = mlflow.pyfunc.load_model(
            model_uri=f"models:/{model_name}/{model_version}")
        predictions[f"{model_name}:{model_version}"] = mdl.predict(X_test)
        scores[f"{model_name}:{model_version}"] = r2_score(
            y_test, predictions[f"{model_name}:{model_version}"])

    # Deploy only if the candidate model ties or beats every registered version
    if scores:
        if score >= max(list(scores.values())):
            deploy_flag = 1
        else:
            deploy_flag = 0
    else:
        deploy_flag = 1
    print(f"Deploy flag: {deploy_flag}")

    with open((Path(evaluation_output) / "deploy_flag"), 'w') as outfile:
        outfile.write(f"{int(deploy_flag)}")

    # add current model score and predictions
    scores["current model"] = score
    predictions["current model"] = yhat_test

    perf_comparison_plot = pd.DataFrame(
        scores, index=["r2 score"]).plot(kind='bar', figsize=(15, 10))
    perf_comparison_plot.figure.savefig("perf_comparison.png")
    perf_comparison_plot.figure.savefig(Path(evaluation_output) / "perf_comparison.png")

    mlflow.log_metric("deploy flag", bool(deploy_flag))
    mlflow.log_artifact("perf_comparison.png")

    return predictions, deploy_flag

if __name__ == "__main__":

    mlflow.start_run()

    args = parse_args()

    lines = [
        f"Model name: {args.model_name}",
        f"Model path: {args.model_input}",
        f"Test data path: {args.test_data}",
        f"Evaluation output path: {args.evaluation_output}",
    ]

    for line in lines:
        print(line)

    main(args)

    mlflow.end_run()
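The promotion rule in model_promotion above reduces to a single comparison. A minimal sketch with made-up r2 values (illustrative only, not from the repo):

scores = {"taxi-model:1": 0.62, "taxi-model:2": 0.70}  # r2 of registered versions
candidate_r2 = 0.71                                    # r2 of the newly trained model
deploy_flag = 1 if (not scores or candidate_r2 >= max(scores.values())) else 0
assert deploy_flag == 1  # the candidate ties or beats the best registered version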
@@ -0,0 +1,149 @@
import os
import subprocess
from pathlib import Path
import pandas as pd

from sklearn.ensemble import RandomForestRegressor

import mlflow

TARGET_COL = "cost"

NUMERIC_COLS = [
    "distance",
    "dropoff_latitude",
    "dropoff_longitude",
    "passengers",
    "pickup_latitude",
    "pickup_longitude",
    "pickup_weekday",
    "pickup_month",
    "pickup_monthday",
    "pickup_hour",
    "pickup_minute",
    "pickup_second",
    "dropoff_weekday",
    "dropoff_month",
    "dropoff_monthday",
    "dropoff_hour",
    "dropoff_minute",
    "dropoff_second",
]

CAT_NOM_COLS = [
    "store_forward",
    "vendor",
]

CAT_ORD_COLS = [
]

def test_evaluate_model():

    test_data = "/tmp/test"
    model_input = "/tmp/model"
    evaluation_output = "/tmp/evaluate"
    model_name = "taxi-model"
    runner = "LocalRunner"

    os.makedirs(test_data, exist_ok = True)
    os.makedirs(model_input, exist_ok = True)
    os.makedirs(evaluation_output, exist_ok = True)

    data = {
        'cost': [4.5, 6.0, 9.5, 4.0, 6.0, 11.5, 25.0, 3.5, 5.0, 11.0, 7.5, 24.5, 9.5,
                 7.5, 6.0, 5.0, 9.0, 25.5, 17.5, 52.0],
        'distance': [0.83, 1.27, 1.8, 0.5, 0.9, 2.72, 6.83, 0.45, 0.77, 2.2, 1.5, 6.27,
                     2.0, 1.54, 1.24, 0.75, 2.2, 7.0, 5.1, 18.51],
        'dropoff_hour': [21, 21, 9, 17, 10, 13, 17, 10, 2, 1, 16, 18, 20, 20, 1, 17,
                         21, 16, 4, 10],
        'dropoff_latitude': [40.69454574584961, 40.81214904785156, 40.67874145507813,
                             40.75471496582031, 40.66966247558594, 40.77496337890625,
                             40.75603103637695, 40.67219161987305, 40.66605758666992,
                             40.69973754882813, 40.61215972900391, 40.74581146240234,
                             40.78779602050781, 40.76130676269531, 40.72980117797852,
                             40.71107864379883, 40.747501373291016, 40.752384185791016,
                             40.66606140136719, 40.64547729492188],
        'dropoff_longitude': [-73.97611236572266, -73.95975494384766,
                              -73.98030853271484, -73.92549896240234,
                              -73.91104125976562, -73.89237213134766,
                              -73.94535064697266, -74.01203918457031,
                              -73.97817993164062, -73.99366760253906,
                              -73.94902801513672, -73.98792266845703,
                              -73.95561218261719, -73.8807601928711, -73.9117202758789,
                              -73.96553039550781, -73.9442138671875,
                              -73.97544860839844, -73.87281036376953,
                              -73.77632141113281],
        'dropoff_minute': [5, 54, 57, 52, 34, 20, 5, 8, 37, 27, 21, 5, 26, 46, 25, 1,
                           5, 20, 41, 46],
        'dropoff_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'dropoff_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                             2, 15, 21],
        'dropoff_second': [52, 37, 28, 20, 59, 20, 38, 52, 43, 24, 59, 29, 58, 11, 3,
                           4, 34, 21, 6, 36],
        'dropoff_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4,
                            3],
        'passengers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1],
        'pickup_hour': [21, 21, 9, 17, 10, 13, 16, 10, 2, 1, 16, 17, 20, 20, 1, 16, 20,
                        15, 4, 10],
        'pickup_latitude': [40.6938362121582, 40.80146789550781, 40.6797981262207,
                            40.76081848144531, 40.66493988037109, 40.74625396728516,
                            40.80010223388672, 40.67601776123047, 40.67120361328125,
                            40.68327331542969, 40.6324462890625, 40.71521377563477,
                            40.80733871459961, 40.750484466552734, 40.7398796081543,
                            40.71691131591797, 40.773414611816406, 40.79001235961914,
                            40.660118103027344, 40.78546905517578],
        'pickup_longitude': [-73.98726654052734, -73.94845581054688, -73.9554443359375,
                             -73.92293548583984, -73.92304229736328, -73.8973159790039,
                             -73.9500503540039, -74.0144271850586, -73.98458099365234,
                             -73.96582794189453, -73.94767761230469,
                             -73.96052551269531, -73.96453094482422,
                             -73.88248443603516, -73.92410278320312,
                             -73.95661163330078, -73.92512512207031,
                             -73.94800567626953, -73.95987701416016,
                             -73.94915771484375],
        'pickup_minute': [2, 49, 46, 49, 28, 8, 32, 6, 34, 14, 14, 35, 17, 38, 20, 56,
                          56, 49, 23, 18],
        'pickup_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'pickup_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                            2, 15, 21],
        'pickup_second': [35, 17, 18, 12, 21, 46, 18, 22, 5, 45, 12, 52, 20, 8, 28, 54,
                          41, 53, 43, 2],
        'pickup_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4, 3],
        'store_forward': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        'vendor': [2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2]
    }

    # Save the data
    df = pd.DataFrame(data)
    df.to_parquet(os.path.join(test_data, "test.parquet"))

    # Split the data into inputs and outputs
    y_test = df[TARGET_COL]
    X_test = df[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]

    # Train a Random Forest Regression Model with the training set
    model = RandomForestRegressor(random_state=0)
    model.fit(X_test, y_test)

    # Save the model
    mlflow.sklearn.save_model(sk_model=model, path=model_input)

    cmd = f"python data-science/src/evaluate/evaluate.py --model_name={model_name} --model_input={model_input} --test_data={test_data} --evaluation_output={evaluation_output} --runner={runner}"
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
    out, err = p.communicate()
    # out is bytes; splitting its repr on the literal "\n" recovers the printed lines
    result = str(out).split('\\n')
    for lin in result:
        if not lin.startswith('#'):
            print(lin)

    assert os.path.exists(os.path.join(evaluation_output, "predictions.csv"))
    assert os.path.exists(os.path.join(evaluation_output, "score.txt"))

    print("Evaluate Model Unit Test Completed")

if __name__ == "__main__":
    test_evaluate_model()
@@ -0,0 +1,132 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
Prepares raw data and provides training, validation and test datasets
"""

import argparse

from pathlib import Path
import os
import numpy as np
import pandas as pd

import mlflow

TARGET_COL = "cost"

NUMERIC_COLS = [
    "distance",
    "dropoff_latitude",
    "dropoff_longitude",
    "passengers",
    "pickup_latitude",
    "pickup_longitude",
    "pickup_weekday",
    "pickup_month",
    "pickup_monthday",
    "pickup_hour",
    "pickup_minute",
    "pickup_second",
    "dropoff_weekday",
    "dropoff_month",
    "dropoff_monthday",
    "dropoff_hour",
    "dropoff_minute",
    "dropoff_second",
]

CAT_NOM_COLS = [
    "store_forward",
    "vendor",
]

CAT_ORD_COLS = [
]

def parse_args():
    '''Parse input arguments'''

    parser = argparse.ArgumentParser("prep")
    parser.add_argument("--raw_data", type=str, help="Path to raw data")
    parser.add_argument("--train_data", type=str, help="Path to train dataset")
    parser.add_argument("--val_data", type=str, help="Path to val dataset")
    parser.add_argument("--test_data", type=str, help="Path to test dataset")

    # default added so the check in main() does not fail when the flag is omitted
    parser.add_argument("--enable_monitoring", type=str, default="false", help="enable logging to ADX")
    parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging")

    args = parser.parse_args()

    return args

def log_training_data(df, table_name):
    from obs.collector import Online_Collector
    collector = Online_Collector(table_name)
    collector.batch_collect(df)

def main(args):
    '''Read, split, and save datasets'''

    # ------------ Reading Data ------------ #
    # -------------------------------------- #

    print("mounted_path files: ")
    arr = os.listdir(args.raw_data)
    print(arr)

    data = pd.read_csv((Path(args.raw_data) / 'taxi-data.csv'))
    data = data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS + [TARGET_COL]]

    # ------------- Split Data ------------- #
    # -------------------------------------- #

    # Split data into train, val and test datasets

    random_data = np.random.rand(len(data))

    msk_train = random_data < 0.7
    msk_val = (random_data >= 0.7) & (random_data < 0.85)
    msk_test = random_data >= 0.85

    train = data[msk_train]
    val = data[msk_val]
    test = data[msk_test]

    mlflow.log_metric('train size', train.shape[0])
    mlflow.log_metric('val size', val.shape[0])
    mlflow.log_metric('test size', test.shape[0])

    train.to_parquet((Path(args.train_data) / "train.parquet"))
    val.to_parquet((Path(args.val_data) / "val.parquet"))
    test.to_parquet((Path(args.test_data) / "test.parquet"))

    # .lower() must be called; the original compared the bound method itself
    if (args.enable_monitoring.lower() == 'true' or args.enable_monitoring == '1' or args.enable_monitoring.lower() == 'yes'):
        log_training_data(data, args.table_name)


if __name__ == "__main__":

    mlflow.start_run()

    # ---------- Parse Arguments ----------- #
    # -------------------------------------- #

    args = parse_args()

    lines = [
        f"Raw data path: {args.raw_data}",
        f"Train dataset output path: {args.train_data}",
        f"Val dataset output path: {args.val_data}",
        f"Test dataset path: {args.test_data}",
    ]

    for line in lines:
        print(line)

    main(args)

    mlflow.end_run()
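A quick sanity check of the mask-based split in main() above (a sketch, not part of the repo): each row draws one uniform number, so on average 70% of rows land in train and 15% each in val and test.

import numpy as np
draws = np.random.rand(100_000)
print((draws < 0.7).mean())                      # ~0.70 (train fraction)
print(((draws >= 0.7) & (draws < 0.85)).mean())  # ~0.15 (val fraction)
print((draws >= 0.85).mean())                    # ~0.15 (test fraction)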
@@ -0,0 +1,101 @@
import os
import subprocess
import pandas as pd

def test_prep_data():

    raw_data = "/tmp/raw"
    train_data = "/tmp/train"
    val_data = "/tmp/val"
    test_data = "/tmp/test"

    os.makedirs(raw_data, exist_ok = True)
    os.makedirs(train_data, exist_ok = True)
    os.makedirs(val_data, exist_ok = True)
    os.makedirs(test_data, exist_ok = True)

    data = {
        'cost': [4.5, 6.0, 9.5, 4.0, 6.0, 11.5, 25.0, 3.5, 5.0, 11.0, 7.5, 24.5, 9.5,
                 7.5, 6.0, 5.0, 9.0, 25.5, 17.5, 52.0],
        'distance': [0.83, 1.27, 1.8, 0.5, 0.9, 2.72, 6.83, 0.45, 0.77, 2.2, 1.5, 6.27,
                     2.0, 1.54, 1.24, 0.75, 2.2, 7.0, 5.1, 18.51],
        'dropoff_hour': [21, 21, 9, 17, 10, 13, 17, 10, 2, 1, 16, 18, 20, 20, 1, 17,
                         21, 16, 4, 10],
        'dropoff_latitude': [40.69454574584961, 40.81214904785156, 40.67874145507813,
                             40.75471496582031, 40.66966247558594, 40.77496337890625,
                             40.75603103637695, 40.67219161987305, 40.66605758666992,
                             40.69973754882813, 40.61215972900391, 40.74581146240234,
                             40.78779602050781, 40.76130676269531, 40.72980117797852,
                             40.71107864379883, 40.747501373291016, 40.752384185791016,
                             40.66606140136719, 40.64547729492188],
        'dropoff_longitude': [-73.97611236572266, -73.95975494384766,
                              -73.98030853271484, -73.92549896240234,
                              -73.91104125976562, -73.89237213134766,
                              -73.94535064697266, -74.01203918457031,
                              -73.97817993164062, -73.99366760253906,
                              -73.94902801513672, -73.98792266845703,
                              -73.95561218261719, -73.8807601928711, -73.9117202758789,
                              -73.96553039550781, -73.9442138671875,
                              -73.97544860839844, -73.87281036376953,
                              -73.77632141113281],
        'dropoff_minute': [5, 54, 57, 52, 34, 20, 5, 8, 37, 27, 21, 5, 26, 46, 25, 1,
                           5, 20, 41, 46],
        'dropoff_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'dropoff_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                             2, 15, 21],
        'dropoff_second': [52, 37, 28, 20, 59, 20, 38, 52, 43, 24, 59, 29, 58, 11, 3,
                           4, 34, 21, 6, 36],
        'dropoff_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4,
                            3],
        'passengers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1],
        'pickup_hour': [21, 21, 9, 17, 10, 13, 16, 10, 2, 1, 16, 17, 20, 20, 1, 16, 20,
                        15, 4, 10],
        'pickup_latitude': [40.6938362121582, 40.80146789550781, 40.6797981262207,
                            40.76081848144531, 40.66493988037109, 40.74625396728516,
                            40.80010223388672, 40.67601776123047, 40.67120361328125,
                            40.68327331542969, 40.6324462890625, 40.71521377563477,
                            40.80733871459961, 40.750484466552734, 40.7398796081543,
                            40.71691131591797, 40.773414611816406, 40.79001235961914,
                            40.660118103027344, 40.78546905517578],
        'pickup_longitude': [-73.98726654052734, -73.94845581054688, -73.9554443359375,
                             -73.92293548583984, -73.92304229736328, -73.8973159790039,
                             -73.9500503540039, -74.0144271850586, -73.98458099365234,
                             -73.96582794189453, -73.94767761230469,
                             -73.96052551269531, -73.96453094482422,
                             -73.88248443603516, -73.92410278320312,
                             -73.95661163330078, -73.92512512207031,
                             -73.94800567626953, -73.95987701416016,
                             -73.94915771484375],
        'pickup_minute': [2, 49, 46, 49, 28, 8, 32, 6, 34, 14, 14, 35, 17, 38, 20, 56,
                          56, 49, 23, 18],
        'pickup_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'pickup_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                            2, 15, 21],
        'pickup_second': [35, 17, 18, 12, 21, 46, 18, 22, 5, 45, 12, 52, 20, 8, 28, 54,
                          41, 53, 43, 2],
        'pickup_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4, 3],
        'store_forward': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        'vendor': [2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2]
    }

    df = pd.DataFrame(data)
    df.to_csv(os.path.join(raw_data, "taxi-data.csv"))

    cmd = f"python data-science/src/prep/prep.py --raw_data={raw_data} --train_data={train_data} --val_data={val_data} --test_data={test_data}"
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
    out, err = p.communicate()
    # out is bytes; splitting its repr on the literal "\n" recovers the printed lines
    result = str(out).split('\\n')
    for lin in result:
        if not lin.startswith('#'):
            print(lin)

    assert os.path.exists(os.path.join(train_data, "train.parquet"))
    assert os.path.exists(os.path.join(val_data, "val.parquet"))
    assert os.path.exists(os.path.join(test_data, "test.parquet"))

    print("Prep Data Unit Test Completed")

if __name__ == "__main__":

    test_prep_data()
@@ -0,0 +1,85 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
Registers trained ML model if deploy flag is True.
"""

import argparse
from pathlib import Path
import pickle
import mlflow

import os
import json

def parse_args():
    '''Parse input arguments'''

    parser = argparse.ArgumentParser()
    parser.add_argument('--model_name', type=str, help='Name under which model will be registered')
    parser.add_argument('--model_path', type=str, help='Model directory')
    parser.add_argument('--evaluation_output', type=str, help='Path of eval results')
    parser.add_argument(
        "--model_info_output_path", type=str, help="Path to write model info JSON"
    )
    args, _ = parser.parse_known_args()
    print(f'Arguments: {args}')

    return args


def main(args):
    '''Loads model, registers it if deploy flag is True'''

    with open((Path(args.evaluation_output) / "deploy_flag"), 'rb') as infile:
        deploy_flag = int(infile.read())

    mlflow.log_metric("deploy flag", int(deploy_flag))
    # NOTE: the flag read from the evaluation step is overridden here, so the
    # model is always registered regardless of the evaluation result
    deploy_flag = 1
    if deploy_flag == 1:

        print("Registering ", args.model_name)

        # load model
        model = mlflow.sklearn.load_model(args.model_path)

        # log model using mlflow
        mlflow.sklearn.log_model(model, args.model_name)

        # register logged model using mlflow
        run_id = mlflow.active_run().info.run_id
        model_uri = f'runs:/{run_id}/{args.model_name}'
        mlflow_model = mlflow.register_model(model_uri, args.model_name)
        model_version = mlflow_model.version

        # write model info ("model_info" avoids shadowing the dict builtin)
        print("Writing JSON")
        model_info = {"id": "{0}:{1}".format(args.model_name, model_version)}
        output_path = os.path.join(args.model_info_output_path, "model_info.json")
        with open(output_path, "w") as of:
            json.dump(model_info, fp=of)

    else:
        print("Model will not be registered!")

if __name__ == "__main__":

    mlflow.start_run()

    # ---------- Parse Arguments ----------- #
    # -------------------------------------- #

    args = parse_args()

    lines = [
        f"Model name: {args.model_name}",
        f"Model path: {args.model_path}",
        f"Evaluation output path: {args.evaluation_output}",
    ]

    for line in lines:
        print(line)

    main(args)

    mlflow.end_run()
@@ -0,0 +1,93 @@
import os
import subprocess
import pandas as pd

def test_train_model():

    train_data = "/tmp/train"
    model_output = "/tmp/model"

    os.makedirs(train_data, exist_ok = True)
    os.makedirs(model_output, exist_ok = True)

    data = {
        'cost': [4.5, 6.0, 9.5, 4.0, 6.0, 11.5, 25.0, 3.5, 5.0, 11.0, 7.5, 24.5, 9.5,
                 7.5, 6.0, 5.0, 9.0, 25.5, 17.5, 52.0],
        'distance': [0.83, 1.27, 1.8, 0.5, 0.9, 2.72, 6.83, 0.45, 0.77, 2.2, 1.5, 6.27,
                     2.0, 1.54, 1.24, 0.75, 2.2, 7.0, 5.1, 18.51],
        'dropoff_hour': [21, 21, 9, 17, 10, 13, 17, 10, 2, 1, 16, 18, 20, 20, 1, 17,
                         21, 16, 4, 10],
        'dropoff_latitude': [40.69454574584961, 40.81214904785156, 40.67874145507813,
                             40.75471496582031, 40.66966247558594, 40.77496337890625,
                             40.75603103637695, 40.67219161987305, 40.66605758666992,
                             40.69973754882813, 40.61215972900391, 40.74581146240234,
                             40.78779602050781, 40.76130676269531, 40.72980117797852,
                             40.71107864379883, 40.747501373291016, 40.752384185791016,
                             40.66606140136719, 40.64547729492188],
        'dropoff_longitude': [-73.97611236572266, -73.95975494384766,
                              -73.98030853271484, -73.92549896240234,
                              -73.91104125976562, -73.89237213134766,
                              -73.94535064697266, -74.01203918457031,
                              -73.97817993164062, -73.99366760253906,
                              -73.94902801513672, -73.98792266845703,
                              -73.95561218261719, -73.8807601928711, -73.9117202758789,
                              -73.96553039550781, -73.9442138671875,
                              -73.97544860839844, -73.87281036376953,
                              -73.77632141113281],
        'dropoff_minute': [5, 54, 57, 52, 34, 20, 5, 8, 37, 27, 21, 5, 26, 46, 25, 1,
                           5, 20, 41, 46],
        'dropoff_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'dropoff_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                             2, 15, 21],
        'dropoff_second': [52, 37, 28, 20, 59, 20, 38, 52, 43, 24, 59, 29, 58, 11, 3,
                           4, 34, 21, 6, 36],
        'dropoff_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4,
                            3],
        'passengers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1],
        'pickup_hour': [21, 21, 9, 17, 10, 13, 16, 10, 2, 1, 16, 17, 20, 20, 1, 16, 20,
                        15, 4, 10],
        'pickup_latitude': [40.6938362121582, 40.80146789550781, 40.6797981262207,
                            40.76081848144531, 40.66493988037109, 40.74625396728516,
                            40.80010223388672, 40.67601776123047, 40.67120361328125,
                            40.68327331542969, 40.6324462890625, 40.71521377563477,
                            40.80733871459961, 40.750484466552734, 40.7398796081543,
                            40.71691131591797, 40.773414611816406, 40.79001235961914,
                            40.660118103027344, 40.78546905517578],
        'pickup_longitude': [-73.98726654052734, -73.94845581054688, -73.9554443359375,
                             -73.92293548583984, -73.92304229736328, -73.8973159790039,
                             -73.9500503540039, -74.0144271850586, -73.98458099365234,
                             -73.96582794189453, -73.94767761230469,
                             -73.96052551269531, -73.96453094482422,
                             -73.88248443603516, -73.92410278320312,
                             -73.95661163330078, -73.92512512207031,
                             -73.94800567626953, -73.95987701416016,
                             -73.94915771484375],
        'pickup_minute': [2, 49, 46, 49, 28, 8, 32, 6, 34, 14, 14, 35, 17, 38, 20, 56,
                          56, 49, 23, 18],
        'pickup_month': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        'pickup_monthday': [3, 19, 5, 8, 29, 30, 8, 4, 9, 14, 12, 9, 14, 17, 10, 9, 8,
                            2, 15, 21],
        'pickup_second': [35, 17, 18, 12, 21, 46, 18, 22, 5, 45, 12, 52, 20, 8, 28, 54,
                          41, 53, 43, 2],
        'pickup_weekday': [6, 1, 1, 4, 4, 5, 4, 0, 5, 3, 1, 5, 3, 6, 6, 5, 4, 5, 4, 3],
        'store_forward': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        'vendor': [2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2]
    }

    df = pd.DataFrame(data)
    df.to_parquet(os.path.join(train_data, "train.parquet"))

    cmd = f"python data-science/src/train/train.py --train_data={train_data} --model_output={model_output}"
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
    out, err = p.communicate()
    result = str(out).split('\\n')
    for lin in result:
        if not lin.startswith('#'):
            print(lin)

    assert os.path.exists(os.path.join(model_output, "model.pkl"))

    print("Train Model Unit Test Completed")

if __name__ == "__main__":
    test_train_model()
@ -0,0 +1,162 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
Trains ML model using training dataset. Saves trained model.
"""

import argparse

from pathlib import Path

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

import mlflow
import mlflow.sklearn

TARGET_COL = "cost"

NUMERIC_COLS = [
    "distance",
    "dropoff_latitude",
    "dropoff_longitude",
    "passengers",
    "pickup_latitude",
    "pickup_longitude",
    "pickup_weekday",
    "pickup_month",
    "pickup_monthday",
    "pickup_hour",
    "pickup_minute",
    "pickup_second",
    "dropoff_weekday",
    "dropoff_month",
    "dropoff_monthday",
    "dropoff_hour",
    "dropoff_minute",
    "dropoff_second",
]

CAT_NOM_COLS = [
    "store_forward",
    "vendor",
]

CAT_ORD_COLS = [
]


def parse_args():
    '''Parse input arguments'''

    parser = argparse.ArgumentParser("train")
    parser.add_argument("--train_data", type=str, help="Path to train dataset")
    parser.add_argument("--model_output", type=str, help="Path of output model")

    # regressor specific arguments
    parser.add_argument('--regressor__n_estimators', type=int, default=500,
                        help='Number of trees')
    parser.add_argument('--regressor__bootstrap', type=int, default=1,
                        help='Method of selecting samples for training each tree')
    parser.add_argument('--regressor__max_depth', type=int, default=10,
                        help='Maximum number of levels in tree')
    parser.add_argument('--regressor__max_features', type=str, default='auto',
                        help='Number of features to consider at every split')
    parser.add_argument('--regressor__min_samples_leaf', type=int, default=4,
                        help='Minimum number of samples required at each leaf node')
    parser.add_argument('--regressor__min_samples_split', type=int, default=5,
                        help='Minimum number of samples required to split a node')

    args = parser.parse_args()

    return args


def main(args):
    '''Read train dataset, train model, save trained model'''

    # Read train data
    train_data = pd.read_parquet(Path(args.train_data))

    # Split the data into inputs (X) and outputs (y)
    y_train = train_data[TARGET_COL]
    X_train = train_data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]

    # Train a Random Forest Regression Model with the training set
    model = RandomForestRegressor(n_estimators=args.regressor__n_estimators,
                                  bootstrap=args.regressor__bootstrap,
                                  max_depth=args.regressor__max_depth,
                                  max_features=args.regressor__max_features,
                                  min_samples_leaf=args.regressor__min_samples_leaf,
                                  min_samples_split=args.regressor__min_samples_split,
                                  random_state=0)

    # log model hyperparameters
    mlflow.log_param("model", "RandomForestRegressor")
    mlflow.log_param("n_estimators", args.regressor__n_estimators)
    mlflow.log_param("bootstrap", args.regressor__bootstrap)
    mlflow.log_param("max_depth", args.regressor__max_depth)
    mlflow.log_param("max_features", args.regressor__max_features)
    mlflow.log_param("min_samples_leaf", args.regressor__min_samples_leaf)
    mlflow.log_param("min_samples_split", args.regressor__min_samples_split)

    # Train model with the train set
    model.fit(X_train, y_train)

    # Predict using the Regression Model
    yhat_train = model.predict(X_train)

    # Evaluate Regression performance with the train set
    r2 = r2_score(y_train, yhat_train)
    mse = mean_squared_error(y_train, yhat_train)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_train, yhat_train)

    # log model performance metrics
    mlflow.log_metric("train r2", r2)
    mlflow.log_metric("train mse", mse)
    mlflow.log_metric("train rmse", rmse)
    mlflow.log_metric("train mae", mae)

    # Visualize results
    plt.scatter(y_train, yhat_train, color='black')
    plt.plot(y_train, y_train, color='blue', linewidth=3)
    plt.xlabel("Real value")
    plt.ylabel("Predicted value")
    plt.savefig("regression_results.png")
    mlflow.log_artifact("regression_results.png")

    # Save the model
    mlflow.sklearn.save_model(sk_model=model, path=args.model_output)


if __name__ == "__main__":

    mlflow.start_run()

    # ---------- Parse Arguments ----------- #
    # -------------------------------------- #

    args = parse_args()

    lines = [
        f"Train dataset input path: {args.train_data}",
        f"Model output path: {args.model_output}",
        f"n_estimators: {args.regressor__n_estimators}",
        f"bootstrap: {args.regressor__bootstrap}",
        f"max_depth: {args.regressor__max_depth}",
        f"max_features: {args.regressor__max_features}",
        f"min_samples_leaf: {args.regressor__min_samples_leaf}",
        f"min_samples_split: {args.regressor__min_samples_split}"
    ]

    for line in lines:
        print(line)

    main(args)

    mlflow.end_run()
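Note: because the model is saved with mlflow.sklearn.save_model, downstream steps can reload it without touching the pickle file directly. A minimal sketch, assuming a local output directory (the path and the feature frame X_new are illustrative placeholders, not values from this repo):

import mlflow.sklearn

# Reload the MLflow model directory written by train.py's --model_output
model = mlflow.sklearn.load_model("outputs/model")  # placeholder path
predictions = model.predict(X_new)  # X_new: DataFrame with the same columns used in training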
Diff not shown for two files because of their large size.
@ -0,0 +1,3 @@
{"input_data": [[2.86,40.66551971,-73.98258972,1,40.69801331,-73.97357178,0,2,1,1,19,21,3,56,1,1,19,21,21,57],
                [3.98,40.68072128,-73.931633,1,40.6909523,-73.99185181,0,2,0,1,4,21,44,11,0,1,4,21,59,35]]
}
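This request body carries the 20 feature columns the scoring service expects. A hedged sketch of posting it to the managed online endpoint defined later in this commit (the scoring URI and key are placeholders, not values from the repo):

import json

import requests

# Placeholders: take the real scoring URI and key from the deployed online endpoint
url = "https://<endpoint-name>.<region>.inference.ml.azure.com/score"
headers = {"Authorization": "Bearer <endpoint-key>", "Content-Type": "application/json"}

with open("data/taxi-request.json") as f:
    payload = json.load(f)

response = requests.post(url, headers=headers, json=payload)
print(response.json())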
@ -0,0 +1,134 @@
# Resource group

module "resource_group" {
  source = "./modules/resource-group"

  location = var.location

  prefix  = var.prefix
  postfix = var.postfix
  env     = var.environment

  tags = local.tags
}

# Azure Machine Learning workspace

module "aml_workspace" {
  source = "./modules/aml-workspace"

  rg_name  = module.resource_group.name
  location = module.resource_group.location

  prefix  = var.prefix
  postfix = var.postfix
  env     = var.environment

  storage_account_id      = module.storage_account_aml.id
  key_vault_id            = module.key_vault.id
  application_insights_id = module.application_insights.id
  container_registry_id   = module.container_registry.id

  enable_aml_computecluster = var.enable_aml_computecluster
  storage_account_name      = module.storage_account_aml.name

  enable_aml_secure_workspace = var.enable_aml_secure_workspace
  vnet_id                     = var.enable_aml_secure_workspace ? azurerm_virtual_network.vnet_default[0].id : ""
  subnet_default_id           = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""
  subnet_training_id          = var.enable_aml_secure_workspace ? azurerm_subnet.snet_training[0].id : ""

  tags = local.tags
}

# Storage account

module "storage_account_aml" {
  source = "./modules/storage-account"

  rg_name  = module.resource_group.name
  location = module.resource_group.location

  prefix  = var.prefix
  postfix = var.postfix
  env     = var.environment

  hns_enabled                         = false
  firewall_bypass                     = ["AzureServices"]
  firewall_virtual_network_subnet_ids = []

  enable_aml_secure_workspace = var.enable_aml_secure_workspace
  vnet_id                     = var.enable_aml_secure_workspace ? azurerm_virtual_network.vnet_default[0].id : ""
  subnet_id                   = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""

  tags = local.tags
}

# Key vault

module "key_vault" {
  source = "./modules/key-vault"

  rg_name  = module.resource_group.name
  location = module.resource_group.location

  prefix  = var.prefix
  postfix = var.postfix
  env     = var.environment

  enable_aml_secure_workspace = var.enable_aml_secure_workspace
  vnet_id                     = var.enable_aml_secure_workspace ? azurerm_virtual_network.vnet_default[0].id : ""
  subnet_id                   = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""

  tags = local.tags
}

# Application insights

module "application_insights" {
  source = "./modules/application-insights"

  rg_name  = module.resource_group.name
  location = module.resource_group.location

  prefix  = var.prefix
  postfix = var.postfix
  env     = var.environment

  tags = local.tags
}

# Container registry

module "container_registry" {
  source = "./modules/container-registry"

  rg_name  = module.resource_group.name
  location = module.resource_group.location

  prefix  = var.prefix
  postfix = var.postfix
  env     = var.environment

  enable_aml_secure_workspace = var.enable_aml_secure_workspace
  vnet_id                     = var.enable_aml_secure_workspace ? azurerm_virtual_network.vnet_default[0].id : ""
  subnet_id                   = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""

  tags = local.tags
}

# Data explorer

module "data_explorer" {
  source = "./modules/data-explorer"

  rg_name  = module.resource_group.name
  location = module.resource_group.location

  prefix  = var.prefix
  postfix = var.postfix
  env     = var.environment

  key_vault_id      = module.key_vault.id
  enable_monitoring = var.enable_monitoring

  client_secret = var.client_secret

  tags = local.tags
}
@ -0,0 +1,37 @@
# Bastion

module "bastion" {
  source = "./modules/bastion-host"

  prefix  = var.prefix
  postfix = var.postfix
  env     = var.environment

  rg_name   = module.resource_group.name
  location  = module.resource_group.location
  subnet_id = var.enable_aml_secure_workspace ? azurerm_subnet.snet_bastion[0].id : ""

  enable_aml_secure_workspace = var.enable_aml_secure_workspace

  tags = local.tags
}

# Virtual machine

module "virtual_machine_jumphost" {
  source = "./modules/virtual-machine"

  prefix  = var.prefix
  postfix = var.postfix
  env     = var.environment

  rg_name           = module.resource_group.name
  location          = module.resource_group.location
  subnet_id         = var.enable_aml_secure_workspace ? azurerm_subnet.snet_default[0].id : ""
  jumphost_username = var.jumphost_username
  jumphost_password = var.jumphost_password

  enable_aml_secure_workspace = var.enable_aml_secure_workspace

  tags = local.tags
}
@ -0,0 +1,9 @@
locals {
  tags = {
    Owner       = "mlops-v2"
    Project     = "mlops-v2"
    Environment = var.environment
    Toolkit     = "terraform"
    Name        = var.prefix
  }
}
@ -0,0 +1,18 @@
terraform {
  backend "azurerm" {}
  required_providers {
    azurerm = {
      version = "= 2.99.0"
    }
  }
}

provider "azurerm" {
  features {}
}

data "azurerm_client_config" "current" {}

data "http" "ip" {
  url = "https://ifconfig.me"
}
@ -0,0 +1,97 @@
resource "azurerm_machine_learning_workspace" "mlw" {
  name                    = "mlw-${var.prefix}-${var.postfix}${var.env}"
  location                = var.location
  resource_group_name     = var.rg_name
  application_insights_id = var.application_insights_id
  key_vault_id            = var.key_vault_id
  storage_account_id      = var.storage_account_id
  container_registry_id   = var.container_registry_id

  sku_name = "Basic"

  identity {
    type = "SystemAssigned"
  }

  tags = var.tags
}

# Compute cluster

resource "azurerm_machine_learning_compute_cluster" "mlw_compute_cluster" {
  name                          = "cpu-cluster"
  location                      = var.location
  vm_priority                   = "LowPriority"
  vm_size                       = "Standard_DS3_v2"
  machine_learning_workspace_id = azurerm_machine_learning_workspace.mlw.id
  subnet_resource_id            = var.enable_aml_secure_workspace ? var.subnet_training_id : ""

  count = var.enable_aml_computecluster ? 1 : 0

  scale_settings {
    min_node_count                       = 0
    max_node_count                       = 4
    scale_down_nodes_after_idle_duration = "PT120S" # 120 seconds
  }
}

# DNS Zones

resource "azurerm_private_dns_zone" "mlw_zone_api" {
  name                = "privatelink.api.azureml.ms"
  resource_group_name = var.rg_name

  count = var.enable_aml_secure_workspace ? 1 : 0
}

resource "azurerm_private_dns_zone" "mlw_zone_notebooks" {
  name                = "privatelink.notebooks.azure.net"
  resource_group_name = var.rg_name

  count = var.enable_aml_secure_workspace ? 1 : 0
}

# Linking of DNS zones to Virtual Network

resource "azurerm_private_dns_zone_virtual_network_link" "mlw_zone_api_link" {
  name                  = "${var.prefix}${var.postfix}_link_api"
  resource_group_name   = var.rg_name
  private_dns_zone_name = azurerm_private_dns_zone.mlw_zone_api[0].name
  virtual_network_id    = var.vnet_id

  count = var.enable_aml_secure_workspace ? 1 : 0
}

resource "azurerm_private_dns_zone_virtual_network_link" "mlw_zone_notebooks_link" {
  name                  = "${var.prefix}${var.postfix}_link_notebooks"
  resource_group_name   = var.rg_name
  private_dns_zone_name = azurerm_private_dns_zone.mlw_zone_notebooks[0].name
  virtual_network_id    = var.vnet_id

  count = var.enable_aml_secure_workspace ? 1 : 0
}

# Private Endpoint configuration

resource "azurerm_private_endpoint" "mlw_pe" {
  name                = "pe-${azurerm_machine_learning_workspace.mlw.name}-amlw"
  location            = var.location
  resource_group_name = var.rg_name
  subnet_id           = var.subnet_default_id

  private_service_connection {
    name                           = "psc-aml-${var.prefix}-${var.postfix}${var.env}"
    private_connection_resource_id = azurerm_machine_learning_workspace.mlw.id
    subresource_names              = ["amlworkspace"]
    is_manual_connection           = false
  }

  private_dns_zone_group {
    name                 = "private-dns-zone-group-ws"
    private_dns_zone_ids = [azurerm_private_dns_zone.mlw_zone_api[0].id, azurerm_private_dns_zone.mlw_zone_notebooks[0].id]
  }

  count = var.enable_aml_secure_workspace ? 1 : 0

  tags = var.tags
}
@ -0,0 +1,3 @@
output "name" {
  value = azurerm_machine_learning_workspace.mlw.name
}
@ -0,0 +1,79 @@
variable "rg_name" {
  type        = string
  description = "Resource group name"
}

variable "location" {
  type        = string
  description = "Location of the resource group"
}

variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
}

variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}

variable "storage_account_id" {
  type        = string
  description = "The ID of the Storage Account linked to the AML workspace"
}

variable "key_vault_id" {
  type        = string
  description = "The ID of the Key Vault linked to the AML workspace"
}

variable "application_insights_id" {
  type        = string
  description = "The ID of the Application Insights instance linked to the AML workspace"
}

variable "container_registry_id" {
  type        = string
  description = "The ID of the Container Registry linked to the AML workspace"
}

variable "enable_aml_computecluster" {
  description = "Variable to enable or disable AML compute cluster"
  default     = false
}

variable "storage_account_name" {
  type        = string
  description = "The name of the Storage Account linked to the AML workspace"
}

variable "enable_aml_secure_workspace" {
  description = "Variable to enable or disable AML secure workspace"
}

variable "vnet_id" {
  type        = string
  description = "The ID of the vnet that should be linked to the DNS zone"
}

variable "subnet_default_id" {
  type        = string
  description = "The ID of the subnet from which private IP addresses will be allocated for this Private Endpoint"
}

variable "subnet_training_id" {
  type        = string
  description = "The ID of the subnet used by the training compute cluster"
}
@ -0,0 +1,8 @@
resource "azurerm_application_insights" "appi" {
  name                = "appi-${var.prefix}-${var.postfix}${var.env}"
  location            = var.location
  resource_group_name = var.rg_name
  application_type    = "web"

  tags = var.tags
}
@ -0,0 +1,3 @@
output "id" {
  value = azurerm_application_insights.appi.id
}
@ -0,0 +1,30 @@
variable "rg_name" {
  type        = string
  description = "Resource group name"
}

variable "location" {
  type        = string
  description = "Location of the resource group"
}

variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
}

variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}
@ -0,0 +1,31 @@
resource "azurerm_bastion_host" "bas" {
  name                = "bas-${var.prefix}-${var.postfix}${var.env}"
  location            = var.location
  resource_group_name = var.rg_name

  sku                = "Standard"
  copy_paste_enabled = false
  file_copy_enabled  = false

  ip_configuration {
    name                 = "configuration"
    subnet_id            = var.subnet_id
    public_ip_address_id = azurerm_public_ip.pip[0].id
  }

  count = var.enable_aml_secure_workspace ? 1 : 0

  tags = var.tags
}

resource "azurerm_public_ip" "pip" {
  name                = "pip-${var.prefix}-${var.postfix}${var.env}"
  location            = var.location
  resource_group_name = var.rg_name
  allocation_method   = "Static"
  sku                 = "Standard"

  count = var.enable_aml_secure_workspace ? 1 : 0

  tags = var.tags
}
@ -0,0 +1,39 @@
variable "rg_name" {
  type        = string
  description = "Resource group name"
}

variable "location" {
  type        = string
  description = "Location of the resource group"
}

variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
}

variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}

variable "subnet_id" {
  type        = string
  description = "Subnet ID for the bastion"
}

variable "enable_aml_secure_workspace" {
  description = "Variable to enable or disable AML secure workspace"
}
@ -0,0 +1,59 @@
locals {
  safe_prefix  = replace(var.prefix, "-", "")
  safe_postfix = replace(var.postfix, "-", "")
}

resource "azurerm_container_registry" "cr" {
  name                = "cr${local.safe_prefix}${local.safe_postfix}${var.env}"
  resource_group_name = var.rg_name
  location            = var.location
  sku                 = var.enable_aml_secure_workspace ? "Premium" : "Standard"
  admin_enabled       = true

  tags = var.tags
}

# DNS Zones

resource "azurerm_private_dns_zone" "cr_zone" {
  name                = "privatelink.azurecr.io"
  resource_group_name = var.rg_name

  count = var.enable_aml_secure_workspace ? 1 : 0
}

# Linking of DNS zones to Virtual Network

resource "azurerm_private_dns_zone_virtual_network_link" "cr_zone_link" {
  name                  = "${var.prefix}${var.postfix}_link_acr"
  resource_group_name   = var.rg_name
  private_dns_zone_name = azurerm_private_dns_zone.cr_zone[0].name
  virtual_network_id    = var.vnet_id

  count = var.enable_aml_secure_workspace ? 1 : 0
}

# Private Endpoint configuration

resource "azurerm_private_endpoint" "cr_pe" {
  name                = "pe-${azurerm_container_registry.cr.name}-acr"
  location            = var.location
  resource_group_name = var.rg_name
  subnet_id           = var.subnet_id

  private_service_connection {
    name                           = "psc-acr-${var.prefix}-${var.postfix}${var.env}"
    private_connection_resource_id = azurerm_container_registry.cr.id
    subresource_names              = ["registry"]
    is_manual_connection           = false
  }

  private_dns_zone_group {
    name                 = "private-dns-zone-group-acr"
    private_dns_zone_ids = [azurerm_private_dns_zone.cr_zone[0].id]
  }

  count = var.enable_aml_secure_workspace ? 1 : 0

  tags = var.tags
}
@ -0,0 +1,3 @@
output "id" {
  value = azurerm_container_registry.cr.id
}
@ -0,0 +1,44 @@
variable "rg_name" {
  type        = string
  description = "Resource group name"
}

variable "location" {
  type        = string
  description = "Location of the resource group"
}

variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
}

variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}

variable "enable_aml_secure_workspace" {
  description = "Variable to enable or disable AML secure workspace"
}

variable "vnet_id" {
  type        = string
  description = "The ID of the vnet that should be linked to the DNS zone"
}

variable "subnet_id" {
  type        = string
  description = "The ID of the subnet from which private IP addresses will be allocated for this Private Endpoint"
}
@ -0,0 +1,59 @@
data "azurerm_client_config" "current" {}

resource "azurerm_kusto_cluster" "cluster" {
  name                        = "adx${var.prefix}${var.postfix}${var.env}"
  location                    = var.location
  resource_group_name         = var.rg_name
  streaming_ingestion_enabled = true
  language_extensions         = ["PYTHON"]

  count = var.enable_monitoring ? 1 : 0

  sku {
    name     = "Standard_D11_v2"
    capacity = 2
  }

  tags = var.tags
}

resource "azurerm_kusto_database" "database" {
  name                = "mlmonitoring"
  resource_group_name = var.rg_name
  location            = var.location
  cluster_name        = azurerm_kusto_cluster.cluster[0].name

  count = var.enable_monitoring ? 1 : 0
}

resource "azurerm_key_vault_secret" "SP_ID" {
  name         = "kvmonitoringspid"
  value        = data.azurerm_client_config.current.client_id
  key_vault_id = var.key_vault_id

  count = var.enable_monitoring ? 1 : 0
}

resource "azurerm_key_vault_secret" "SP_KEY" {
  name         = "kvmonitoringspkey"
  value        = var.client_secret
  key_vault_id = var.key_vault_id

  count = var.enable_monitoring ? 1 : 0
}

resource "azurerm_key_vault_secret" "SP_TENANT_ID" {
  name         = "kvmonitoringadxtenantid"
  value        = data.azurerm_client_config.current.tenant_id
  key_vault_id = var.key_vault_id

  count = var.enable_monitoring ? 1 : 0
}

resource "azurerm_key_vault_secret" "ADX_URI" {
  name         = "kvmonitoringadxuri"
  value        = azurerm_kusto_cluster.cluster[0].uri
  key_vault_id = var.key_vault_id

  count = var.enable_monitoring ? 1 : 0
}

resource "azurerm_key_vault_secret" "ADX_DB" {
  name         = "kvmonitoringadxdb"
  value        = azurerm_kusto_database.database[0].name
  key_vault_id = var.key_vault_id

  count = var.enable_monitoring ? 1 : 0
}
@ -0,0 +1,45 @@
variable "rg_name" {
  type        = string
  description = "Resource group name"
}

variable "location" {
  type        = string
  description = "Location of the resource group"
}

variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
}

variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}

variable "key_vault_id" {
  type        = string
  description = "The ID of the Key Vault linked to the AML workspace"
}

variable "enable_monitoring" {
  description = "Variable to enable or disable monitoring"
  default     = false
}

variable "client_secret" {
  description = "Service Principal secret"
  default     = false
}
@ -0,0 +1,74 @@
data "azurerm_client_config" "current" {}

resource "azurerm_key_vault" "kv" {
  name                = "kv-${var.prefix}-${var.postfix}${var.env}"
  location            = var.location
  resource_group_name = var.rg_name
  tenant_id           = data.azurerm_client_config.current.tenant_id
  sku_name            = "standard"

  tags = var.tags

  access_policy {
    tenant_id = data.azurerm_client_config.current.tenant_id
    object_id = data.azurerm_client_config.current.object_id

    key_permissions = [
      "Create",
      "Get",
    ]

    secret_permissions = [
      "Set",
      "Get",
      "Delete",
      "Purge",
      "Recover"
    ]
  }
}

# DNS Zones

resource "azurerm_private_dns_zone" "kv_zone" {
  name                = "privatelink.vaultcore.azure.net"
  resource_group_name = var.rg_name

  count = var.enable_aml_secure_workspace ? 1 : 0
}

# Linking of DNS zones to Virtual Network

resource "azurerm_private_dns_zone_virtual_network_link" "kv_zone_link" {
  name                  = "${var.prefix}${var.postfix}_link_kv"
  resource_group_name   = var.rg_name
  private_dns_zone_name = azurerm_private_dns_zone.kv_zone[0].name
  virtual_network_id    = var.vnet_id

  count = var.enable_aml_secure_workspace ? 1 : 0
}

# Private Endpoint configuration

resource "azurerm_private_endpoint" "kv_pe" {
  name                = "pe-${azurerm_key_vault.kv.name}-vault"
  location            = var.location
  resource_group_name = var.rg_name
  subnet_id           = var.subnet_id

  private_service_connection {
    name                           = "psc-kv-${var.prefix}-${var.postfix}${var.env}"
    private_connection_resource_id = azurerm_key_vault.kv.id
    subresource_names              = ["vault"]
    is_manual_connection           = false
  }

  private_dns_zone_group {
    name                 = "private-dns-zone-group-kv"
    private_dns_zone_ids = [azurerm_private_dns_zone.kv_zone[0].id]
  }

  count = var.enable_aml_secure_workspace ? 1 : 0

  tags = var.tags
}
@ -0,0 +1,3 @@
output "id" {
  value = azurerm_key_vault.kv.id
}
@ -0,0 +1,44 @@
variable "rg_name" {
  type        = string
  description = "Resource group name"
}

variable "location" {
  type        = string
  description = "Location of the resource group"
}

variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
}

variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}

variable "enable_aml_secure_workspace" {
  description = "Variable to enable or disable AML secure workspace"
}

variable "vnet_id" {
  type        = string
  description = "The ID of the vnet that should be linked to the DNS zone"
}

variable "subnet_id" {
  type        = string
  description = "The ID of the subnet from which private IP addresses will be allocated for this Private Endpoint"
}
@ -0,0 +1,5 @@
resource "azurerm_resource_group" "adl_rg" {
  name     = "rg-${var.prefix}-${var.postfix}${var.env}"
  location = var.location
  tags     = var.tags
}
@ -0,0 +1,7 @@
output "name" {
  value = azurerm_resource_group.adl_rg.name
}

output "location" {
  value = azurerm_resource_group.adl_rg.location
}
@ -0,0 +1,26 @@
variable "location" {
  type        = string
  default     = "North Europe"
  description = "Location of the Resource Group"
}

variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the Resource Group"
}

variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}
@ -0,0 +1,118 @@
data "azurerm_client_config" "current" {}

data "http" "ip" {
  url = "https://ifconfig.me"
}

locals {
  safe_prefix  = replace(var.prefix, "-", "")
  safe_postfix = replace(var.postfix, "-", "")
}

resource "azurerm_storage_account" "st" {
  name                     = "st${local.safe_prefix}${local.safe_postfix}${var.env}"
  resource_group_name      = var.rg_name
  location                 = var.location
  account_tier             = "Standard"
  account_replication_type = "LRS"
  account_kind             = "StorageV2"
  is_hns_enabled           = var.hns_enabled

  tags = var.tags
}

# Virtual Network & Firewall configuration

resource "azurerm_storage_account_network_rules" "firewall_rules" {
  resource_group_name  = var.rg_name
  storage_account_name = azurerm_storage_account.st.name

  default_action             = "Allow"
  ip_rules                   = [] # [data.http.ip.body]
  virtual_network_subnet_ids = var.firewall_virtual_network_subnet_ids
  bypass                     = var.firewall_bypass
}

# DNS Zones

resource "azurerm_private_dns_zone" "st_zone_blob" {
  name                = "privatelink.blob.core.windows.net"
  resource_group_name = var.rg_name

  count = var.enable_aml_secure_workspace ? 1 : 0
}

resource "azurerm_private_dns_zone" "st_zone_file" {
  name                = "privatelink.file.core.windows.net"
  resource_group_name = var.rg_name

  count = var.enable_aml_secure_workspace ? 1 : 0
}

# Linking of DNS zones to Virtual Network

resource "azurerm_private_dns_zone_virtual_network_link" "st_zone_link_blob" {
  name                  = "${var.prefix}${var.postfix}_link_st_blob"
  resource_group_name   = var.rg_name
  private_dns_zone_name = azurerm_private_dns_zone.st_zone_blob[0].name
  virtual_network_id    = var.vnet_id

  count = var.enable_aml_secure_workspace ? 1 : 0
}

resource "azurerm_private_dns_zone_virtual_network_link" "st_zone_link_file" {
  name                  = "${var.prefix}${var.postfix}_link_st_file"
  resource_group_name   = var.rg_name
  private_dns_zone_name = azurerm_private_dns_zone.st_zone_file[0].name
  virtual_network_id    = var.vnet_id

  count = var.enable_aml_secure_workspace ? 1 : 0
}

# Private Endpoint configuration

resource "azurerm_private_endpoint" "st_pe_blob" {
  name                = "pe-${azurerm_storage_account.st.name}-blob"
  location            = var.location
  resource_group_name = var.rg_name
  subnet_id           = var.subnet_id

  private_service_connection {
    name                           = "psc-blob-${var.prefix}-${var.postfix}${var.env}"
    private_connection_resource_id = azurerm_storage_account.st.id
    subresource_names              = ["blob"]
    is_manual_connection           = false
  }

  private_dns_zone_group {
    name                 = "private-dns-zone-group-blob"
    private_dns_zone_ids = [azurerm_private_dns_zone.st_zone_blob[0].id]
  }

  count = var.enable_aml_secure_workspace ? 1 : 0

  tags = var.tags
}

resource "azurerm_private_endpoint" "st_pe_file" {
  name                = "pe-${azurerm_storage_account.st.name}-file"
  location            = var.location
  resource_group_name = var.rg_name
  subnet_id           = var.subnet_id

  private_service_connection {
    name                           = "psc-file-${var.prefix}-${var.postfix}${var.env}"
    private_connection_resource_id = azurerm_storage_account.st.id
    subresource_names              = ["file"]
    is_manual_connection           = false
  }

  private_dns_zone_group {
    name                 = "private-dns-zone-group-file"
    private_dns_zone_ids = [azurerm_private_dns_zone.st_zone_file[0].id]
  }

  count = var.enable_aml_secure_workspace ? 1 : 0

  tags = var.tags
}
@ -0,0 +1,7 @@
output "id" {
  value = azurerm_storage_account.st.id
}

output "name" {
  value = azurerm_storage_account.st.name
}
@ -0,0 +1,58 @@
variable "rg_name" {
  type        = string
  description = "Resource group name"
}

variable "location" {
  type        = string
  description = "Location of the resource group"
}

variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
}

variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}

variable "hns_enabled" {
  type        = bool
  description = "Hierarchical namespaces enabled/disabled"
  default     = true
}

variable "firewall_virtual_network_subnet_ids" {
  default = []
}

variable "firewall_bypass" {
  default = ["None"]
}

variable "enable_aml_secure_workspace" {
  description = "Variable to enable or disable AML secure workspace"
}

variable "vnet_id" {
  type        = string
  description = "The ID of the vnet that should be linked to the DNS zone"
}

variable "subnet_id" {
  type        = string
  description = "The ID of the subnet from which private IP addresses will be allocated for this Private Endpoint"
}
@ -0,0 +1,104 @@
resource "azurerm_virtual_machine" "vm" {
  name                  = "wvm-jumphost"
  location              = var.location
  resource_group_name   = var.rg_name
  network_interface_ids = [azurerm_network_interface.vm_nic[0].id]
  vm_size               = "Standard_DS3_v2"

  delete_os_disk_on_termination    = true
  delete_data_disks_on_termination = true

  storage_image_reference {
    publisher = "microsoft-dsvm"
    offer     = "dsvm-win-2019"
    sku       = "server-2019"
    version   = "latest"
  }

  os_profile {
    computer_name  = var.jumphost_username
    admin_username = var.jumphost_username
    admin_password = var.jumphost_password
  }

  os_profile_windows_config {
    provision_vm_agent        = true
    enable_automatic_upgrades = true
  }

  identity {
    type = "SystemAssigned"
  }

  storage_os_disk {
    name              = "disk-${var.prefix}-${var.postfix}${var.env}"
    caching           = "ReadWrite"
    create_option     = "FromImage"
    managed_disk_type = "StandardSSD_LRS"
  }

  count = var.enable_aml_secure_workspace ? 1 : 0

  tags = var.tags
}

resource "azurerm_network_interface" "vm_nic" {
  name                = "nic-${var.prefix}-${var.postfix}${var.env}"
  location            = var.location
  resource_group_name = var.rg_name

  ip_configuration {
    name                          = "configuration"
    private_ip_address_allocation = "Dynamic"
    subnet_id                     = var.subnet_id
    # public_ip_address_id        = azurerm_public_ip.vm_public_ip.id
  }

  count = var.enable_aml_secure_workspace ? 1 : 0

  tags = var.tags
}

resource "azurerm_network_security_group" "vm_nsg" {
  name                = "nsg-${var.prefix}-${var.postfix}${var.env}"
  location            = var.location
  resource_group_name = var.rg_name

  security_rule {
    name                       = "RDP"
    priority                   = 1010
    direction                  = "Inbound"
    access                     = "Allow"
    protocol                   = "Tcp"
    source_port_range          = "*"
    destination_port_range     = "3389"
    source_address_prefix      = "*"
    destination_address_prefix = "*"
  }

  count = var.enable_aml_secure_workspace ? 1 : 0

  tags = var.tags
}

resource "azurerm_network_interface_security_group_association" "vm_nsg_association" {
  network_interface_id      = azurerm_network_interface.vm_nic[0].id
  network_security_group_id = azurerm_network_security_group.vm_nsg[0].id

  count = var.enable_aml_secure_workspace ? 1 : 0
}

resource "azurerm_dev_test_global_vm_shutdown_schedule" "vm_schedule" {
  virtual_machine_id = azurerm_virtual_machine.vm[0].id
  location           = var.location
  enabled            = true

  daily_recurrence_time = "2000"
  timezone              = "W. Europe Standard Time"

  notification_settings {
    enabled = false
  }

  count = var.enable_aml_secure_workspace ? 1 : 0
}
@ -0,0 +1,49 @@
variable "rg_name" {
  type        = string
  description = "Resource group name"
}

variable "location" {
  type        = string
  description = "Location of the resource group"
}

variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags which should be assigned to the deployed resource"
}

variable "prefix" {
  type        = string
  description = "Prefix for the module name"
}

variable "postfix" {
  type        = string
  description = "Postfix for the module name"
}

variable "env" {
  type        = string
  description = "Environment prefix"
}

variable "jumphost_username" {
  type        = string
  description = "VM username"
}

variable "jumphost_password" {
  type        = string
  description = "VM password"
}

variable "subnet_id" {
  type        = string
  description = "Subnet ID for the virtual machine"
}

variable "enable_aml_secure_workspace" {
  description = "Variable to enable or disable AML secure workspace"
}
@ -0,0 +1,131 @@
# Virtual network

resource "azurerm_virtual_network" "vnet_default" {
  name                = "vnet-${var.prefix}-${var.postfix}${var.environment}"
  resource_group_name = module.resource_group.name
  location            = module.resource_group.location
  address_space       = ["10.0.0.0/16"]

  count = var.enable_aml_secure_workspace ? 1 : 0

  tags = local.tags
}

# Subnets

resource "azurerm_subnet" "snet_default" {
  name                                           = "snet-${var.prefix}-${var.postfix}${var.environment}-default"
  resource_group_name                            = module.resource_group.name
  virtual_network_name                           = azurerm_virtual_network.vnet_default[0].name
  address_prefixes                               = ["10.0.1.0/24"]
  enforce_private_link_endpoint_network_policies = true

  count = var.enable_aml_secure_workspace ? 1 : 0
}

resource "azurerm_subnet" "snet_bastion" {
  name                 = "AzureBastionSubnet"
  resource_group_name  = module.resource_group.name
  virtual_network_name = azurerm_virtual_network.vnet_default[0].name
  address_prefixes     = ["10.0.10.0/27"]

  count = var.enable_aml_secure_workspace ? 1 : 0
}

resource "azurerm_subnet" "snet_training" {
  name                                           = "snet-${var.prefix}-${var.postfix}${var.environment}-training"
  resource_group_name                            = module.resource_group.name
  virtual_network_name                           = azurerm_virtual_network.vnet_default[0].name
  address_prefixes                               = ["10.0.2.0/24"]
  enforce_private_link_endpoint_network_policies = true

  count = var.enable_aml_secure_workspace ? 1 : 0
}

# Network security groups

resource "azurerm_network_security_group" "nsg_training" {
  name                = "nsg-${var.prefix}-${var.postfix}${var.environment}-training"
  location            = module.resource_group.location
  resource_group_name = module.resource_group.name

  security_rule {
    name                       = "BatchNodeManagement"
    priority                   = 100
    direction                  = "Inbound"
    access                     = "Allow"
    protocol                   = "Tcp"
    source_port_range          = "*"
    destination_port_range     = "29876-29877"
    source_address_prefix      = "BatchNodeManagement"
    destination_address_prefix = "*"
  }

  security_rule {
    name                       = "AzureMachineLearning"
    priority                   = 110
    direction                  = "Inbound"
    access                     = "Allow"
    protocol                   = "Tcp"
    source_port_range          = "*"
    destination_port_range     = "44224"
    source_address_prefix      = "AzureMachineLearning"
    destination_address_prefix = "*"
  }

  count = var.enable_aml_secure_workspace ? 1 : 0
}

resource "azurerm_subnet_network_security_group_association" "nsg-training-link" {
  subnet_id                 = azurerm_subnet.snet_training[0].id
  network_security_group_id = azurerm_network_security_group.nsg_training[0].id

  count = var.enable_aml_secure_workspace ? 1 : 0
}

# User Defined Routes

resource "azurerm_route_table" "rt_training" {
  name                = "rt-${var.prefix}-${var.postfix}${var.environment}-training"
  location            = module.resource_group.location
  resource_group_name = module.resource_group.name

  count = var.enable_aml_secure_workspace ? 1 : 0
}

resource "azurerm_route" "route_training_internet" {
  name                = "Internet"
  resource_group_name = module.resource_group.name
  route_table_name    = azurerm_route_table.rt_training[0].name
  address_prefix      = "0.0.0.0/0"
  next_hop_type       = "Internet"

  count = var.enable_aml_secure_workspace ? 1 : 0
}

resource "azurerm_route" "route_training_aml" {
  name                = "AzureMLRoute"
  resource_group_name = module.resource_group.name
  route_table_name    = azurerm_route_table.rt_training[0].name
  address_prefix      = "AzureMachineLearning"
  next_hop_type       = "Internet"

  count = var.enable_aml_secure_workspace ? 1 : 0
}

resource "azurerm_route" "route_training_batch" {
  name                = "BatchRoute"
  resource_group_name = module.resource_group.name
  route_table_name    = azurerm_route_table.rt_training[0].name
  address_prefix      = "BatchNodeManagement"
  next_hop_type       = "Internet"

  count = var.enable_aml_secure_workspace ? 1 : 0
}

resource "azurerm_subnet_route_table_association" "rt_training_link" {
  subnet_id      = azurerm_subnet.snet_training[0].id
  route_table_id = azurerm_route_table.rt_training[0].id

  count = var.enable_aml_secure_workspace ? 1 : 0
}
@ -0,0 +1,68 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

variables:
  - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
      # 'main' branch: PRD environment
      - template: ../../config-infra-prod.yml
  - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
      # 'develop' or feature branches: DEV environment
      - template: ../../config-infra-dev.yml

parameters:
  - name: jumphost_username
    type: string
    default: "azureuser"
  - name: jumphost_password
    type: string
    default: "ThisIsNotVerySecure!"

trigger:
  - none

pool:
  vmImage: $(ap_vm_image)

resources:
  repositories:
    - repository: mlops-templates
      name: Azure/mlops-templates
      endpoint: github-connection
      type: github
      ref: main # branch name

stages:
  - stage: CreateStorageAccountForTerraformState
    displayName: Create Storage for Terraform
    jobs:
      - job: CreateStorageForTerraform
        displayName: Create Storage for Terraform
        steps:
          - checkout: self
            path: s/
          - checkout: mlops-templates
            path: s/templates/
          - template: templates/infra/create-resource-group.yml@mlops-templates
          - template: templates/infra/create-storage-account.yml@mlops-templates
          - template: templates/infra/create-storage-container.yml@mlops-templates
  - stage: DeployAzureMachineLearningRG
    displayName: Deploy AML Workspace
    jobs:
      - job: DeployAMLWorkspace
        displayName: Deploy Terraform
        steps:
          - checkout: self
            path: s/
          - checkout: mlops-templates
            path: s/templates/
          - template: templates/infra/install-terraform.yml@mlops-templates
          - template: templates/infra/run-terraform-init.yml@mlops-templates
          - template: templates/infra/run-terraform-validate.yml@mlops-templates
          - template: templates/infra/run-terraform-plan.yml@mlops-templates
            parameters:
              jumphost_username: ${{ parameters.jumphost_username }}
              jumphost_password: ${{ parameters.jumphost_password }}
          - template: templates/infra/run-terraform-apply.yml@mlops-templates
            parameters:
              jumphost_username: ${{ parameters.jumphost_username }}
              jumphost_password: ${{ parameters.jumphost_password }}
@ -0,0 +1,47 @@
variable "location" {
  type        = string
  description = "Location of the resource group and modules"
}

variable "prefix" {
  type        = string
  description = "Prefix for module names"
}

variable "environment" {
  type        = string
  description = "Environment information"
}

variable "postfix" {
  type        = string
  description = "Postfix for module names"
}

variable "enable_aml_computecluster" {
  description = "Variable to enable or disable AML compute cluster"
}

variable "enable_aml_secure_workspace" {
  description = "Variable to enable or disable AML secure workspace"
}

variable "jumphost_username" {
  type        = string
  description = "VM username"
  default     = "azureuser"
}

variable "jumphost_password" {
  type        = string
  description = "VM password"
  default     = "ThisIsNotVerySecure!"
}

variable "enable_monitoring" {
  description = "Variable to enable or disable Monitoring"
}

variable "client_secret" {
  description = "Service Principal Secret"
}
@ -0,0 +1,16 @@
$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
name: batch-dp
endpoint_name: taxi-fare-batch
model: azureml:taxi-model@latest
compute: azureml:batch-cluster
resources:
  instance_count: 1
max_concurrency_per_instance: 2
mini_batch_size: 10
output_action: append_row
output_file_name: predictions.csv
retry_settings:
  max_retries: 3
  timeout: 30
error_threshold: -1
logging_level: info
@ -0,0 +1,4 @@
$schema: https://azuremlschemas.azureedge.net/latest/batchEndpoint.schema.json
name: taxi-fare-batch
description: taxi cost batch endpoint
auth_mode: aad_token
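With the endpoint and deployment above in place, a batch scoring run can also be started outside the CLI templates. A hedged sketch using the azure-ai-ml v2 Python SDK (workspace coordinates are placeholders; depending on SDK version the invoke parameter may be inputs= rather than input=):

from azure.ai.ml import Input, MLClient
from azure.identity import DefaultAzureCredential

# Placeholder workspace coordinates
ml_client = MLClient(DefaultAzureCredential(), "<subscription-id>", "<resource-group>", "<workspace-name>")

# Invoke the taxi-fare-batch endpoint with the sample CSV as a file input
job = ml_client.batch_endpoints.invoke(
    endpoint_name="taxi-fare-batch",
    input=Input(type="uri_file", path="data/taxi-batch.csv"),
)
print(job.name)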
@ -0,0 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: blue
endpoint_name: taxi-fare-online
model: azureml:taxi-model@latest
instance_type: Standard_DS2_v2
instance_count: 1
@ -0,0 +1,4 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
name: taxi-fare-online
description: taxi cost online endpoint
auth_mode: key
@ -0,0 +1,166 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline
experiment_name: taxi-fare-training
description: Training Pipeline to train a model that predicts taxi fare price

# <inputs_and_outputs>
inputs:
  input: # using local data; this will create an anonymous data asset
    type: uri_folder
    path: ../../../data/
  enable_monitoring: "true"
  table_name: 'taximonitoring'

outputs:
  train_data:
  val_data:
  test_data:
  trained_model:
  evaluation_output:
  model_info_output_path:
# </inputs_and_outputs>

# <jobs>
settings:
  default_datastore: azureml:workspaceblobstore
  default_compute: azureml:cpu-cluster
  continue_on_step_failure: false

jobs:
  prep_data:
    name: prep_data
    display_name: prep-data
    code: ../../../data-science/src/prep
    command: >-
      python prep.py
      --raw_data ${{inputs.raw_data}}
      --train_data ${{outputs.train_data}}
      --val_data ${{outputs.val_data}}
      --test_data ${{outputs.test_data}}
      --enable_monitoring ${{inputs.enable_monitoring}}
      --table_name ${{inputs.table_name}}
    environment: azureml:taxi-train-env@latest
    inputs:
      raw_data: ${{parent.inputs.input}}
      enable_monitoring: ${{parent.inputs.enable_monitoring}}
      table_name: ${{parent.inputs.table_name}}
    outputs:
      train_data: ${{parent.outputs.train_data}}
      val_data: ${{parent.outputs.val_data}}
      test_data: ${{parent.outputs.test_data}}

  train_model:
    name: train_model
    display_name: train-model
    code: ../../../data-science/src/train
    command: >-
      python train.py
      --train_data ${{inputs.train_data}}
      --model_output ${{outputs.model_output}}
    environment: azureml:taxi-train-env@latest
    inputs:
      train_data: ${{parent.jobs.prep_data.outputs.train_data}}
    outputs:
      model_output: ${{parent.outputs.trained_model}}

  evaluate_model:
    name: evaluate_model
    display_name: evaluate-model
    code: ../../../data-science/src/evaluate
    command: >-
      python evaluate.py
      --model_name ${{inputs.model_name}}
      --model_input ${{inputs.model_input}}
      --test_data ${{inputs.test_data}}
      --evaluation_output ${{outputs.evaluation_output}}
    environment: azureml:taxi-train-env@latest
    inputs:
      model_name: "taxi-model"
      model_input: ${{parent.jobs.train_model.outputs.model_output}}
      test_data: ${{parent.jobs.prep_data.outputs.test_data}}
    outputs:
      evaluation_output: ${{parent.outputs.evaluation_output}}

  register_model:
    name: register_model
    display_name: register-model
    code: ../../../data-science/src/register
    command: >-
      python register.py
      --model_name ${{inputs.model_name}}
      --model_path ${{inputs.model_path}}
      --evaluation_output ${{inputs.evaluation_output}}
      --model_info_output_path ${{outputs.model_info_output_path}}
    environment: azureml:taxi-train-env@latest
    inputs:
      model_name: "taxi-model"
      model_path: ${{parent.jobs.train_model.outputs.model_output}}
      evaluation_output: ${{parent.jobs.evaluate_model.outputs.evaluation_output}}
    outputs:
      model_info_output_path: ${{parent.outputs.model_info_output_path}}

  create_rai_job:
    type: command
    component: azureml:rai_insights_constructor@latest
    limits:
      timeout: 120
    inputs:
      title: Responsible AI for Taxi Fare Prediction
      task_type: regression
      model_info_path: ${{parent.jobs.register_model.outputs.model_info_output_path}}
      train_dataset: ${{parent.jobs.prep_data.outputs.train_data}}
      test_dataset: ${{parent.jobs.prep_data.outputs.test_data}}
      target_column_name: "cost"
      categorical_column_names: '["store_forward", "vendor"]'

  explain_01:
    type: command
    component: azureml:rai_insights_explanation@latest
    limits:
      timeout: 120
    inputs:
      comment: Some random string
      rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}

  causal_01:
    type: command
    component: azureml:rai_insights_causal@latest
    limits:
      timeout: 120
    inputs:
      rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
      treatment_features: '["distance", "passengers"]'
      heterogeneity_features: '["store_forward", "vendor"]'

  counterfactual_01:
    type: command
    component: azureml:rai_insights_counterfactual@latest
    limits:
      timeout: 600
    inputs:
      rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
      total_CFs: 10
      desired_range: '[16, 30]'
      feature_importance: True

  error_analysis_01:
    type: command
    component: azureml:rai_insights_erroranalysis@latest
    limits:
      timeout: 120
    inputs:
      rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
      filter_features: '["distance", "passengers"]'

  gather_01:
    type: command
    component: azureml:rai_insights_gather@latest
    limits:
      timeout: 120
    inputs:
      constructor: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
      insight_1: ${{parent.jobs.causal_01.outputs.causal}}
      insight_2: ${{parent.jobs.counterfactual_01.outputs.counterfactual}}
      insight_3: ${{parent.jobs.error_analysis_01.outputs.error_analysis}}
      insight_4: ${{parent.jobs.explain_01.outputs.explanation}}
# </jobs>
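The pipeline job above can also be submitted directly, without going through the ADO templates. A minimal sketch shelling out to the Azure CLI v2 from Python, assuming the ml extension is installed and you are logged in (resource group and workspace names are placeholders):

import subprocess

# Submit the training pipeline defined in pipeline.yml
subprocess.run(
    [
        "az", "ml", "job", "create",
        "--file", "mlops/azureml/train/pipeline.yml",
        "--resource-group", "<resource-group>",
        "--workspace-name", "<workspace-name>",
    ],
    check=True,
)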
@ -0,0 +1,5 @@
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
name: taxi-train-env
image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04
conda_file: ../../../data-science/environment/train-conda.yml
description: Environment created from a Docker image plus Conda environment to train taxi model.
@ -0,0 +1,66 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

variables:
- ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
  # 'main' branch: PRD environment
  - template: ../../config-infra-prod.yml
- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
  # 'develop' or feature branches: DEV environment
  - template: ../../config-infra-dev.yml
- name: version
  value: aml-cli-v2
- name: endpoint_name
  value: taxi-batch-$(namespace)$(postfix)$(environment)
- name: endpoint_type
  value: batch

trigger:
- none

pool:
  vmImage: ubuntu-20.04

resources:
  repositories:
    - repository: mlops-templates # Template Repo
      name: Azure/mlops-templates # change the org name from "Azure" to your own org
      endpoint: github-connection # service connection; needs to be set up and hardcoded
      type: github
      ref: main

stages:
- stage: CreateBatchEndpoint
  displayName: Create/Update Batch Endpoint
  jobs:
    - job: DeployBatchEndpoint
      steps:
        - checkout: self
          path: s/
        - checkout: mlops-templates
          path: s/templates/
        - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
        - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
        - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
        - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
          parameters:
            cluster_name: batch-cluster # name must match the cluster name in the deployment file below
            size: STANDARD_DS3_V2
            min_instances: 0
            max_instances: 5
            cluster_tier: dedicated
        - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
          parameters:
            endpoint_file: mlops/azureml/deploy/batch/batch-endpoint.yml
        - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
          parameters:
            deployment_name: taxi-batch-dp
            deployment_file: mlops/azureml/deploy/batch/batch-deployment.yml
        - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
          parameters:
            deployment_name: taxi-batch-dp
            sample_request: data/taxi-batch.csv
            request_type: uri_file # either uri_folder or uri_file
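The create-endpoint and create-deployment steps consume the two files under mlops/azureml/deploy/batch/, which are not shown in this commit. A minimal pair consistent with the parameters above could look roughly like this sketch; the schema URLs are the standard AzureML ones, while the model name and anything marked hypothetical are assumptions.

# batch-endpoint.yml, minimal sketch, not the committed file
$schema: https://azuremlschemas.azureedge.net/latest/batchEndpoint.schema.json
name: taxi-batch-sample          # the pipeline injects the real name via $(endpoint_name)
auth_mode: aad_token

# batch-deployment.yml, minimal sketch, not the committed file
$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
name: taxi-batch-dp              # must match deployment_name above
endpoint_name: taxi-batch-sample
model: azureml:taxi-model@latest # hypothetical registered model name
compute: azureml:batch-cluster   # matches the cluster_name created above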
@ -0,0 +1,68 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

variables:
- ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
  # 'main' branch: PRD environment
  - template: ../../config-infra-prod.yml
- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
  # 'develop' or feature branches: DEV environment
  - template: ../../config-infra-dev.yml
- name: version
  value: aml-cli-v2

trigger:
- none

pool:
  vmImage: ubuntu-20.04

resources:
  repositories:
    - repository: mlops-templates # Template Repo
      name: Azure/mlops-templates # change the org name from "Azure" to your own org
      endpoint: github-connection # service connection; needs to be set up and hardcoded
      type: github
      ref: main
    - repository: rai-vnext-preview # Template Repo
      name: Azure/rai-vnext-preview # change the org name from "Azure" to your own org
      endpoint: github-connection # service connection; needs to be set up and hardcoded
      type: github
      ref: main

stages:
- stage: DeployTrainingPipeline
  displayName: Deploy Training Pipeline
  jobs:
    - job: DeployTrainingPipeline
      timeoutInMinutes: 120 # how long to run the job before automatically cancelling it
      steps:
        - checkout: self
          path: s/
        - checkout: mlops-templates
          path: s/templates/
        - template: templates/tests/unit-tests.yml@mlops-templates
        - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
        - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
        - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
        - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
          parameters:
            build_type: conda
            environment_name: taxi-train-env
            environment_file: mlops/azureml/train/train-env.yml
            enable_monitoring: $(enable_monitoring)
        - checkout: rai-vnext-preview
          path: s/
        - template: register-rai-components.yml
        - checkout: self
          path: s/
        - checkout: mlops-templates
          path: s/templates/
        - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
          parameters:
            pipeline_file: mlops/azureml/train/pipeline.yml
            experiment_name: $(environment)_taxi_fare_train_$(Build.SourceBranchName)
            display_name: $(environment)_taxi_fare_run_$(Build.BuildID)
            enable_monitoring: $(enable_monitoring)
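The register-environment.yml template lives in the mlops-templates repo, so its body is not visible here. With build_type: conda it plausibly reduces to a single az ml environment create call; the step below is a hedged sketch of such an equivalent, not the template's actual contents.

# Equivalent inline step, a sketch only; not register-environment.yml@mlops-templates.
- task: AzureCLI@2
  displayName: Register training environment (sketch)
  inputs:
    azureSubscription: $(ado_service_connection_rg) # same service connection pattern as register-rai-components.yml below
    scriptType: bash
    scriptLocation: inlineScript
    inlineScript: |
      az ml environment create --file mlops/azureml/train/train-env.yml \
        --resource-group $(resource_group) --workspace-name $(aml_workspace)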
@ -0,0 +1,61 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

variables:
- ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
  # 'main' branch: PRD environment
  - template: ../../config-infra-prod.yml
- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
  # 'develop' or feature branches: DEV environment
  - template: ../../config-infra-dev.yml
- name: version
  value: aml-cli-v2
- name: endpoint_name
  value: taxi-online-$(namespace)$(postfix)$(environment)
- name: endpoint_type
  value: online

trigger:
- none

pool:
  vmImage: ubuntu-20.04

resources:
  repositories:
    - repository: mlops-templates # Template Repo
      name: Azure/mlops-templates # change the org name from "Azure" to your own org
      endpoint: github-connection # service connection; needs to be set up and hardcoded
      type: github
      ref: main

stages:
- stage: CreateOnlineEndpoint
  displayName: Create/Update Online Endpoint
  jobs:
    - job: DeployOnlineEndpoint
      steps:
        - checkout: self
          path: s/
        - checkout: mlops-templates
          path: s/templates/
        - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
        - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
        - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
        - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
          parameters:
            endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml
        - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
          parameters:
            deployment_name: taxi-online-dp
            deployment_file: mlops/azureml/deploy/online/online-deployment.yml
        - template: templates/${{ variables.version }}/allocate-traffic.yml@mlops-templates
          parameters:
            traffic_allocation: taxi-online-dp=100
        - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
          parameters:
            deployment_name: taxi-online-dp
            sample_request: data/taxi-request.json
            request_type: json
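As with the batch pipeline, the endpoint and deployment files under mlops/azureml/deploy/online/ are referenced but not shown. A minimal pair consistent with the parameters above might look like this sketch; names marked hypothetical are assumptions.

# online-endpoint.yml, minimal sketch, not the committed file
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
name: taxi-online-sample         # the pipeline injects the real name via $(endpoint_name)
auth_mode: key

# online-deployment.yml, minimal sketch, not the committed file
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: taxi-online-dp             # must match deployment_name and traffic_allocation above
endpoint_name: taxi-online-sample
model: azureml:taxi-model@latest # hypothetical registered model name
instance_type: Standard_DS3_v2
instance_count: 1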
@ -0,0 +1,13 @@
steps:
  - task: AzureCLI@2
    displayName: Register RAI components
    continueOnError: true
    inputs:
      azureSubscription: $(ado_service_connection_rg) # needs to have access at the resource-group level
      scriptType: bash
      workingDirectory: $(System.DefaultWorkingDirectory)
      scriptLocation: inlineScript
      inlineScript: |
        subscription_id=$(az account list --query "[?isDefault].id | [0]" --output tsv)
        chmod u+x quick-setup.bash
        bash quick-setup.bash conda-env $subscription_id $(resource_group) $(aml_workspace)
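quick-setup.bash comes from the rai-vnext-preview checkout and registers the rai_insights_* components that the training pipeline's jobs reference. Because the task sets continueOnError: true, a failed registration will not stop the stage, so a quick verification step afterwards can be useful; the one below is a sketch, not part of this commit.

# Optional verification step, a sketch only.
  - task: AzureCLI@2
    displayName: Verify RAI components are registered
    inputs:
      azureSubscription: $(ado_service_connection_rg)
      scriptType: bash
      scriptLocation: inlineScript
      inlineScript: |
        az ml component list --resource-group $(resource_group) \
          --workspace-name $(aml_workspace) \
          --query "[?contains(name, 'rai_insights')].name" --output tsv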
@ -0,0 +1,4 @@
black==22.3.0
flake8==4.0.1
isort==5.10.1
pre-commit==2.19.0