diff --git a/classical/python-sdk-v2/data-science/src/evaluate.py b/classical/python-sdk-v2/data-science/src/evaluate/evaluate.py
similarity index 88%
rename from classical/python-sdk-v2/data-science/src/evaluate.py
rename to classical/python-sdk-v2/data-science/src/evaluate/evaluate.py
index 375421f..3f9486e 100644
--- a/classical/python-sdk-v2/data-science/src/evaluate.py
+++ b/classical/python-sdk-v2/data-science/src/evaluate/evaluate.py
@@ -22,29 +22,14 @@ from mlflow.tracking import MlflowClient
TARGET_COL = "cost"
NUMERIC_COLS = [
- "distance",
- "dropoff_latitude",
- "dropoff_longitude",
- "passengers",
- "pickup_latitude",
- "pickup_longitude",
- "pickup_weekday",
- "pickup_month",
- "pickup_monthday",
- "pickup_hour",
- "pickup_minute",
- "pickup_second",
- "dropoff_weekday",
- "dropoff_month",
- "dropoff_monthday",
- "dropoff_hour",
- "dropoff_minute",
- "dropoff_second",
+ "distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude",
+ "pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour",
+ "pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday",
+ "dropoff_hour", "dropoff_minute", "dropoff_second"
]
CAT_NOM_COLS = [
- "store_forward",
- "vendor",
+ "store_forward", "vendor"
]
CAT_ORD_COLS = [
@@ -58,7 +43,6 @@ def parse_args():
parser.add_argument("--model_input", type=str, help="Path of input model")
parser.add_argument("--test_data", type=str, help="Path to test dataset")
parser.add_argument("--evaluation_output", type=str, help="Path of eval results")
- parser.add_argument("--runner", type=str, help="Local or Cloud Runner", default="CloudRunner")
args = parser.parse_args()
@@ -81,8 +65,7 @@ def main(args):
yhat_test, score = model_evaluation(X_test, y_test, model, args.evaluation_output)
# ----------------- Model Promotion ---------------- #
- if args.runner == "CloudRunner":
- predictions, deploy_flag = model_promotion(args.model_name, args.evaluation_output, X_test, y_test, yhat_test, score)
+ predictions, deploy_flag = model_promotion(args.model_name, args.evaluation_output, X_test, y_test, yhat_test, score)
diff --git a/classical/python-sdk-v2/data-science/src/prep.py b/classical/python-sdk-v2/data-science/src/prep/prep.py
similarity index 83%
rename from classical/python-sdk-v2/data-science/src/prep.py
rename to classical/python-sdk-v2/data-science/src/prep/prep.py
index 1a97bc2..70b8ebd 100644
--- a/classical/python-sdk-v2/data-science/src/prep.py
+++ b/classical/python-sdk-v2/data-science/src/prep/prep.py
@@ -16,29 +16,14 @@ import mlflow
TARGET_COL = "cost"
NUMERIC_COLS = [
- "distance",
- "dropoff_latitude",
- "dropoff_longitude",
- "passengers",
- "pickup_latitude",
- "pickup_longitude",
- "pickup_weekday",
- "pickup_month",
- "pickup_monthday",
- "pickup_hour",
- "pickup_minute",
- "pickup_second",
- "dropoff_weekday",
- "dropoff_month",
- "dropoff_monthday",
- "dropoff_hour",
- "dropoff_minute",
- "dropoff_second",
+ "distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude",
+ "pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour",
+ "pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday",
+ "dropoff_hour", "dropoff_minute", "dropoff_second"
]
CAT_NOM_COLS = [
- "store_forward",
- "vendor",
+ "store_forward", "vendor"
]
CAT_ORD_COLS = [
@@ -70,11 +55,8 @@ def main(args):
# ------------ Reading Data ------------ #
# -------------------------------------- #
- print("mounted_path files: ")
- arr = os.listdir(args.raw_data)
- print(arr)
- data = pd.read_csv((Path(args.raw_data) / 'taxi-data.csv'))
+ data = pd.read_csv((Path(args.raw_data)))
data = data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS + [TARGET_COL]]
# ------------- Split Data ------------- #
diff --git a/classical/python-sdk-v2/data-science/src/register.py b/classical/python-sdk-v2/data-science/src/register/register.py
similarity index 100%
rename from classical/python-sdk-v2/data-science/src/register.py
rename to classical/python-sdk-v2/data-science/src/register/register.py
diff --git a/classical/python-sdk-v2/data-science/src/train.py b/classical/python-sdk-v2/data-science/src/train/train.py
similarity index 91%
rename from classical/python-sdk-v2/data-science/src/train.py
rename to classical/python-sdk-v2/data-science/src/train/train.py
index 1a8f097..14132d2 100644
--- a/classical/python-sdk-v2/data-science/src/train.py
+++ b/classical/python-sdk-v2/data-science/src/train/train.py
@@ -21,29 +21,14 @@ import mlflow.sklearn
TARGET_COL = "cost"
NUMERIC_COLS = [
- "distance",
- "dropoff_latitude",
- "dropoff_longitude",
- "passengers",
- "pickup_latitude",
- "pickup_longitude",
- "pickup_weekday",
- "pickup_month",
- "pickup_monthday",
- "pickup_hour",
- "pickup_minute",
- "pickup_second",
- "dropoff_weekday",
- "dropoff_month",
- "dropoff_monthday",
- "dropoff_hour",
- "dropoff_minute",
- "dropoff_second",
+ "distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude",
+ "pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour",
+ "pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday",
+ "dropoff_hour", "dropoff_minute", "dropoff_second"
]
CAT_NOM_COLS = [
- "store_forward",
- "vendor",
+ "store_forward", "vendor"
]
CAT_ORD_COLS = [
diff --git a/classical/python-sdk-v2/mlops/azureml/train/components/evaluate.yml b/classical/python-sdk-v2/mlops/azureml/train/components/evaluate.yml
deleted file mode 100644
index 643ee5a..0000000
--- a/classical/python-sdk-v2/mlops/azureml/train/components/evaluate.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-#
-$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
-name: evaluate_model
-display_name: evaluate-model
-type: command
-inputs:
- model_name:
- type: string
- model_input:
- type: uri_folder
- test_data:
- type: uri_folder
-outputs:
- evaluation_output:
- type: uri_folder
-code: ../../../../data-science/src
-environment: azureml:taxi-train-env@latest
-command: >-
- python evaluate.py
- --model_name ${{inputs.model_name}}
- --model_input ${{inputs.model_input}}
- --test_data ${{inputs.test_data}}
- --evaluation_output ${{outputs.evaluation_output}}
-#
diff --git a/classical/python-sdk-v2/mlops/azureml/train/components/prep.yml b/classical/python-sdk-v2/mlops/azureml/train/components/prep.yml
deleted file mode 100644
index 81c285b..0000000
--- a/classical/python-sdk-v2/mlops/azureml/train/components/prep.yml
+++ /dev/null
@@ -1,30 +0,0 @@
-#
-$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
-name: prep_data
-display_name: prep-data
-type: command
-inputs:
- raw_data:
- type: uri_file
- enable_monitoring:
- type: string
- table_name:
- type: string
-outputs:
- train_data:
- type: uri_folder
- val_data:
- type: uri_folder
- test_data:
- type: uri_folder
-code: ../../../../data-science/src
-environment: azureml:taxi-train-env@latest
-command: >-
- python prep.py
- --raw_data ${{inputs.raw_data}}
- --train_data ${{outputs.train_data}}
- --val_data ${{outputs.val_data}}
- --test_data ${{outputs.test_data}}
- --enable_monitoring ${{inputs.enable_monitoring}}
- --table_name ${{inputs.table_name}}
-#
diff --git a/classical/python-sdk-v2/mlops/azureml/train/components/register.yml b/classical/python-sdk-v2/mlops/azureml/train/components/register.yml
deleted file mode 100644
index 95a3d46..0000000
--- a/classical/python-sdk-v2/mlops/azureml/train/components/register.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-#
-$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
-name: register_model
-display_name: register-model
-type: command
-inputs:
- model_name:
- type: string
- model_path:
- type: uri_folder
- evaluation_output:
- type: uri_folder
-outputs:
- model_info_output_path:
- type: uri_folder
-code: ../../../../data-science/src
-environment: azureml:taxi-train-env@latest
-command: >-
- python register.py
- --model_name ${{inputs.model_name}}
- --model_path ${{inputs.model_path}}
- --evaluation_output ${{inputs.evaluation_output}}
- --model_info_output_path ${{outputs.model_info_output_path}}
-#
diff --git a/classical/python-sdk-v2/mlops/azureml/train/components/train.yml b/classical/python-sdk-v2/mlops/azureml/train/components/train.yml
deleted file mode 100644
index 8945a6b..0000000
--- a/classical/python-sdk-v2/mlops/azureml/train/components/train.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
-name: train_model
-display_name: train-model
-type: command
-inputs:
- train_data:
- type: uri_folder
-outputs:
- model_output:
- type: uri_folder
-code: ../../../../data-science/src
-environment: azureml:taxi-train-env@latest
-command: >-
- python train.py
- --train_data ${{inputs.train_data}}
- --model_output ${{outputs.model_output}}
-#
diff --git a/classical/python-sdk-v2/mlops/azureml/train/run_pipeline.py b/classical/python-sdk-v2/mlops/azureml/train/run_pipeline.py
index fabe9db..cef684d 100644
--- a/classical/python-sdk-v2/mlops/azureml/train/run_pipeline.py
+++ b/classical/python-sdk-v2/mlops/azureml/train/run_pipeline.py
@@ -14,7 +14,7 @@ from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import Environment
from azure.ai.ml.dsl import pipeline
-from azure.ai.ml import Input, Output, load_component
+from azure.ai.ml import Input, Output, command
from azure.ai.ml.constants import AssetTypes, InputOutputModes
import json
@@ -24,10 +24,12 @@ import os
def parse_args():
parser = argparse.ArgumentParser("Deploy Training Pipeline")
- parser.add_argument("-c", type=str, help="Compute Cluster Name")
- parser.add_argument("-m", type=str, help="Enable Monitoring", default="false")
- parser.add_argument("-d", type=str, help="Data Asset Name")
- parser.add_argument("-n", type=str, help="Experiment Name")
+ parser.add_argument("--experiment_name", type=str, help="Experiment Name")
+ parser.add_argument("--compute_name", type=str, help="Compute Cluster Name")
+ parser.add_argument("--data_name", type=str, help="Data Asset Name")
+ parser.add_argument("--environment_name", type=str, help="Registered Environment Name")
+ parser.add_argument("--enable_monitoring", type=str, help="Enable Monitoring", default="false")
+ parser.add_argument("--table_name", type=str, help="ADX Monitoring Table Name", default="taximonitoring")
args = parser.parse_args()
@@ -47,8 +49,7 @@ def main():
print(ex)
try:
- compute_target = args.c
- print(ml_client.compute.get(compute_target))
+ print(ml_client.compute.get(args.compute_name))
except:
print("No compute found")
@@ -58,34 +59,104 @@ def main():
print('current', os.listdir())
# Create pipeline job
- parent_dir = "mlops/azureml/train/components"
+ # 1. Define components
+ parent_dir = "data-science/src"
+
+ prep_data = command(
+ name="prep_data",
+ display_name="prep-data",
+ code=os.path.join(parent_dir, "prep"),
+ command="python prep.py \
+ --raw_data ${{inputs.raw_data}} \
+ --train_data ${{outputs.train_data}} \
+ --val_data ${{outputs.val_data}} \
+ --test_data ${{outputs.test_data}} \
+ --enable_monitoring ${{inputs.enable_monitoring}} \
+ --table_name ${{inputs.table_name}}",
+ environment=args.environment_name+"@latest",
+ inputs={
+ "raw_data": Input(type="uri_file"),
+ "enable_monitoring": Input(type="string"),
+ "table_name": Input(type="string")
+ },
+ outputs={
+ "train_data": Output(type="uri_folder"),
+ "val_data": Output(type="uri_folder"),
+ "test_data": Output(type="uri_folder"),
+ }
+ )
+ train_model = command(
+ name="train_model",
+ display_name="train-model",
+ code=os.path.join(parent_dir, "train"),
+ command="python train.py \
+ --train_data ${{inputs.train_data}} \
+ --model_output ${{outputs.model_output}}",
+ environment=args.environment_name+"@latest",
+ inputs={"train_data": Input(type="uri_folder")},
+ outputs={"model_output": Output(type="uri_folder")}
+ )
- # 1. Load components
- prepare_data = load_component(source=os.path.join(parent_dir , "prep.yml"))
- train_model = load_component(source=os.path.join(parent_dir, "train.yml"))
- evaluate_model = load_component(source=os.path.join(parent_dir, "evaluate.yml"))
- register_model = load_component(source=os.path.join(parent_dir, "register.yml"))
+ evaluate_model = command(
+ name="evaluate_model",
+ display_name="evaluate-model",
+ code=os.path.join(parent_dir, "evaluate"),
+ command="python evaluate.py \
+ --model_name ${{inputs.model_name}} \
+ --model_input ${{inputs.model_input}} \
+ --test_data ${{inputs.test_data}} \
+ --evaluation_output ${{outputs.evaluation_output}}",
+ environment=args.environment_name+"@latest",
+ inputs={
+ "model_name": Input(type="string"),
+ "model_input": Input(type="uri_folder"),
+ "test_data": Input(type="uri_folder")
+ },
+ outputs={
+ "evaluation_output": Output(type="uri_folder")
+ }
+ )
+
+ register_model = command(
+ name="register_model",
+ display_name="register-model",
+ code=os.path.join(parent_dir, "register"),
+ command="python register.py \
+ --model_name ${{inputs.model_name}} \
+ --model_path ${{inputs.model_path}} \
+ --evaluation_output ${{inputs.evaluation_output}} \
+ --model_info_output_path ${{outputs.model_info_output_path}}",
+ environment=args.environment_name+"@latest",
+ inputs={
+ "model_name": Input(type="string"),
+ "model_path": Input(type="uri_folder"),
+ "evaluation_output": Input(type="uri_folder")
+ },
+ outputs={
+ "model_info_output_path": Output(type="uri_folder")
+ }
+ )
# 2. Construct pipeline
@pipeline()
def taxi_training_pipeline(raw_data, enable_monitoring, table_name):
- prepare = prepare_data(
+ prep = prep_data(
raw_data=raw_data,
enable_monitoring=enable_monitoring,
table_name=table_name
)
train = train_model(
- train_data=prepare.outputs.train_data
+ train_data=prep.outputs.train_data
)
evaluate = evaluate_model(
model_name="taxi-model",
model_input=train.outputs.model_output,
- test_data=prepare.outputs.test_data
+ test_data=prep.outputs.test_data
)
@@ -96,24 +167,24 @@ def main():
)
return {
- "pipeline_job_train_data": prepare.outputs.train_data,
- "pipeline_job_test_data": prepare.outputs.test_data,
+ "pipeline_job_train_data": prep.outputs.train_data,
+ "pipeline_job_test_data": prep.outputs.test_data,
"pipeline_job_trained_model": train.outputs.model_output,
"pipeline_job_score_report": evaluate.outputs.evaluation_output,
}
pipeline_job = taxi_training_pipeline(
- Input(path=args.d + "@latest"), args.m, "taximonitoring"
+ Input(path=args.data_name + "@latest", type="uri_file"), args.enable_monitoring, args.table_name
)
# set pipeline level compute
- pipeline_job.settings.default_compute = args.c
+ pipeline_job.settings.default_compute = args.compute_name
# set pipeline level datastore
pipeline_job.settings.default_datastore = "workspaceblobstore"
pipeline_job = ml_client.jobs.create_or_update(
- pipeline_job, experiment_name=args.n
+ pipeline_job, experiment_name=args.experiment_name
)
pipeline_job
diff --git a/classical/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml b/classical/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml
index ee309cf..61e1e44 100644
--- a/classical/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml
+++ b/classical/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml
@@ -26,43 +26,45 @@ resources:
ref: main-dec31
stages:
- - stage: DeployTrainingPipeline
- displayName: Deploy Training Pipeline
- jobs:
- - job: DeployTrainingPipeline
- timeoutInMinutes: 120 # how long to run the job before automatically cancelling
- steps:
- - checkout: self
- path: s/
- - checkout: mlops-templates
- path: s/templates/
- - template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates
- - template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates
- - template: templates/python-sdk-v2/install-requirements.yml@mlops-templates
- - template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates
- - template: templates/aml-cli-v2/create-compute.yml@mlops-templates
- parameters:
- cluster_name: cpu-cluster
- size: Standard_DS3_v2
- min_instances: 0
- max_instances: 4
- cluster_tier: low_priority
- - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
- parameters:
- environment_name: taxi-train-env
- environment_description: "Training Environment for Taxi Pipeline"
- environment_path: data-science/environment/train-conda.yml
- build_type: conda
- - template: templates/${{ variables.version }}/register-data-asset.yml@mlops-templates
- parameters:
- data_name: taxi-data
- data_description: taxi-training-dataset
- data_path: data/taxi-data.csv
- data_type: uri_file
- - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
- parameters:
- pipeline_path: mlops/azureml/train/run_pipeline.py
- data_name: taxi-data
- cluster_name: cpu-cluster
- enable_monitoring: $(enable_monitoring)
- experiment_name: taxi-train-pipeline
+- stage: DeployTrainingPipeline
+ displayName: Deploy Training Pipeline
+ jobs:
+ - job: DeployTrainingPipeline
+ timeoutInMinutes: 120 # how long to run the job before automatically cancelling
+ steps:
+ - checkout: self
+ path: s/
+ - checkout: mlops-templates
+ path: s/templates/
+ - template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates
+ - template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates
+ - template: templates/python-sdk-v2/install-requirements.yml@mlops-templates
+ - template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates
+ - template: templates/aml-cli-v2/create-compute.yml@mlops-templates
+ parameters:
+ cluster_name: cpu-cluster
+ size: Standard_DS3_v2
+ min_instances: 0
+ max_instances: 4
+ cluster_tier: low_priority
+ - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
+ parameters:
+ environment_name: taxi-train-env
+ environment_description: "Training Environment for Taxi Pipeline"
+ environment_path: data-science/environment/train-conda.yml
+ build_type: conda
+ - template: templates/${{ variables.version }}/register-data-asset.yml@mlops-templates
+ parameters:
+ data_name: taxi-data
+ data_description: taxi-training-dataset
+ data_path: data/taxi-data.csv
+ data_type: uri_file
+ - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
+ parameters:
+ pipeline_path: mlops/azureml/train/run_pipeline.py
+ experiment_name: taxi-train-pipeline
+ data_name: taxi-data
+ environment_name: taxi-train-env
+ compute_name: cpu-cluster
+ enable_monitoring: $(enable_monitoring)
+ table_name: 'taximonitoring'