Merge branch 'main-dec31' into main-dec31
This commit is contained in:
Commit
703be110a6
|
@ -22,29 +22,14 @@ from mlflow.tracking import MlflowClient
|
|||
TARGET_COL = "cost"
|
||||
|
||||
NUMERIC_COLS = [
|
||||
"distance",
|
||||
"dropoff_latitude",
|
||||
"dropoff_longitude",
|
||||
"passengers",
|
||||
"pickup_latitude",
|
||||
"pickup_longitude",
|
||||
"pickup_weekday",
|
||||
"pickup_month",
|
||||
"pickup_monthday",
|
||||
"pickup_hour",
|
||||
"pickup_minute",
|
||||
"pickup_second",
|
||||
"dropoff_weekday",
|
||||
"dropoff_month",
|
||||
"dropoff_monthday",
|
||||
"dropoff_hour",
|
||||
"dropoff_minute",
|
||||
"dropoff_second",
|
||||
"distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude",
|
||||
"pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour",
|
||||
"pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday",
|
||||
"dropoff_hour", "dropoff_minute", "dropoff_second"
|
||||
]
|
||||
|
||||
CAT_NOM_COLS = [
|
||||
"store_forward",
|
||||
"vendor",
|
||||
"store_forward", "vendor"
|
||||
]
|
||||
|
||||
CAT_ORD_COLS = [
|
||||
|
@ -58,7 +43,6 @@ def parse_args():
|
|||
parser.add_argument("--model_input", type=str, help="Path of input model")
|
||||
parser.add_argument("--test_data", type=str, help="Path to test dataset")
|
||||
parser.add_argument("--evaluation_output", type=str, help="Path of eval results")
|
||||
parser.add_argument("--runner", type=str, help="Local or Cloud Runner", default="CloudRunner")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -81,8 +65,7 @@ def main(args):
|
|||
yhat_test, score = model_evaluation(X_test, y_test, model, args.evaluation_output)
|
||||
|
||||
# ----------------- Model Promotion ---------------- #
|
||||
if args.runner == "CloudRunner":
|
||||
predictions, deploy_flag = model_promotion(args.model_name, args.evaluation_output, X_test, y_test, yhat_test, score)
|
||||
predictions, deploy_flag = model_promotion(args.model_name, args.evaluation_output, X_test, y_test, yhat_test, score)
|
||||
|
||||
|
||||
|
|
@ -16,29 +16,14 @@ import mlflow
|
|||
TARGET_COL = "cost"
|
||||
|
||||
NUMERIC_COLS = [
|
||||
"distance",
|
||||
"dropoff_latitude",
|
||||
"dropoff_longitude",
|
||||
"passengers",
|
||||
"pickup_latitude",
|
||||
"pickup_longitude",
|
||||
"pickup_weekday",
|
||||
"pickup_month",
|
||||
"pickup_monthday",
|
||||
"pickup_hour",
|
||||
"pickup_minute",
|
||||
"pickup_second",
|
||||
"dropoff_weekday",
|
||||
"dropoff_month",
|
||||
"dropoff_monthday",
|
||||
"dropoff_hour",
|
||||
"dropoff_minute",
|
||||
"dropoff_second",
|
||||
"distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude",
|
||||
"pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour",
|
||||
"pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday",
|
||||
"dropoff_hour", "dropoff_minute", "dropoff_second"
|
||||
]
|
||||
|
||||
CAT_NOM_COLS = [
|
||||
"store_forward",
|
||||
"vendor",
|
||||
"store_forward", "vendor"
|
||||
]
|
||||
|
||||
CAT_ORD_COLS = [
|
||||
|
@ -70,11 +55,8 @@ def main(args):
|
|||
|
||||
# ------------ Reading Data ------------ #
|
||||
# -------------------------------------- #
|
||||
print("mounted_path files: ")
|
||||
arr = os.listdir(args.raw_data)
|
||||
print(arr)
|
||||
|
||||
data = pd.read_csv((Path(args.raw_data) / 'taxi-data.csv'))
|
||||
data = pd.read_csv((Path(args.raw_data)))
|
||||
data = data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS + [TARGET_COL]]
|
||||
|
||||
# ------------- Split Data ------------- #
|
|
@ -21,29 +21,14 @@ import mlflow.sklearn
|
|||
TARGET_COL = "cost"
|
||||
|
||||
NUMERIC_COLS = [
|
||||
"distance",
|
||||
"dropoff_latitude",
|
||||
"dropoff_longitude",
|
||||
"passengers",
|
||||
"pickup_latitude",
|
||||
"pickup_longitude",
|
||||
"pickup_weekday",
|
||||
"pickup_month",
|
||||
"pickup_monthday",
|
||||
"pickup_hour",
|
||||
"pickup_minute",
|
||||
"pickup_second",
|
||||
"dropoff_weekday",
|
||||
"dropoff_month",
|
||||
"dropoff_monthday",
|
||||
"dropoff_hour",
|
||||
"dropoff_minute",
|
||||
"dropoff_second",
|
||||
"distance", "dropoff_latitude", "dropoff_longitude", "passengers", "pickup_latitude",
|
||||
"pickup_longitude", "pickup_weekday", "pickup_month", "pickup_monthday", "pickup_hour",
|
||||
"pickup_minute", "pickup_second", "dropoff_weekday", "dropoff_month", "dropoff_monthday",
|
||||
"dropoff_hour", "dropoff_minute", "dropoff_second"
|
||||
]
|
||||
|
||||
CAT_NOM_COLS = [
|
||||
"store_forward",
|
||||
"vendor",
|
||||
"store_forward", "vendor"
|
||||
]
|
||||
|
||||
CAT_ORD_COLS = [
|
|
@ -1,24 +0,0 @@
|
|||
# <component>
|
||||
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
|
||||
name: evaluate_model
|
||||
display_name: evaluate-model
|
||||
type: command
|
||||
inputs:
|
||||
model_name:
|
||||
type: string
|
||||
model_input:
|
||||
type: uri_folder
|
||||
test_data:
|
||||
type: uri_folder
|
||||
outputs:
|
||||
evaluation_output:
|
||||
type: uri_folder
|
||||
code: ../../../../data-science/src
|
||||
environment: azureml:taxi-train-env@latest
|
||||
command: >-
|
||||
python evaluate.py
|
||||
--model_name ${{inputs.model_name}}
|
||||
--model_input ${{inputs.model_input}}
|
||||
--test_data ${{inputs.test_data}}
|
||||
--evaluation_output ${{outputs.evaluation_output}}
|
||||
# </component>
|
|
@ -1,30 +0,0 @@
|
|||
# <component>
|
||||
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
|
||||
name: prep_data
|
||||
display_name: prep-data
|
||||
type: command
|
||||
inputs:
|
||||
raw_data:
|
||||
type: uri_file
|
||||
enable_monitoring:
|
||||
type: string
|
||||
table_name:
|
||||
type: string
|
||||
outputs:
|
||||
train_data:
|
||||
type: uri_folder
|
||||
val_data:
|
||||
type: uri_folder
|
||||
test_data:
|
||||
type: uri_folder
|
||||
code: ../../../../data-science/src
|
||||
environment: azureml:taxi-train-env@latest
|
||||
command: >-
|
||||
python prep.py
|
||||
--raw_data ${{inputs.raw_data}}
|
||||
--train_data ${{outputs.train_data}}
|
||||
--val_data ${{outputs.val_data}}
|
||||
--test_data ${{outputs.test_data}}
|
||||
--enable_monitoring ${{inputs.enable_monitoring}}
|
||||
--table_name ${{inputs.table_name}}
|
||||
# </component>
|
|
@ -1,24 +0,0 @@
|
|||
# <component>
|
||||
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
|
||||
name: register_model
|
||||
display_name: register-model
|
||||
type: command
|
||||
inputs:
|
||||
model_name:
|
||||
type: string
|
||||
model_path:
|
||||
type: uri_folder
|
||||
evaluation_output:
|
||||
type: uri_folder
|
||||
outputs:
|
||||
model_info_output_path:
|
||||
type: uri_folder
|
||||
code: ../../../../data-science/src
|
||||
environment: azureml:taxi-train-env@latest
|
||||
command: >-
|
||||
python register.py
|
||||
--model_name ${{inputs.model_name}}
|
||||
--model_path ${{inputs.model_path}}
|
||||
--evaluation_output ${{inputs.evaluation_output}}
|
||||
--model_info_output_path ${{outputs.model_info_output_path}}
|
||||
# </component>
|
|
@ -1,18 +0,0 @@
|
|||
# <component>
|
||||
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
|
||||
name: train_model
|
||||
display_name: train-model
|
||||
type: command
|
||||
inputs:
|
||||
train_data:
|
||||
type: uri_folder
|
||||
outputs:
|
||||
model_output:
|
||||
type: uri_folder
|
||||
code: ../../../../data-science/src
|
||||
environment: azureml:taxi-train-env@latest
|
||||
command: >-
|
||||
python train.py
|
||||
--train_data ${{inputs.train_data}}
|
||||
--model_output ${{outputs.model_output}}
|
||||
# </component>
|
|
@ -14,7 +14,7 @@ from azure.ai.ml.entities import Data
|
|||
from azure.ai.ml.constants import AssetTypes
|
||||
from azure.ai.ml.entities import Environment
|
||||
from azure.ai.ml.dsl import pipeline
|
||||
from azure.ai.ml import Input, Output, load_component
|
||||
from azure.ai.ml import Input, Output, command
|
||||
from azure.ai.ml.constants import AssetTypes, InputOutputModes
|
||||
|
||||
import json
|
||||
|
@ -24,10 +24,12 @@ import os
|
|||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser("Deploy Training Pipeline")
|
||||
parser.add_argument("-c", type=str, help="Compute Cluster Name")
|
||||
parser.add_argument("-m", type=str, help="Enable Monitoring", default="false")
|
||||
parser.add_argument("-d", type=str, help="Data Asset Name")
|
||||
parser.add_argument("-n", type=str, help="Experiment Name")
|
||||
parser.add_argument("--experiment_name", type=str, help="Experiment Name")
|
||||
parser.add_argument("--compute_name", type=str, help="Compute Cluster Name")
|
||||
parser.add_argument("--data_name", type=str, help="Data Asset Name")
|
||||
parser.add_argument("--environment_name", type=str, help="Registered Environment Name")
|
||||
parser.add_argument("--enable_monitoring", type=str, help="Enable Monitoring", default="false")
|
||||
parser.add_argument("--table_name", type=str, help="ADX Monitoring Table Name", default="taximonitoring")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
@ -47,8 +49,7 @@ def main():
|
|||
print(ex)
|
||||
|
||||
try:
|
||||
compute_target = args.c
|
||||
print(ml_client.compute.get(compute_target))
|
||||
print(ml_client.compute.get(args.compute_name))
|
||||
except:
|
||||
print("No compute found")
|
||||
|
||||
|
@ -58,34 +59,104 @@ def main():
|
|||
print('current', os.listdir())
|
||||
|
||||
# Create pipeline job
|
||||
parent_dir = "mlops/azureml/train/components"
|
||||
|
||||
# 1. Define components
|
||||
parent_dir = "data-science/src"
|
||||
|
||||
prep_data = command(
|
||||
name="prep_data",
|
||||
display_name="prep-data",
|
||||
code=os.path.join(parent_dir, "prep"),
|
||||
command="python prep.py \
|
||||
--raw_data ${{inputs.raw_data}} \
|
||||
--train_data ${{outputs.train_data}} \
|
||||
--val_data ${{outputs.val_data}} \
|
||||
--test_data ${{outputs.test_data}} \
|
||||
--enable_monitoring ${{inputs.enable_monitoring}} \
|
||||
--table_name ${{inputs.table_name}}",
|
||||
environment=args.environment_name+"@latest",
|
||||
inputs={
|
||||
"raw_data": Input(type="uri_file"),
|
||||
"enable_monitoring": Input(type="string"),
|
||||
"table_name": Input(type="string")
|
||||
},
|
||||
outputs={
|
||||
"train_data": Output(type="uri_folder"),
|
||||
"val_data": Output(type="uri_folder"),
|
||||
"test_data": Output(type="uri_folder"),
|
||||
}
|
||||
)
|
||||
|
||||
train_model = command(
|
||||
name="train_model",
|
||||
display_name="train-model",
|
||||
code=os.path.join(parent_dir, "train"),
|
||||
command="python train.py \
|
||||
--train_data ${{inputs.train_data}} \
|
||||
--model_output ${{outputs.model_output}}",
|
||||
environment=args.environment_name+"@latest",
|
||||
inputs={"train_data": Input(type="uri_folder")},
|
||||
outputs={"model_output": Output(type="uri_folder")}
|
||||
)
|
||||
|
||||
# 1. Load components
|
||||
prepare_data = load_component(source=os.path.join(parent_dir , "prep.yml"))
|
||||
train_model = load_component(source=os.path.join(parent_dir, "train.yml"))
|
||||
evaluate_model = load_component(source=os.path.join(parent_dir, "evaluate.yml"))
|
||||
register_model = load_component(source=os.path.join(parent_dir, "register.yml"))
|
||||
evaluate_model = command(
|
||||
name="evaluate_model",
|
||||
display_name="evaluate-model",
|
||||
code=os.path.join(parent_dir, "evaluate"),
|
||||
command="python evaluate.py \
|
||||
--model_name ${{inputs.model_name}} \
|
||||
--model_input ${{inputs.model_input}} \
|
||||
--test_data ${{inputs.test_data}} \
|
||||
--evaluation_output ${{outputs.evaluation_output}}",
|
||||
environment=args.environment_name+"@latest",
|
||||
inputs={
|
||||
"model_name": Input(type="string"),
|
||||
"model_input": Input(type="uri_folder"),
|
||||
"test_data": Input(type="uri_folder")
|
||||
},
|
||||
outputs={
|
||||
"evaluation_output": Output(type="uri_folder")
|
||||
}
|
||||
)
|
||||
|
||||
register_model = command(
|
||||
name="register_model",
|
||||
display_name="register-model",
|
||||
code=os.path.join(parent_dir, "register"),
|
||||
command="python register.py \
|
||||
--model_name ${{inputs.model_name}} \
|
||||
--model_path ${{inputs.model_path}} \
|
||||
--evaluation_output ${{inputs.evaluation_output}} \
|
||||
--model_info_output_path ${{outputs.model_info_output_path}}",
|
||||
environment=args.environment_name+"@latest",
|
||||
inputs={
|
||||
"model_name": Input(type="string"),
|
||||
"model_path": Input(type="uri_folder"),
|
||||
"evaluation_output": Input(type="uri_folder")
|
||||
},
|
||||
outputs={
|
||||
"model_info_output_path": Output(type="uri_folder")
|
||||
}
|
||||
)
|
||||
|
||||
# 2. Construct pipeline
|
||||
@pipeline()
|
||||
def taxi_training_pipeline(raw_data, enable_monitoring, table_name):
|
||||
|
||||
prepare = prepare_data(
|
||||
prep = prep_data(
|
||||
raw_data=raw_data,
|
||||
enable_monitoring=enable_monitoring,
|
||||
table_name=table_name
|
||||
)
|
||||
|
||||
train = train_model(
|
||||
train_data=prepare.outputs.train_data
|
||||
train_data=prep.outputs.train_data
|
||||
)
|
||||
|
||||
evaluate = evaluate_model(
|
||||
model_name="taxi-model",
|
||||
model_input=train.outputs.model_output,
|
||||
test_data=prepare.outputs.test_data
|
||||
test_data=prep.outputs.test_data
|
||||
)
|
||||
|
||||
|
||||
|
@ -96,24 +167,24 @@ def main():
|
|||
)
|
||||
|
||||
return {
|
||||
"pipeline_job_train_data": prepare.outputs.train_data,
|
||||
"pipeline_job_test_data": prepare.outputs.test_data,
|
||||
"pipeline_job_train_data": prep.outputs.train_data,
|
||||
"pipeline_job_test_data": prep.outputs.test_data,
|
||||
"pipeline_job_trained_model": train.outputs.model_output,
|
||||
"pipeline_job_score_report": evaluate.outputs.evaluation_output,
|
||||
}
|
||||
|
||||
|
||||
pipeline_job = taxi_training_pipeline(
|
||||
Input(path=args.d + "@latest"), args.m, "taximonitoring"
|
||||
Input(path=args.data_name + "@latest", type="uri_file"), args.enable_monitoring, args.table_name
|
||||
)
|
||||
|
||||
# set pipeline level compute
|
||||
pipeline_job.settings.default_compute = args.c
|
||||
pipeline_job.settings.default_compute = args.compute_name
|
||||
# set pipeline level datastore
|
||||
pipeline_job.settings.default_datastore = "workspaceblobstore"
|
||||
|
||||
pipeline_job = ml_client.jobs.create_or_update(
|
||||
pipeline_job, experiment_name=args.n
|
||||
pipeline_job, experiment_name=args.experiment_name
|
||||
)
|
||||
|
||||
pipeline_job
|
||||
|
|
|
@ -26,43 +26,45 @@ resources:
|
|||
ref: main-dec31
|
||||
|
||||
stages:
|
||||
- stage: DeployTrainingPipeline
|
||||
displayName: Deploy Training Pipeline
|
||||
jobs:
|
||||
- job: DeployTrainingPipeline
|
||||
timeoutInMinutes: 120 # how long to run the job before automatically cancelling
|
||||
steps:
|
||||
- checkout: self
|
||||
path: s/
|
||||
- checkout: mlops-templates
|
||||
path: s/templates/
|
||||
- template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates
|
||||
- template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates
|
||||
- template: templates/python-sdk-v2/install-requirements.yml@mlops-templates
|
||||
- template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates
|
||||
- template: templates/aml-cli-v2/create-compute.yml@mlops-templates
|
||||
parameters:
|
||||
cluster_name: cpu-cluster
|
||||
size: Standard_DS3_v2
|
||||
min_instances: 0
|
||||
max_instances: 4
|
||||
cluster_tier: low_priority
|
||||
- template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
|
||||
parameters:
|
||||
environment_name: taxi-train-env
|
||||
environment_description: "Training Environment for Taxi Pipeline"
|
||||
environment_path: data-science/environment/train-conda.yml
|
||||
build_type: conda
|
||||
- template: templates/${{ variables.version }}/register-data-asset.yml@mlops-templates
|
||||
parameters:
|
||||
data_name: taxi-data
|
||||
data_description: taxi-training-dataset
|
||||
data_path: data/taxi-data.csv
|
||||
data_type: uri_file
|
||||
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
|
||||
parameters:
|
||||
pipeline_path: mlops/azureml/train/run_pipeline.py
|
||||
data_name: taxi-data
|
||||
cluster_name: cpu-cluster
|
||||
enable_monitoring: $(enable_monitoring)
|
||||
experiment_name: taxi-train-pipeline
|
||||
- stage: DeployTrainingPipeline
|
||||
displayName: Deploy Training Pipeline
|
||||
jobs:
|
||||
- job: DeployTrainingPipeline
|
||||
timeoutInMinutes: 120 # how long to run the job before automatically cancelling
|
||||
steps:
|
||||
- checkout: self
|
||||
path: s/
|
||||
- checkout: mlops-templates
|
||||
path: s/templates/
|
||||
- template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates
|
||||
- template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates
|
||||
- template: templates/python-sdk-v2/install-requirements.yml@mlops-templates
|
||||
- template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates
|
||||
- template: templates/aml-cli-v2/create-compute.yml@mlops-templates
|
||||
parameters:
|
||||
cluster_name: cpu-cluster
|
||||
size: Standard_DS3_v2
|
||||
min_instances: 0
|
||||
max_instances: 4
|
||||
cluster_tier: low_priority
|
||||
- template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
|
||||
parameters:
|
||||
environment_name: taxi-train-env
|
||||
environment_description: "Training Environment for Taxi Pipeline"
|
||||
environment_path: data-science/environment/train-conda.yml
|
||||
build_type: conda
|
||||
- template: templates/${{ variables.version }}/register-data-asset.yml@mlops-templates
|
||||
parameters:
|
||||
data_name: taxi-data
|
||||
data_description: taxi-training-dataset
|
||||
data_path: data/taxi-data.csv
|
||||
data_type: uri_file
|
||||
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
|
||||
parameters:
|
||||
pipeline_path: mlops/azureml/train/run_pipeline.py
|
||||
experiment_name: taxi-train-pipeline
|
||||
data_name: taxi-data
|
||||
environment_name: taxi-train-env
|
||||
compute_name: cpu-cluster
|
||||
enable_monitoring: $(enable_monitoring)
|
||||
table_name: 'taximonitoring'
|
||||
|
|
Loading…
Reference in New Issue