adding optional pipeline with automl training

This commit is contained in:
Maggie Mhanna 2022-11-06 09:20:44 +00:00
Родитель a7ebe6b957
Коммит ada53fa478
3 изменённых файлов: 171 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,80 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
Registers the trained MLflow model and writes its name and version to a JSON file.
"""
import argparse
from pathlib import Path
import pickle
import mlflow
import os
import json
def parse_args():
    """Read the registration options from the command line.

    Recognised options (all optional strings, ``None`` when omitted):
    ``--model_name``, ``--model_path`` and ``--model_info_output_path``.
    Unrecognised arguments are ignored rather than treated as errors.

    Returns:
        The parsed ``argparse.Namespace``; it is also echoed to stdout.
    """
    option_help = (
        ("--model_name", "Name under which model will be registered"),
        ("--model_path", "Model directory"),
        ("--model_info_output_path", "Path to write model info JSON"),
    )

    cli = argparse.ArgumentParser()
    for flag, description in option_help:
        cli.add_argument(flag, type=str, help=description)

    # parse_known_args() returns (namespace, leftovers); the leftovers are dropped.
    parsed, _unknown = cli.parse_known_args()
    print(f"Arguments: {parsed}")
    return parsed
def main(args):
    """Register the model found at ``args.model_path`` and record its version.

    The ``MLmodel`` file inside ``args.model_path`` is scanned for a ``run_id``
    entry. The model at ``runs:/<run_id>/outputs/`` is then registered under
    ``args.model_name`` and ``{"id": "<model_name>:<version>"}`` is written to
    ``model_info.json`` inside ``args.model_info_output_path``.

    Args:
        args: Namespace providing ``model_name``, ``model_path`` and
            ``model_info_output_path``.

    Raises:
        ValueError: If the ``MLmodel`` file contains no non-empty ``run_id``
            value; without it the model URI would be malformed.
    """
    print("Registering ", args.model_name)

    # Get Run ID from model path
    print("Getting model path")
    mlmodel_path = os.path.join(args.model_path, "MLmodel")
    runid = ""
    with open(mlmodel_path, "r") as modelfile:
        for line in modelfile:
            if "run_id" in line:
                # partition() yields "" instead of raising when the line has
                # no ":"; empty values never overwrite an ID already found.
                candidate = line.partition(":")[2].strip()
                if candidate:
                    runid = candidate

    if not runid:
        # Fail early with a clear message instead of registering "runs://outputs/".
        raise ValueError(f"No run_id found in {mlmodel_path}")

    # Construct Model URI from the run ID extracted previously
    model_uri = "runs:/{}/outputs/".format(runid)
    print("Model URI: " + model_uri)

    # Register the model with Model URI and Name of choice
    print(f"Registering model as {args.model_name}")
    mlflow_model = mlflow.register_model(model_uri, args.model_name)
    model_version = mlflow_model.version

    # Write model info
    print("Writing JSON")
    model_info = {"id": "{0}:{1}".format(args.model_name, model_version)}
    output_path = os.path.join(args.model_info_output_path, "model_info.json")
    with open(output_path, "w") as of:
        json.dump(model_info, fp=of)
if __name__ == "__main__":
    # Script entry point: run the registration inside an MLflow run.
    # Using the run as a context manager closes it even when main() raises
    # (the run is then marked as failed); a trailing mlflow.end_run() call
    # would be skipped in that case.
    with mlflow.start_run():
        # ---------- Parse Arguments ----------- #
        # -------------------------------------- #
        args = parse_args()

        # Echo the key inputs so they show up in the job log.
        lines = [
            f"Model name: {args.model_name}",
            f"Model path: {args.model_path}",
        ]
        for line in lines:
            print(line)

        main(args)

Просмотреть файл

@ -8,6 +8,8 @@ az ml data create --file train/data.yml
# Create the environment described by train/environment.yml
az ml environment create --file train/environment.yml
# Create pipeline job
az ml job create --file train/pipeline.yml
# Create pipeline job with automl training job
# NOTE(review): this job is submitted unconditionally right after the standard
# pipeline job; if the AutoML pipeline is meant to be optional, confirm whether
# this line should be gated or commented out by default.
az ml job create --file train/pipeline_automl.yml
# DEPLOY

Просмотреть файл

@ -0,0 +1,89 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline
experiment_name: taxi-training-pipeline
description: Training Pipeline to train a model that predicts taxi fare price

# Pipeline with three steps: prep_data (command job running prep.py),
# train_model (AutoML regression job) and register_model (command job
# running register_automl.py on the AutoML best model).

# <inputs_and_outputs>
inputs:
  raw_data:
    type: uri_file
    path: azureml:taxi-data@latest
  enable_monitoring: "false"
  table_name: 'taximonitoring'

outputs:
  train_data:
  val_data:
  test_data:
  # NOTE(review): trained_model and evaluation_output are not bound by any job
  # in this pipeline — confirm whether they are still needed.
  trained_model:
  evaluation_output:
  model_info_output_path:
# </inputs_and_outputs>

# <jobs>
settings:
  default_datastore: azureml:workspaceblobstore
  default_compute: azureml:cpu-cluster
  continue_on_step_failure: false

jobs:
  # Step 1: run prep.py on the raw data, writing train/val/test outputs.
  prep_data:
    name: prep_data
    display_name: prep-data
    code: ../../../data-science/src/prep
    command: >-
      python prep.py
      --raw_data ${{inputs.raw_data}}
      --train_data ${{outputs.train_data}}
      --val_data ${{outputs.val_data}}
      --test_data ${{outputs.test_data}}
      --enable_monitoring ${{inputs.enable_monitoring}}
      --table_name ${{inputs.table_name}}
    environment: azureml:taxi-train-env@latest
    inputs:
      raw_data: ${{parent.inputs.raw_data}}
      enable_monitoring: ${{parent.inputs.enable_monitoring}}
      table_name: ${{parent.inputs.table_name}}
    outputs:
      train_data: ${{parent.outputs.train_data}}
      val_data: ${{parent.outputs.val_data}}
      test_data: ${{parent.outputs.test_data}}

  # Step 2: AutoML regression on the "cost" column, optimising r2_score.
  train_model:
    type: automl
    name: train_model
    display_name: train-model
    task: regression
    primary_metric: r2_score
    limits:
      max_trials: 6
      max_concurrent_trials: 2
    target_column_name: "cost"
    # NOTE(review): AutoML tabular tasks take MLTable inputs — confirm that
    # prep.py writes an MLTable definition into each of these outputs.
    training_data: ${{parent.jobs.prep_data.outputs.train_data}}
    validation_data: ${{parent.jobs.prep_data.outputs.val_data}}
    # The AutoML job schema names this field test_data (not testing_data).
    test_data: ${{parent.jobs.prep_data.outputs.test_data}}
    featurization:
      mode: "auto"
    training:
      enable_stack_ensemble: false
      enable_vote_ensemble: false
    outputs:
      best_model:
        type: mlflow_model

  # Step 3: register the AutoML best model and write model_info.json.
  register_model:
    name: register_model
    display_name: register-model
    code: ../../../data-science/src/register
    command: >-
      python register_automl.py
      --model_name ${{inputs.model_name}}
      --model_path ${{inputs.model_path}}
      --model_info_output_path ${{outputs.model_info_output_path}}
    environment: azureml:taxi-train-env@latest
    inputs:
      model_name: "taxi-model-automl"
      model_path: ${{parent.jobs.train_model.outputs.best_model}}
    outputs:
      model_info_output_path: ${{parent.outputs.model_info_output_path}}
# </jobs>