adding optional pipeline with automl training
This commit is contained in:
Родитель
a7ebe6b957
Коммит
ada53fa478
|
@ -0,0 +1,80 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
"""
|
||||
Registers the trained model with MLflow and writes its name and version to a JSON file.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
import pickle
|
||||
import mlflow
|
||||
|
||||
import os
|
||||
import json
|
||||
|
||||
def parse_args():
    """Build the command-line parser for the registration step and parse argv.

    Three optional string options are defined: ``--model_name``,
    ``--model_path`` and ``--model_info_output_path``. Arguments the parser
    does not recognize are ignored rather than rejected.

    Returns:
        The parsed ``argparse.Namespace`` (also printed to stdout).
    """
    # (flag, help text) pairs, declared in the order they are registered.
    option_specs = (
        ("--model_name", "Name under which model will be registered"),
        ("--model_path", "Model directory"),
        ("--model_info_output_path", "Path to write model info JSON"),
    )

    cli = argparse.ArgumentParser()
    for flag, description in option_specs:
        cli.add_argument(flag, type=str, help=description)

    # parse_known_args returns (namespace, leftovers); the leftovers are dropped.
    parsed = cli.parse_known_args()[0]
    print(f"Arguments: {parsed}")
    return parsed
|
||||
|
||||
|
||||
def _read_run_id(mlmodel_path):
    """Return the run ID recorded in the MLmodel file at *mlmodel_path*.

    The file is scanned line by line for ``run_id``; the text after the first
    colon on a matching line is taken as the value.

    Raises:
        ValueError: if no line yields a non-empty run ID.
    """
    run_id = ""
    with open(mlmodel_path, "r", encoding="utf-8") as model_file:
        for line in model_file:
            if "run_id" not in line:
                continue
            # partition never raises, unlike indexing the result of split().
            _, separator, value = line.partition(":")
            if separator:
                # Keep scanning: the last matching line wins.
                run_id = value.strip()

    if not run_id:
        # Fail here instead of registering the malformed URI "runs://outputs/".
        raise ValueError(f"No run_id found in {mlmodel_path}")
    return run_id


def main(args):
    """Register the model in ``args.model_path`` and write its info JSON.

    Reads the run ID from the ``MLmodel`` file inside ``args.model_path``,
    registers ``runs:/<run_id>/outputs/`` under ``args.model_name`` with
    MLflow, and writes ``{"id": "<model_name>:<version>"}`` to
    ``model_info.json`` inside ``args.model_info_output_path``.

    Args:
        args: Namespace with ``model_name``, ``model_path`` and
            ``model_info_output_path`` attributes.

    Raises:
        ValueError: if the MLmodel file contains no run ID.
    """
    print("Registering ", args.model_name)

    # Get the run ID from the MLmodel file stored with the model
    print("Getting model path")
    mlmodel_path = os.path.join(args.model_path, "MLmodel")
    runid = _read_run_id(mlmodel_path)

    # Construct the model URI from the run ID extracted above
    model_uri = "runs:/{}/outputs/".format(runid)
    print("Model URI: " + model_uri)

    # Register the model with the model URI and the chosen name
    print(f"Registering model as {args.model_name}")
    mlflow_model = mlflow.register_model(model_uri, args.model_name)
    model_version = mlflow_model.version

    # Write model info
    print("Writing JSON")
    model_info = {"id": "{0}:{1}".format(args.model_name, model_version)}
    output_path = os.path.join(args.model_info_output_path, "model_info.json")
    with open(output_path, "w", encoding="utf-8") as info_file:
        json.dump(model_info, fp=info_file)
|
||||
|
||||
|
||||
if __name__ == "__main__":

    # Using the run as a context manager ends it even when main() raises,
    # which separate start_run()/end_run() calls do not guarantee.
    with mlflow.start_run():

        # ---------- Parse Arguments ----------- #
        # -------------------------------------- #

        args = parse_args()

        lines = [
            f"Model name: {args.model_name}",
            f"Model path: {args.model_path}",
        ]

        for line in lines:
            print(line)

        main(args)
|
|
@ -8,6 +8,8 @@ az ml data create --file train/data.yml
|
|||
az ml environment create --file train/environment.yml
|
||||
# Create pipeline job
|
||||
az ml job create --file train/pipeline.yml
|
||||
# Create pipeline job that trains the model with AutoML
|
||||
az ml job create --file train/pipeline_automl.yml
|
||||
|
||||
# DEPLOY
|
||||
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
|
||||
type: pipeline
|
||||
experiment_name: taxi-training-pipeline
|
||||
description: Training Pipeline to train a model that predicts taxi fare price
|
||||
|
||||
# <inputs_and_outputs>
|
||||
inputs:
|
||||
raw_data:
|
||||
type: uri_file
|
||||
path: azureml:taxi-data@latest
|
||||
enable_monitoring: "false"
|
||||
table_name: 'taximonitoring'
|
||||
|
||||
outputs:
|
||||
train_data:
|
||||
val_data:
|
||||
test_data:
|
||||
trained_model:
|
||||
evaluation_output:
|
||||
model_info_output_path:
|
||||
# </inputs_and_outputs>
|
||||
|
||||
# <jobs>
|
||||
settings:
|
||||
default_datastore: azureml:workspaceblobstore
|
||||
default_compute: azureml:cpu-cluster
|
||||
continue_on_step_failure: false
|
||||
|
||||
jobs:
|
||||
prep_data:
|
||||
name: prep_data
|
||||
display_name: prep-data
|
||||
code: ../../../data-science/src/prep
|
||||
command: >-
|
||||
python prep.py
|
||||
--raw_data ${{inputs.raw_data}}
|
||||
--train_data ${{outputs.train_data}}
|
||||
--val_data ${{outputs.val_data}}
|
||||
--test_data ${{outputs.test_data}}
|
||||
--enable_monitoring ${{inputs.enable_monitoring}}
|
||||
--table_name ${{inputs.table_name}}
|
||||
environment: azureml:taxi-train-env@latest
|
||||
inputs:
|
||||
raw_data: ${{parent.inputs.raw_data}}
|
||||
enable_monitoring: ${{parent.inputs.enable_monitoring}}
|
||||
table_name: ${{parent.inputs.table_name}}
|
||||
outputs:
|
||||
train_data: ${{parent.outputs.train_data}}
|
||||
val_data: ${{parent.outputs.val_data}}
|
||||
test_data: ${{parent.outputs.test_data}}
|
||||
|
||||
train_model:
|
||||
type: automl
|
||||
name: train_model
|
||||
display_name: train-model
|
||||
task: regression
|
||||
primary_metric: r2_score
|
||||
limits:
|
||||
max_trials: 6
|
||||
max_concurrent_trials: 2
|
||||
target_column_name: "cost"
|
||||
training_data: ${{parent.jobs.prep_data.outputs.train_data}}
|
||||
validation_data: ${{parent.jobs.prep_data.outputs.val_data}}
|
||||
testing_data: ${{parent.jobs.prep_data.outputs.test_data}}
|
||||
featurization:
|
||||
mode: "auto"
|
||||
training:
|
||||
enable_stack_ensemble: false
|
||||
enable_vote_ensemble: false
|
||||
outputs:
|
||||
best_model:
|
||||
type: mlflow_model
|
||||
|
||||
register_model:
|
||||
name: register_model
|
||||
display_name: register-model
|
||||
code: ../../../data-science/src/register
|
||||
command: >-
|
||||
python register_automl.py
|
||||
--model_name ${{inputs.model_name}}
|
||||
--model_path ${{inputs.model_path}}
|
||||
--model_info_output_path ${{outputs.model_info_output_path}}
|
||||
environment: azureml:taxi-train-env@latest
|
||||
inputs:
|
||||
model_name: "taxi-model-automl"
|
||||
model_path: ${{parent.jobs.train_model.outputs.best_model}}
|
||||
outputs:
|
||||
model_info_output_path: ${{parent.outputs.model_info_output_path}}
|
||||
# </jobs>
|
Загрузка…
Ссылка в новой задаче