Merge pull request #104 from Azure/feature/rai-aml-cli-v2

fixes github actions as RAI has not been implemented yet for GHA
This commit is contained in:
Cindy Weng 2022-11-08 16:48:28 +00:00 коммит произвёл GitHub
Родитель 910f41bf6c e06972a99b
Коммит 1328f776eb
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
42 изменённых файлов: 12279 добавлений и 124 удалений

2
.github/PULL_REQUEST_TEMPLATE.md поставляемый
Просмотреть файл

@ -1,4 +1,4 @@
# PR into Azure/mlops-v2
# PR into Azure/mlops-project-template
## Checklist

Просмотреть файл

@ -12,5 +12,8 @@ dependencies:
- pandas==1.2.1
- joblib==1.0.0
- matplotlib==3.3.3
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
- fairlearn==0.7.0
- azureml-contrib-fairness==1.38.0
- interpret-community==0.24.1
- interpret-core==0.2.7
- azureml-interpret==1.38.0

Просмотреть файл

@ -12,5 +12,8 @@ dependencies:
- pandas==1.2.1
- joblib==1.0.0
- matplotlib==3.3.3
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
- fairlearn==0.7.0
- azureml-contrib-fairness==1.38.0
- interpret-community==0.24.1
- interpret-core==0.2.7
- azureml-interpret==1.38.0

Просмотреть файл

@ -0,0 +1,233 @@
import argparse
from pathlib import Path
import os
import pickle
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from azureml.core import Run, Model
from fairlearn.metrics._group_metric_set import _create_group_metric_set
from azureml.contrib.fairness import upload_dashboard_dictionary, download_dashboard_by_upload_id
from interpret_community import TabularExplainer
from azureml.interpret import ExplanationClient
import mlflow
import mlflow.sklearn
# Module-level setup for the evaluation script.
# NOTE: the two statements below execute at import time.

# Handle to the current Azure ML run; main() passes it to the fairness
# dashboard upload and to the explanation client.
run = Run.get_context()
# Workspace of the run's experiment; main() lists registered models from it.
ws = run.experiment.workspace
# Name of the label column in the prepared CSV files.
TARGET_COL = "cost"
# Feature columns selected from the prepared data. main() concatenates the
# three lists (numeric + nominal + ordinal) to build the feature frame.
NUMERIC_COLS = [
"distance",
"dropoff_latitude",
"dropoff_longitude",
"passengers",
"pickup_latitude",
"pickup_longitude",
"pickup_weekday",
"pickup_month",
"pickup_monthday",
"pickup_hour",
"pickup_minute",
"pickup_second",
"dropoff_weekday",
"dropoff_month",
"dropoff_monthday",
"dropoff_hour",
"dropoff_minute",
"dropoff_second",
]
# Nominal (unordered) categorical feature columns.
CAT_NOM_COLS = [
"store_forward",
"vendor",
]
# Ordinal categorical feature columns; currently none.
CAT_ORD_COLS = [
]
SENSITIVE_COLS = ["vendor"]  # columns passed as sensitive features to the Fairlearn dashboard
def parse_args():
    """Parse the command-line options of the evaluation step.

    Returns:
        argparse.Namespace with the attributes ``model_name``,
        ``model_input``, ``prepared_data``, ``predictions``,
        ``score_report`` and ``deploy_flag``. Every option is a string and
        is ``None`` when it is not supplied.
    """
    # (flag, help text) pairs; all options share type=str and have no default.
    option_help = (
        ("--model_name", "Name of registered model"),
        ("--model_input", "Path of input model"),
        ("--prepared_data", "Path to transformed data"),
        ("--predictions", "Path of predictions"),
        ("--score_report", "Path to score report"),
        ("--deploy_flag", "A deploy flag whether to deploy or no"),
    )

    cli = argparse.ArgumentParser("predict")
    for flag, description in option_help:
        cli.add_argument(flag, type=str, help=description)
    return cli.parse_args()
def _load_pickle(path):
    """Return the object unpickled from ``path``; the file is closed before returning."""
    # NOTE(review): pickle executes arbitrary code while loading -- only use
    # this on model files produced by this pipeline or its model registry.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def main():
    """Evaluate the freshly trained model and decide whether to promote it.

    Steps, in order:

    1. Read ``train.csv`` / ``test.csv`` from ``--prepared_data`` and the
       pickled model from ``--model_input``.
    2. Predict on the test set, write ``predictions.csv`` to
       ``--predictions`` and the error metrics to ``score.txt`` in
       ``--score_report``; metrics and a scatter plot are logged to MLflow.
    3. Score every registered model named ``--model_name`` on the same test
       set and write ``1`` to ``<--deploy_flag>/deploy_flag`` when the new
       model's R2 is at least as high as all of them (or when none exists),
       otherwise ``0``.
    4. Upload a fairness dashboard grouped by ``SENSITIVE_COLS`` and a global
       explanation of the new model to the current run.
    """
    args = parse_args()

    for line in (
        f"Model path: {args.model_input}",
        f"Test data path: {args.prepared_data}",
        f"Predictions path: {args.predictions}",
        f"Scoring output path: {args.score_report}",
    ):
        print(line)

    # ---------------- Model Evaluation ---------------- #
    print("mounted_path files: ")
    print(os.listdir(args.prepared_data))

    prepared_dir = Path(args.prepared_data)
    train_data = pd.read_csv(prepared_dir / "train.csv")
    test_data = pd.read_csv(prepared_dir / "test.csv")

    feature_cols = NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS
    # The training features are only needed to initialise the explainer below.
    X_train = train_data[feature_cols]
    y_test = test_data[TARGET_COL]
    X_test = test_data[feature_cols]

    # Load the model from the input port and predict on the test set.
    model = _load_pickle(Path(args.model_input) / "model.pkl")
    yhat_test = model.predict(X_test)

    # Save the feature columns together with the actual and predicted cost.
    output_data = X_test.copy()
    output_data["real_label"] = y_test
    output_data["predicted_label"] = yhat_test
    output_data.to_csv(Path(args.predictions) / "predictions.csv")

    # Evaluate model performance on the test set.
    r2 = r2_score(y_test, yhat_test)
    mse = mean_squared_error(y_test, yhat_test)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, yhat_test)

    # Write the score report; the newline after the model repr keeps the first
    # metric from being glued onto the same line.
    with open(Path(args.score_report) / "score.txt", "w") as f:
        f.write("Scored with the following model:\n{}\n".format(model))
        f.write("Mean squared error: %.2f \n" % mse)
        f.write("Root mean squared error: %.2f \n" % rmse)
        f.write("Mean absolute error: %.2f \n" % mae)
        f.write("Coefficient of determination: %.2f \n" % r2)

    mlflow.log_metric("test r2", r2)
    mlflow.log_metric("test mse", mse)
    mlflow.log_metric("test rmse", rmse)
    mlflow.log_metric("test mae", mae)

    # Visualize results: predictions against the identity line.
    plt.scatter(y_test, yhat_test, color='black')
    plt.plot(y_test, y_test, color='blue', linewidth=3)
    plt.xlabel("Real value")
    plt.ylabel("Predicted value")
    plt.title("Comparing Model Predictions to Real values - Test Data")
    plt.savefig("predictions.png")
    mlflow.log_artifact("predictions.png")

    # -------------------- Promotion ------------------- #
    scores = {}
    predictions = {}
    score = r2  # R2 of the current (new) model on the test set

    # Re-score every already registered model of the same name on this test set.
    for model_run in Model.list(ws):
        if model_run.name != args.model_name:
            continue
        model_path = model_run.download(exist_ok=True)
        registered = _load_pickle(Path(model_path) / "model.pkl")
        predictions[model_run.id] = registered.predict(X_test)
        scores[model_run.id] = r2_score(y_test, predictions[model_run.id])
    print(scores)

    # Promote when there is nothing to compare against, or when the new model
    # is at least as good as the best registered one.
    deploy_flag = 1 if not scores or score >= max(scores.values()) else 0
    print("Deploy flag: ", deploy_flag)

    with open(Path(args.deploy_flag) / "deploy_flag", 'w') as f:
        f.write('%d' % deploy_flag)

    scores["current model"] = score
    perf_comparison_plot = pd.DataFrame(scores, index=["r2 score"]).plot(kind='bar', figsize=(15, 10))
    perf_comparison_plot.figure.savefig("perf_comparison.png")
    perf_comparison_plot.figure.savefig(Path(args.score_report) / "perf_comparison.png")

    mlflow.log_metric("deploy flag", bool(deploy_flag))
    mlflow.log_artifact("perf_comparison.png")

    # -------------------- FAIRNESS ------------------- #
    # One single-column frame per sensitive feature.
    sf = {col: X_test[[col]] for col in SENSITIVE_COLS}
    # Same label as in `scores`, so both comparisons name the new model alike.
    predictions["current model"] = list(yhat_test)

    dash_dict_all = _create_group_metric_set(y_true=y_test,
                                             predictions=predictions,
                                             sensitive_features=sf,
                                             prediction_type='regression',
                                             )

    # Upload the dashboard to Azure Machine Learning.
    # validate_model_ids=False because the compared entries are not all
    # registered model ids (the current model is not registered yet).
    dashboard_title = "Fairness insights Comparison of Models"
    upload_id = upload_dashboard_dictionary(run,
                                            dash_dict_all,
                                            dashboard_name=dashboard_title,
                                            validate_model_ids=False)
    print("\nUploaded to id: {0}\n".format(upload_id))

    # -------------------- Explainability ------------------- #
    tabular_explainer = TabularExplainer(model,
                                         initialization_examples=X_train,
                                         features=X_train.columns)

    # Global feature importance, computed on the test data.
    global_explanation = tabular_explainer.explain_global(X_test)

    print("Explainability feature importance:")
    print(global_explanation.get_feature_importance_dict())

    client = ExplanationClient.from_run(run)
    client.upload_model_explanation(global_explanation, comment='global explanation: all features')


if __name__ == "__main__":
    main()

Просмотреть файл

@ -0,0 +1,78 @@
import argparse
from pathlib import Path
import os
import numpy as np
import pandas as pd
import mlflow
def parse_args():
    """Parse the command-line options of the data-preparation step.

    Returns:
        argparse.Namespace with ``raw_data``, ``prepared_data``,
        ``enable_monitoring`` (all strings, ``None`` when omitted) and
        ``table_name`` (string, defaults to ``"mlmonitoring"``).
    """
    cli = argparse.ArgumentParser("prep")
    add = cli.add_argument

    add("--raw_data", type=str, help="Path to raw data")
    add("--prepared_data", type=str, help="Path of prepared data")
    add("--enable_monitoring", type=str, help="enable logging to ADX")
    add("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging")

    return cli.parse_args()
def log_training_data(df, table_name):
    """Send ``df`` to the monitoring table ``table_name`` in a single batch.

    Args:
        df: data to hand to the collector's ``batch_collect``.
        table_name: table name the collector is constructed with.
    """
    # Imported inside the function, so `obs` is only loaded when this is called.
    from obs.collector import Online_Collector as _OnlineCollector

    _OnlineCollector(table_name).batch_collect(df)
def _monitoring_enabled(flag):
    """Return True when ``flag`` is 'true', '1' or 'yes' (case-insensitive); None counts as off."""
    return (flag or "").strip().lower() in {"true", "1", "yes"}


def main():
    """Split the raw taxi data into train/val/test CSV files.

    Reads ``taxi-data.csv`` from ``--raw_data``, assigns each row at random
    to train (~70%), validation (~15%) or test (~15%), logs the three sizes
    to MLflow and writes ``train.csv``, ``val.csv`` and ``test.csv`` into
    ``--prepared_data``. When ``--enable_monitoring`` is affirmative, the
    full raw frame is also sent to the monitoring table ``--table_name``.

    No random seed is set in this function, so the split differs from run
    to run.
    """
    # ---------- Parse Arguments ----------- #
    args = parse_args()

    for line in (
        f"Raw data path: {args.raw_data}",
        f"Data output path: {args.prepared_data}",
    ):
        print(line)

    # ------------ Reading Data ------------ #
    print("mounted_path files: ")
    print(os.listdir(args.raw_data))

    data = pd.read_csv(Path(args.raw_data) / 'taxi-data.csv')

    # ------------- Split Data ------------- #
    # One uniform draw per row decides which split the row lands in.
    random_data = np.random.rand(len(data))

    msk_train = random_data < 0.7
    msk_val = (random_data >= 0.7) & (random_data < 0.85)
    msk_test = random_data >= 0.85

    train = data[msk_train]
    val = data[msk_val]
    test = data[msk_test]

    mlflow.log_metric('train size', train.shape[0])
    mlflow.log_metric('val size', val.shape[0])
    mlflow.log_metric('test size', test.shape[0])

    out_dir = Path(args.prepared_data)
    train.to_csv(out_dir / "train.csv")
    val.to_csv(out_dir / "val.csv")
    test.to_csv(out_dir / "test.csv")

    # The previous check compared the bound method `str.lower` (never called)
    # to a string, so only the literal '1' ever enabled monitoring.
    if _monitoring_enabled(args.enable_monitoring):
        log_training_data(data, args.table_name)


if __name__ == "__main__":
    main()

Просмотреть файл

@ -0,0 +1,56 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import argparse
from pathlib import Path
import pickle
import mlflow
from azureml.core import Run
# Look up the current Azure ML run and its id.
# NOTE: these statements execute at import time, before main() is called.
run = Run.get_context()
# The run id is used in main() to build the `runs:/<run_id>/<model_name>` URI.
run_id = run.get_details()["runId"]
print(run_id)
def parse_args():
    """Parse the command-line options of the registration step.

    Unrecognised options are ignored rather than rejected. The parsed
    namespace is printed before it is returned.

    Returns:
        argparse.Namespace with ``model_name``, ``model_path`` and
        ``deploy_flag`` (strings, ``None`` when omitted).
    """
    cli = argparse.ArgumentParser()
    for flag, description in (
        ('--model_name', 'Name under which model will be registered'),
        ('--model_path', 'Model directory'),
        ('--deploy_flag', 'A deploy flag whether to deploy or no'),
    ):
        cli.add_argument(flag, type=str, help=description)

    # parse_known_args returns (namespace, leftover argv); leftovers are dropped.
    known = cli.parse_known_args()[0]
    print(f'Arguments: {known}')
    return known
def main():
    """Register the trained model when the evaluation step approved it.

    Reads the integer stored in ``<--deploy_flag>/deploy_flag``. When it is
    ``1``, the pickled model at ``<--model_path>/model.pkl`` is logged to
    MLflow under ``--model_name`` and registered from the current run;
    otherwise nothing is registered.
    """
    # Load the submodule explicitly instead of relying on `import mlflow`
    # exposing it; the sibling scripts import it explicitly as well.
    import mlflow.sklearn

    args = parse_args()
    model_name = args.model_name
    model_path = args.model_path

    with open(Path(args.deploy_flag) / "deploy_flag") as flag_file:
        deploy_flag = int(flag_file.read())

    if deploy_flag != 1:
        print("Model will not be registered!")
        return

    print("Registering ", model_name)

    # NOTE(review): pickle executes arbitrary code while loading -- the model
    # file must come from this pipeline's own training step.
    with open(Path(model_path) / "model.pkl", "rb") as model_file:
        model = pickle.load(model_file)

    # Log the model to the current run, then register it from that run.
    mlflow.sklearn.log_model(model, model_name)
    model_uri = f'runs:/{run_id}/{model_name}'
    mlflow.register_model(model_uri, model_name)


if __name__ == "__main__":
    main()

Просмотреть файл

@ -0,0 +1,191 @@
import argparse
from pathlib import Path
import os
import pickle
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import mlflow
import mlflow.sklearn
# Name of the label column in the prepared training data.
TARGET_COL = "cost"
# Feature columns selected from train.csv. main() concatenates the three
# lists (numeric + nominal + ordinal) to build the training feature frame.
NUMERIC_COLS = [
"distance",
"dropoff_latitude",
"dropoff_longitude",
"passengers",
"pickup_latitude",
"pickup_longitude",
"pickup_weekday",
"pickup_month",
"pickup_monthday",
"pickup_hour",
"pickup_minute",
"pickup_second",
"dropoff_weekday",
"dropoff_month",
"dropoff_monthday",
"dropoff_hour",
"dropoff_minute",
"dropoff_second",
]
# Nominal (unordered) categorical feature columns.
CAT_NOM_COLS = [
"store_forward",
"vendor",
]
# Ordinal categorical feature columns; currently none.
CAT_ORD_COLS = [
]
def parse_args():
    """Parse the command-line options of the training step.

    Returns:
        argparse.Namespace with ``prepared_data`` and ``model_output``
        (strings, ``None`` when omitted) plus the random-forest
        hyperparameters ``regressor__n_estimators`` (500),
        ``regressor__bootstrap`` (1), ``regressor__max_depth`` (10),
        ``regressor__max_features`` ('auto'),
        ``regressor__min_samples_leaf`` (4) and
        ``regressor__min_samples_split`` (5); defaults in parentheses.
    """
    cli = argparse.ArgumentParser("train")

    # Data and artifact locations.
    cli.add_argument("--prepared_data", type=str, help="Path to training data")
    cli.add_argument("--model_output", type=str, help="Path of output model")

    # Regressor hyperparameters as (name, type, default, help text).
    hyperparameters = (
        ("n_estimators", int, 500, 'Number of trees'),
        ("bootstrap", int, 1, 'Method of selecting samples for training each tree'),
        ("max_depth", int, 10, ' Maximum number of levels in tree'),
        ("max_features", str, 'auto', 'Number of features to consider at every split'),
        ("min_samples_leaf", int, 4, 'Minimum number of samples required at each leaf node'),
        ("min_samples_split", int, 5, 'Minimum number of samples required to split a node'),
    )
    for name, kind, default, description in hyperparameters:
        cli.add_argument(f"--regressor__{name}", type=kind, default=default, help=description)

    return cli.parse_args()
def main():
    """Train a random-forest regressor on the prepared training data.

    Reads ``train.csv`` from ``--prepared_data``, fits a
    ``RandomForestRegressor`` (``random_state=0``) on the feature columns
    exactly as stored in the CSV -- no scaling, imputation or encoding is
    applied -- logs the hyperparameters, the training-set metrics and a
    scatter plot to MLflow, and pickles the fitted model to
    ``<--model_output>/model.pkl``.
    """
    args = parse_args()

    for line in (
        f"Training data path: {args.prepared_data}",
        f"Model output path: {args.model_output}",
    ):
        print(line)

    print("mounted_path files: ")
    print(os.listdir(args.prepared_data))

    train_data = pd.read_csv(Path(args.prepared_data) / "train.csv")

    # Split the data into input (X) and output (y).
    y_train = train_data[TARGET_COL]
    X_train = train_data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]

    # The CLI carries `bootstrap` as an int (0/1); the estimator documents a
    # bool, so convert it here. The raw CLI value is still what gets logged.
    model = RandomForestRegressor(
        n_estimators=args.regressor__n_estimators,
        bootstrap=bool(args.regressor__bootstrap),
        max_depth=args.regressor__max_depth,
        max_features=args.regressor__max_features,
        min_samples_leaf=args.regressor__min_samples_leaf,
        min_samples_split=args.regressor__min_samples_split,
        random_state=0,
    )

    mlflow.log_param("model", "RandomForestRegressor")
    mlflow.log_param("n_estimators", args.regressor__n_estimators)
    mlflow.log_param("bootstrap", args.regressor__bootstrap)
    mlflow.log_param("max_depth", args.regressor__max_depth)
    mlflow.log_param("max_features", args.regressor__max_features)
    mlflow.log_param("min_samples_leaf", args.regressor__min_samples_leaf)
    mlflow.log_param("min_samples_split", args.regressor__min_samples_split)

    model.fit(X_train, y_train)

    # Predict on the training set itself to report the fit quality.
    yhat_train = model.predict(X_train)

    r2 = r2_score(y_train, yhat_train)
    mse = mean_squared_error(y_train, yhat_train)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_train, yhat_train)

    mlflow.log_metric("train r2", r2)
    mlflow.log_metric("train mse", mse)
    mlflow.log_metric("train rmse", rmse)
    mlflow.log_metric("train mae", mae)

    # Visualize results: predictions against the identity line.
    plt.scatter(y_train, yhat_train, color='black')
    plt.plot(y_train, y_train, color='blue', linewidth=3)
    plt.xlabel("Real value")
    plt.ylabel("Predicted value")
    plt.savefig("regression_results.png")
    mlflow.log_artifact("regression_results.png")

    # Save the model; the context manager flushes and closes the file.
    with open(Path(args.model_output) / "model.pkl", "wb") as model_file:
        pickle.dump(model, model_file)


if __name__ == "__main__":
    main()

Просмотреть файл

@ -1,23 +1,21 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline
experiment_name: taxi-fare-training
experiment_name: taxi-fare-training-gha
description: Training Pipeline to train a model that predicts taxi fare price
# <inputs_and_outputs>
inputs:
input: #using local data, will create an anonymous data asset
input: #using local data, will create an anonymous data asset
type: uri_folder
path: ../../../data/
enable_monitoring: "true"
table_name: 'taximonitoring'
enable_monitoring: 'false'
outputs:
train_data:
val_data:
test_data:
prepared_data:
trained_model:
evaluation_output:
model_info_output_path:
predictions:
score_report:
deploy_flag:
# </inputs_and_outputs>
# <jobs>
@ -30,137 +28,68 @@ jobs:
prep_data:
name: prep_data
display_name: prep-data
code: ../../../data-science/src/prep
code: ../../../data-science/src
command: >-
python prep.py
--raw_data ${{inputs.raw_data}}
--train_data ${{outputs.train_data}}
--val_data ${{outputs.val_data}}
--test_data ${{outputs.test_data}}
--prepared_data ${{outputs.prepared_data}}
--enable_monitoring ${{inputs.enable_monitoring}}
--table_name ${{inputs.table_name}}
environment: azureml:taxi-train-env@latest
inputs:
raw_data: ${{parent.inputs.input}}
enable_monitoring: ${{parent.inputs.enable_monitoring}}
table_name: ${{parent.inputs.table_name}}
outputs:
train_data: ${{parent.outputs.train_data}}
val_data: ${{parent.outputs.val_data}}
test_data: ${{parent.outputs.test_data}}
prepared_data: ${{parent.outputs.prepared_data}}
train_model:
name: train_model
display_name: train-model
code: ../../../data-science/src/train
code: ../../../data-science/src
command: >-
python train.py
--train_data ${{inputs.train_data}}
--prepared_data ${{inputs.prepared_data}}
--model_output ${{outputs.model_output}}
environment: azureml:taxi-train-env@latest
inputs:
train_data: ${{parent.jobs.prep_data.outputs.train_data}}
prepared_data: ${{parent.jobs.prep_data.outputs.prepared_data}}
outputs:
model_output: ${{parent.outputs.trained_model}}
evaluate_model:
name: evaluate_model
display_name: evaluate-model
code: ../../../data-science/src/evaluate
code: ../../../data-science/src
command: >-
python evaluate.py
--model_name ${{inputs.model_name}}
--model_input ${{inputs.model_input}}
--test_data ${{inputs.test_data}}
--evaluation_output ${{outputs.evaluation_output}}
--prepared_data ${{inputs.prepared_data}}
--predictions ${{outputs.predictions}}
--score_report ${{outputs.score_report}}
--deploy_flag ${{outputs.deploy_flag}}
environment: azureml:taxi-train-env@latest
inputs:
model_name: "taxi-model"
model_name: "taxi-model-gha"
model_input: ${{parent.jobs.train_model.outputs.model_output}}
test_data: ${{parent.jobs.prep_data.outputs.test_data}}
prepared_data: ${{parent.jobs.prep_data.outputs.prepared_data}}
outputs:
evaluation_output: ${{parent.outputs.evaluation_output}}
predictions: ${{parent.outputs.predictions}}
score_report: ${{parent.outputs.score_report}}
deploy_flag: ${{parent.outputs.deploy_flag}}
register_model:
name: register_model
display_name: register-model
code: ../../../data-science/src/register
code: ../../../data-science/src
command: >-
python register.py
--model_name ${{inputs.model_name}}
--model_path ${{inputs.model_path}}
--evaluation_output ${{inputs.evaluation_output}}
--model_info_output_path ${{outputs.model_info_output_path}}
--deploy_flag ${{inputs.deploy_flag}}
environment: azureml:taxi-train-env@latest
inputs:
model_name: "taxi-model"
model_name: "taxi-model-gha"
model_path: ${{parent.jobs.train_model.outputs.model_output}}
evaluation_output: ${{parent.jobs.evaluate_model.outputs.evaluation_output}}
outputs:
model_info_output_path: ${{parent.outputs.model_info_output_path}}
deploy_flag: ${{parent.jobs.evaluate_model.outputs.deploy_flag}}
create_rai_job:
type: command
component: azureml:rai_insights_constructor@latest
limits:
timeout: 120
inputs:
title: Responsible AI for Taxi Fare Prediction
task_type: regression
model_info_path: ${{parent.jobs.register_model.outputs.model_info_output_path}}
train_dataset: ${{parent.jobs.prep_data.outputs.train_data}}
test_dataset: ${{parent.jobs.prep_data.outputs.test_data}}
target_column_name: "cost"
categorical_column_names: '["store_forward", "vendor"]'
explain_01:
type: command
component: azureml:rai_insights_explanation@latest
limits:
timeout: 120
inputs:
comment: Some random string
rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
causal_01:
type: command
component: azureml:rai_insights_causal@latest
limits:
timeout: 120
inputs:
rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
treatment_features: '["distance", "passengers"]'
heterogeneity_features: '["store_forward", "vendor"]'
counterfactual_01:
type: command
component: azureml:rai_insights_counterfactual@latest
limits:
timeout: 600
inputs:
rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
total_CFs: 10
desired_range: '[16, 30]'
feature_importance: True
error_analysis_01:
type: command
component: azureml:rai_insights_erroranalysis@latest
limits:
timeout: 120
inputs:
rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
filter_features: '["distance", "passengers"]'
gather_01:
type: command
component: azureml:rai_insights_gather@latest
limits:
timeout: 120
inputs:
constructor: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
insight_1: ${{parent.jobs.causal_01.outputs.causal}}
insight_2: ${{parent.jobs.counterfactual_01.outputs.counterfactual}}
insight_3: ${{parent.jobs.error_analysis_01.outputs.error_analysis}}
insight_4: ${{parent.jobs.explain_01.outputs.explanation}}
# </jobs>
# </jobs>

Просмотреть файл

@ -11,7 +11,7 @@ variables:
- name: version
value: aml-cli-v2
- name: endpoint_name
value: taxi-batch-$(namespace)$(postfix)$(environment)
value: taxi2-batch-$(namespace)$(postfix)$(environment)
- name: endpoint_type
value: batch
@ -28,7 +28,7 @@ resources:
name: Azure/mlops-templates # need to change org name from "Azure" to your own org
endpoint: github-connection # need to set up and hardcode
type: github
ref: main
ref: main-jul31
stages:

Просмотреть файл

@ -37,13 +37,11 @@ stages:
displayName: Deploy Training Pipeline
jobs:
- job: DeployTrainingPipeline
timeoutInMinutes: 120 # how long to run the job before automatically cancelling
steps:
- checkout: self
path: s/
- checkout: mlops-templates
path: s/templates/
- template: templates/tests/unit-tests.yml@mlops-templates
- template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
- template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
- template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
@ -53,16 +51,9 @@ stages:
environment_name: taxi-train-env
environment_file: mlops/azureml/train/train-env.yml
enable_monitoring: $(enable_monitoring)
- checkout: rai-vnext-preview
path: s/
- template: register-rai-components.yml
- checkout: self
path: s/
- checkout: mlops-templates
path: s/templates/
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
parameters:
pipeline_file: mlops/azureml/train/pipeline.yml
experiment_name: $(environment)_taxi_fare_train_$(Build.SourceBranchName)
display_name: $(environment)_taxi_fare_run_$(Build.BuildID)
enable_monitoring: $(enable_monitoring)
enable_monitoring: $(enable_monitoring)

Просмотреть файл

@ -7,11 +7,11 @@ variables:
- template: ../../config-infra-prod.yml
- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
# 'develop' or feature branches: DEV environment
- template: ../../config-infra-dev.yml
- template: ../../../../config-infra-dev.yml
- name: version
value: aml-cli-v2
- name: endpoint_name
value: taxi-online-$(namespace)$(postfix)$(environment)
value: taxi2-online-$(namespace)$(postfix)$(environment)
- name: endpoint_type
value: online
@ -29,7 +29,7 @@ resources:
name: Azure/mlops-templates # need to change org name from "Azure" to your own org
endpoint: github-connection # need to set up and hardcode
type: github
ref: main
ref: main-jul31
stages:
- stage: CreateOnlineEndpoint

Просмотреть файл

@ -26,7 +26,7 @@ jobs:
resource_group: ${{ needs.get-config.outputs.resource_group }}
workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
endpoint_file: mlops/azureml/deploy/batch/batch-endpoint.yml
endpoint_name: ${{ format('taxi-batch-{0}', needs.get-config.outputs.bep) }}
endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.bep) }}
endpoint_type: batch
secrets:
creds: ${{secrets.AZURE_CREDENTIALS}}
@ -37,7 +37,7 @@ jobs:
resource_group: ${{ needs.get-config.outputs.resource_group }}
workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
endpoint_file: mlops/azureml/deploy/batch/batch-deployment.yml
endpoint_name: ${{ format('taxi-batch-{0}', needs.get-config.outputs.bep) }}
endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.bep) }}
endpoint_type: batch
deployment_name: eptestdeploy
secrets:

Просмотреть файл

@ -14,7 +14,7 @@ jobs:
resource_group: ${{ needs.get-config.outputs.resource_group }}
workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml
endpoint_name: ${{ format('taxi-online-{0}', needs.get-config.outputs.oep) }}
endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.oep) }}
endpoint_type: online
secrets:
creds: ${{secrets.AZURE_CREDENTIALS}}
@ -25,7 +25,7 @@ jobs:
resource_group: ${{ needs.get-config.outputs.resource_group }}
workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
endpoint_file: mlops/azureml/deploy/online/online-deployment.yml
endpoint_name: ${{ format('taxi-online-{0}', needs.get-config.outputs.oep) }}
endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.oep) }}
endpoint_type: online
deployment_name: taxi-online-dp
secrets:
@ -37,6 +37,6 @@ jobs:
resource_group: ${{ needs.get-config.outputs.resource_group }}
workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
traffic_allocation: taxi-online-dp=100
endpoint_name: ${{ format('taxi-online-{0}', needs.get-config.outputs.oep) }}
endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.oep) }}
secrets:
creds: ${{secrets.AZURE_CREDENTIALS}}

Просмотреть файл

@ -0,0 +1,16 @@
channels:
- defaults
- anaconda
- conda-forge
dependencies:
- python=3.7.5
- pip
- pip:
- azureml-mlflow==1.38.0
- azureml-sdk==1.38.0
- scikit-learn==0.24.1
- pandas==1.2.1
- joblib==1.0.0
- matplotlib==3.3.3
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector

Просмотреть файл

@ -0,0 +1,16 @@
channels:
- defaults
- anaconda
- conda-forge
dependencies:
- python=3.7.5
- pip
- pip:
- azureml-mlflow==1.38.0
- azureml-sdk==1.38.0
- scikit-learn==0.24.1
- pandas==1.2.1
- joblib==1.0.0
- matplotlib==3.3.3
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,3 @@
{"input_data": [[2.86,40.66551971,-73.98258972,1,40.69801331,-73.97357178,0,2,1,1,19,21,3,56,1,1,19,21,21,57],
[3.98,40.68072128,-73.931633,1,40.6909523,-73.99185181,0,2,0,1,4,21,44,11,0,1,4,21,59,35]]
}

Просмотреть файл

@ -0,0 +1,16 @@
$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
name: batch-dp
endpoint_name: taxi-fare-batch
model: azureml:taxi-model@latest
compute: azureml:batch-cluster
resources:
instance_count: 1
max_concurrency_per_instance: 2
mini_batch_size: 10
output_action: append_row
output_file_name: predictions.csv
retry_settings:
max_retries: 3
timeout: 30
error_threshold: -1
logging_level: info

Просмотреть файл

@ -0,0 +1,4 @@
$schema: https://azuremlschemas.azureedge.net/latest/batchEndpoint.schema.json
name: taxi-fare-batch
description: taxi cost batch endpoint
auth_mode: aad_token

Просмотреть файл

@ -0,0 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: blue
endpoint_name: taxi-fare-online
model: azureml:taxi-model@latest
instance_type: Standard_DS2_v2
instance_count: 1

Просмотреть файл

@ -0,0 +1,4 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
name: taxi-fare-online
description: taxi cost online endpoint
auth_mode: key

Просмотреть файл

Просмотреть файл

@ -0,0 +1,166 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
# Training pipeline for the taxi fare model.
# Job order follows the data dependencies declared below:
#   prep_data -> train_model -> evaluate_model -> register_model
#   -> create_rai_job -> (explain_01, causal_01, counterfactual_01, error_analysis_01) -> gather_01
type: pipeline
experiment_name: taxi-fare-training
description: Training Pipeline to train a model that predicts taxi fare price

# <inputs_and_outputs>
inputs:
  input: #using local data, will create an anonymous data asset
    type: uri_folder
    path: ../../../data/
  # Passed to prep.py as a string flag together with the monitoring table name.
  enable_monitoring: "true"
  table_name: 'taximonitoring'

# Outputs are declared without settings; each one is bound to a job output below.
outputs:
  train_data:
  val_data:
  test_data:
  trained_model:
  evaluation_output:
  model_info_output_path:
# </inputs_and_outputs>

# <jobs>
settings:
  default_datastore: azureml:workspaceblobstore
  default_compute: azureml:cpu-cluster
  continue_on_step_failure: false

jobs:
  # Splits the raw data folder into train / validation / test sets.
  prep_data:
    name: prep_data
    display_name: prep-data
    code: ../../../data-science/src/prep
    command: >-
      python prep.py
      --raw_data ${{inputs.raw_data}}
      --train_data ${{outputs.train_data}}
      --val_data ${{outputs.val_data}}
      --test_data ${{outputs.test_data}}
      --enable_monitoring ${{inputs.enable_monitoring}}
      --table_name ${{inputs.table_name}}
    environment: azureml:taxi-train-env@latest
    inputs:
      raw_data: ${{parent.inputs.input}}
      enable_monitoring: ${{parent.inputs.enable_monitoring}}
      table_name: ${{parent.inputs.table_name}}
    outputs:
      train_data: ${{parent.outputs.train_data}}
      val_data: ${{parent.outputs.val_data}}
      test_data: ${{parent.outputs.test_data}}

  # Trains on the train split produced by prep_data.
  train_model:
    name: train_model
    display_name: train-model
    code: ../../../data-science/src/train
    command: >-
      python train.py
      --train_data ${{inputs.train_data}}
      --model_output ${{outputs.model_output}}
    environment: azureml:taxi-train-env@latest
    inputs:
      train_data: ${{parent.jobs.prep_data.outputs.train_data}}
    outputs:
      model_output: ${{parent.outputs.trained_model}}

  # Evaluates the trained model on the test split.
  evaluate_model:
    name: evaluate_model
    display_name: evaluate-model
    code: ../../../data-science/src/evaluate
    command: >-
      python evaluate.py
      --model_name ${{inputs.model_name}}
      --model_input ${{inputs.model_input}}
      --test_data ${{inputs.test_data}}
      --evaluation_output ${{outputs.evaluation_output}}
    environment: azureml:taxi-train-env@latest
    inputs:
      model_name: "taxi-model"
      model_input: ${{parent.jobs.train_model.outputs.model_output}}
      test_data: ${{parent.jobs.prep_data.outputs.test_data}}
    outputs:
      evaluation_output: ${{parent.outputs.evaluation_output}}

  # Registers the model; consumes the evaluation output and emits model info for the RAI jobs.
  register_model:
    name: register_model
    display_name: register-model
    code: ../../../data-science/src/register
    command: >-
      python register.py
      --model_name ${{inputs.model_name}}
      --model_path ${{inputs.model_path}}
      --evaluation_output ${{inputs.evaluation_output}}
      --model_info_output_path ${{outputs.model_info_output_path}}
    environment: azureml:taxi-train-env@latest
    inputs:
      model_name: "taxi-model"
      model_path: ${{parent.jobs.train_model.outputs.model_output}}
      evaluation_output: ${{parent.jobs.evaluate_model.outputs.evaluation_output}}
    outputs:
      model_info_output_path: ${{parent.outputs.model_info_output_path}}

  # Responsible AI: builds the dashboard that every insight job below takes as input.
  create_rai_job:
    type: command
    component: azureml:rai_insights_constructor@latest
    limits:
      timeout: 120
    inputs:
      title: Responsible AI for Taxi Fare Prediction
      task_type: regression
      model_info_path: ${{parent.jobs.register_model.outputs.model_info_output_path}}
      train_dataset: ${{parent.jobs.prep_data.outputs.train_data}}
      test_dataset: ${{parent.jobs.prep_data.outputs.test_data}}
      target_column_name: "cost"
      # List-valued inputs are passed as JSON-encoded strings.
      categorical_column_names: '["store_forward", "vendor"]'

  explain_01:
    type: command
    component: azureml:rai_insights_explanation@latest
    limits:
      timeout: 120
    inputs:
      comment: Some random string
      rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}

  causal_01:
    type: command
    component: azureml:rai_insights_causal@latest
    limits:
      timeout: 120
    inputs:
      rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
      treatment_features: '["distance", "passengers"]'
      heterogeneity_features: '["store_forward", "vendor"]'

  counterfactual_01:
    type: command
    component: azureml:rai_insights_counterfactual@latest
    limits:
      # This job is given a longer limit than the other RAI jobs (600 vs 120).
      timeout: 600
    inputs:
      rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
      total_CFs: 10
      desired_range: '[16, 30]'
      feature_importance: True

  error_analysis_01:
    type: command
    component: azureml:rai_insights_erroranalysis@latest
    limits:
      timeout: 120
    inputs:
      rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
      filter_features: '["distance", "passengers"]'

  # Collects the constructor output and the four insight outputs into one job.
  gather_01:
    type: command
    component: azureml:rai_insights_gather@latest
    limits:
      timeout: 120
    inputs:
      constructor: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
      insight_1: ${{parent.jobs.causal_01.outputs.causal}}
      insight_2: ${{parent.jobs.counterfactual_01.outputs.counterfactual}}
      insight_3: ${{parent.jobs.error_analysis_01.outputs.error_analysis}}
      insight_4: ${{parent.jobs.explain_01.outputs.explanation}}
# </jobs>

Просмотреть файл

@ -0,0 +1,5 @@
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
# Training environment; the training pipeline jobs reference it as azureml:taxi-train-env@latest.
name: taxi-train-env
# Base Docker image; the Conda file below is applied on top of it.
image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04
conda_file: ../../../data-science/environment/train-conda.yml
description: Environment created from a Docker image plus Conda environment to train taxi model.

Просмотреть файл

@ -0,0 +1,66 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# Azure DevOps pipeline: creates the batch compute cluster, then creates/updates
# the batch endpoint and its deployment, and finally runs a test request.

variables:
  # Infra configuration is selected at template-expansion time from the source branch.
  - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
      # 'main' branch: PRD environment
      - template: ../../config-infra-prod.yml
  - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
      # 'develop' or feature branches: DEV environment
      - template: ../../config-infra-dev.yml
  # Selects the sub-folder of step templates used in the steps below.
  - name: version
    value: aml-cli-v2
  - name: endpoint_name
    value: taxi-batch-$(namespace)$(postfix)$(environment)
  - name: endpoint_type
    value: batch

# Manual runs only.
trigger:
  - none

pool:
  vmImage: ubuntu-20.04

resources:
  repositories:
    - repository: mlops-templates # Template Repo
      name: Azure/mlops-templates # need to change org name from "Azure" to your own org
      endpoint: github-connection # need to set up and hardcode
      type: github
      ref: main

stages:
  - stage: CreateBatchEndpoint
    displayName: Create/Update Batch Endpoint
    jobs:
      - job: DeployBatchEndpoint
        steps:
          - checkout: self
            path: s/
          - checkout: mlops-templates
            path: s/templates/
          - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
          - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
          - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
          - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
            parameters:
              cluster_name: batch-cluster # name must match cluster name in deployment file below
              size: STANDARD_DS3_V2
              min_instances: 0
              max_instances: 5
              cluster_tier: dedicated
          - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
            parameters:
              endpoint_file: mlops/azureml/deploy/batch/batch-endpoint.yml
          - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
            parameters:
              deployment_name: taxi-batch-dp
              deployment_file: mlops/azureml/deploy/batch/batch-deployment.yml
          - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
            parameters:
              deployment_name: taxi-batch-dp
              sample_request: data/taxi-batch.csv
              request_type: uri_file #either uri_folder or uri_file

Просмотреть файл

@ -0,0 +1,68 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# Azure DevOps pipeline: runs unit tests, registers the training environment and
# the Responsible AI components, then submits the Azure ML training pipeline.

variables:
  # Infra configuration is selected at template-expansion time from the source branch.
  - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
      # 'main' branch: PRD environment
      - template: ../../config-infra-prod.yml
  - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
      # 'develop' or feature branches: DEV environment
      - template: ../../config-infra-dev.yml
  # Selects the sub-folder of step templates used in the steps below.
  - name: version
    value: aml-cli-v2

# Manual runs only.
trigger:
  - none

pool:
  vmImage: ubuntu-20.04

resources:
  repositories:
    - repository: mlops-templates # Template Repo
      name: Azure/mlops-templates # need to change org name from "Azure" to your own org
      endpoint: github-connection # need to set up and hardcode
      type: github
      ref: main
    - repository: rai-vnext-preview # Repo checked out before register-rai-components.yml runs
      name: Azure/rai-vnext-preview # need to change org name from "Azure" to your own org
      endpoint: github-connection # need to set up and hardcode
      type: github
      ref: main

stages:
  - stage: DeployTrainingPipeline
    displayName: Deploy Training Pipeline
    jobs:
      - job: DeployTrainingPipeline
        timeoutInMinutes: 120 # how long to run the job before automatically cancelling
        steps:
          - checkout: self
            path: s/
          - checkout: mlops-templates
            path: s/templates/
          - template: templates/tests/unit-tests.yml@mlops-templates
          - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
          - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
          - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
          - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
            parameters:
              build_type: conda
              environment_name: taxi-train-env
              environment_file: mlops/azureml/train/train-env.yml
              enable_monitoring: $(enable_monitoring)
          # The RAI repo is checked out into s/, the same path used for "self" above.
          - checkout: rai-vnext-preview
            path: s/
          - template: register-rai-components.yml
          # NOTE(review): self and the templates are checked out again, presumably because
          # the RAI checkout replaced the contents of s/ — confirm.
          - checkout: self
            path: s/
          - checkout: mlops-templates
            path: s/templates/
          - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
            parameters:
              pipeline_file: mlops/azureml/train/pipeline.yml
              experiment_name: $(environment)_taxi_fare_train_$(Build.SourceBranchName)
              display_name: $(environment)_taxi_fare_run_$(Build.BuildID)
              enable_monitoring: $(enable_monitoring)

Просмотреть файл

@ -0,0 +1,61 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# Azure DevOps pipeline: creates/updates the online endpoint and its deployment,
# routes all traffic to that deployment, and finally runs a test request.

variables:
  # Infra configuration is selected at template-expansion time from the source branch.
  - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
      # 'main' branch: PRD environment
      - template: ../../config-infra-prod.yml
  - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
      # 'develop' or feature branches: DEV environment
      - template: ../../config-infra-dev.yml
  # Selects the sub-folder of step templates used in the steps below.
  - name: version
    value: aml-cli-v2
  - name: endpoint_name
    value: taxi-online-$(namespace)$(postfix)$(environment)
  - name: endpoint_type
    value: online

# Manual runs only.
trigger:
  - none

pool:
  vmImage: ubuntu-20.04

resources:
  repositories:
    - repository: mlops-templates # Template Repo
      name: Azure/mlops-templates # need to change org name from "Azure" to your own org
      endpoint: github-connection # need to set up and hardcode
      type: github
      ref: main

stages:
  - stage: CreateOnlineEndpoint
    displayName: Create/Update Online Endpoint
    jobs:
      - job: DeployOnlineEndpoint
        steps:
          - checkout: self
            path: s/
          - checkout: mlops-templates
            path: s/templates/
          - template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
          - template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
          - template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
          - template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
            parameters:
              endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml
          - template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
            parameters:
              deployment_name: taxi-online-dp
              deployment_file: mlops/azureml/deploy/online/online-deployment.yml
          # Send 100% of endpoint traffic to the deployment created above.
          - template: templates/${{ variables.version }}/allocate-traffic.yml@mlops-templates
            parameters:
              traffic_allocation: taxi-online-dp=100
          - template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
            parameters:
              deployment_name: taxi-online-dp
              sample_request: data/taxi-request.json
              request_type: json