Merge pull request #104 from Azure/feature/rai-aml-cli-v2
fixes github actions as RAI has not been implemented yet for GHA
This commit is contained in:
Коммит
1328f776eb
|
@ -1,4 +1,4 @@
|
|||
# PR into Azure/mlops-v2
|
||||
# PR into Azure/mlops-project-template
|
||||
|
||||
## Checklist
|
||||
|
||||
|
|
|
@ -12,5 +12,8 @@ dependencies:
|
|||
- pandas==1.2.1
|
||||
- joblib==1.0.0
|
||||
- matplotlib==3.3.3
|
||||
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
|
||||
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
|
||||
- fairlearn==0.7.0
|
||||
- azureml-contrib-fairness==1.38.0
|
||||
- interpret-community==0.24.1
|
||||
- interpret-core==0.2.7
|
||||
- azureml-interpret==1.38.0
|
|
@ -12,5 +12,8 @@ dependencies:
|
|||
- pandas==1.2.1
|
||||
- joblib==1.0.0
|
||||
- matplotlib==3.3.3
|
||||
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
|
||||
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
|
||||
- fairlearn==0.7.0
|
||||
- azureml-contrib-fairness==1.38.0
|
||||
- interpret-community==0.24.1
|
||||
- interpret-core==0.2.7
|
||||
- azureml-interpret==1.38.0
|
|
@ -0,0 +1,233 @@
|
|||
import argparse
|
||||
from pathlib import Path
|
||||
import os
|
||||
import pickle
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
|
||||
|
||||
from azureml.core import Run, Model
|
||||
|
||||
from fairlearn.metrics._group_metric_set import _create_group_metric_set
|
||||
from azureml.contrib.fairness import upload_dashboard_dictionary, download_dashboard_by_upload_id
|
||||
|
||||
from interpret_community import TabularExplainer
|
||||
from azureml.interpret import ExplanationClient
|
||||
|
||||
import mlflow
|
||||
import mlflow.sklearn
|
||||
|
||||
# Handle to the Azure ML run executing this script; fetched once at import time.
# main() passes it to the fairness dashboard upload and the explanation client.
|
||||
run = Run.get_context()
|
||||
# Workspace owning the run's experiment; main() lists registered models from it.
ws = run.experiment.workspace
|
||||
|
||||
# Label column the model predicts.
TARGET_COL = "cost"

# Numeric feature columns; main() selects them in this order, ahead of the
# nominal and ordinal columns below.
NUMERIC_COLS = [
    "distance", "dropoff_latitude", "dropoff_longitude", "passengers",
    "pickup_latitude", "pickup_longitude",
    "pickup_weekday", "pickup_month", "pickup_monthday",
    "pickup_hour", "pickup_minute", "pickup_second",
    "dropoff_weekday", "dropoff_month", "dropoff_monthday",
    "dropoff_hour", "dropoff_minute", "dropoff_second",
]

# Nominal (unordered) categorical feature columns.
CAT_NOM_COLS = ["store_forward", "vendor"]

# Ordinal categorical feature columns (none at present).
CAT_ORD_COLS = []

# Columns used as sensitive features for the fairlearn dashboard.
SENSITIVE_COLS = ["vendor"]
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the evaluation script.

    Returns:
        argparse.Namespace with the string attributes ``model_name``,
        ``model_input``, ``prepared_data``, ``predictions``, ``score_report``
        and ``deploy_flag``. An option that is not supplied is ``None``.
    """
    # (flag, help text) pairs; every option is an optional string.
    option_table = (
        ("--model_name", "Name of registered model"),
        ("--model_input", "Path of input model"),
        ("--prepared_data", "Path to transformed data"),
        ("--predictions", "Path of predictions"),
        ("--score_report", "Path to score report"),
        ("--deploy_flag", 'A deploy flag whether to deploy or no'),
    )

    cli = argparse.ArgumentParser("predict")
    for flag, description in option_table:
        cli.add_argument(flag, type=str, help=description)

    return cli.parse_args()
|
||||
|
||||
|
||||
def _load_pickled_model(model_dir):
    """Return the object unpickled from ``model.pkl`` inside *model_dir*.

    The file is opened in a ``with`` block so its handle is closed even when
    unpickling raises.
    """
    # NOTE(review): unpickling can execute arbitrary code; this is only safe
    # for model artifacts that come from trusted sources.
    with open(Path(model_dir) / "model.pkl", "rb") as model_file:
        return pickle.load(model_file)


def main():
    """Score the trained model, decide on promotion and publish insights.

    Reads ``train.csv`` and ``test.csv`` from ``--prepared_data`` and
    ``model.pkl`` from ``--model_input``, then:

    * writes ``predictions.csv`` under ``--predictions``;
    * writes ``score.txt`` and ``perf_comparison.png`` under
      ``--score_report`` and logs the test metrics and plots to MLflow;
    * compares the test R2 against every workspace model named
      ``--model_name`` and writes ``1`` (deploy) or ``0`` to the
      ``deploy_flag`` file under ``--deploy_flag``;
    * uploads a fairness dashboard over ``SENSITIVE_COLS`` and a global
      model explanation to the current run.
    """
    args = parse_args()

    for line in (
        f"Model path: {args.model_input}",
        f"Test data path: {args.prepared_data}",
        f"Predictions path: {args.predictions}",
        f"Scoring output path: {args.score_report}",
    ):
        print(line)

    # ---------------- Model Evaluation ---------------- #

    # Show what the mounted input folder contains (the listing was previously
    # computed but never printed).
    print("mounted_path files: ")
    print(os.listdir(args.prepared_data))

    prepared_dir = Path(args.prepared_data)
    train_data = pd.read_csv(prepared_dir / "train.csv")
    test_data = pd.read_csv(prepared_dir / "test.csv")

    feature_cols = NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS
    # The training features are only needed to initialise the explainer.
    X_train = train_data[feature_cols]
    y_test = test_data[TARGET_COL]
    X_test = test_data[feature_cols]

    # Load the model from the input port and predict on the test set.
    model = _load_pickled_model(args.model_input)
    yhat_test = model.predict(X_test)

    # Save the feature columns together with the actual and predicted cost.
    output_data = X_test.copy()
    output_data["real_label"] = y_test
    output_data["predicted_label"] = yhat_test
    output_data.to_csv(Path(args.predictions) / "predictions.csv")

    # Evaluate model performance on the test set.
    r2 = r2_score(y_test, yhat_test)
    mse = mean_squared_error(y_test, yhat_test)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, yhat_test)

    # Write the score report to a text file.
    score_file = Path(args.score_report) / "score.txt"
    score_file.write_text("Scored with the following model:\n{}".format(model))
    with open(score_file, "a") as f:
        f.write("Mean squared error: %.2f \n" % mse)
        f.write("Root mean squared error: %.2f \n" % rmse)
        f.write("Mean absolute error: %.2f \n" % mae)
        f.write("Coefficient of determination: %.2f \n" % r2)

    mlflow.log_metric("test r2", r2)
    mlflow.log_metric("test mse", mse)
    mlflow.log_metric("test rmse", rmse)
    mlflow.log_metric("test mae", mae)

    # Visualize results.
    plt.scatter(y_test, yhat_test, color='black')
    plt.plot(y_test, y_test, color='blue', linewidth=3)
    plt.xlabel("Real value")
    plt.ylabel("Predicted value")
    plt.title("Comparing Model Predictions to Real values - Test Data")
    plt.savefig("predictions.png")
    mlflow.log_artifact("predictions.png")

    # -------------------- Promotion ------------------- #
    scores = {}
    predictions = {}
    score = r2  # R2 of the current model, computed above

    # Re-score every registered version of this model on the same test set.
    for model_run in Model.list(ws):
        if model_run.name != args.model_name:
            continue
        model_path = Model.download(model_run, exist_ok=True)
        registered = _load_pickled_model(model_path)
        predictions[model_run.id] = registered.predict(X_test)
        scores[model_run.id] = r2_score(y_test, predictions[model_run.id])

    print(scores)

    # Deploy when nothing is registered yet, or when the current model is at
    # least as good as the best registered one.
    deploy_flag = 1 if not scores or score >= max(scores.values()) else 0
    print("Deploy flag: ", deploy_flag)

    with open(Path(args.deploy_flag) / "deploy_flag", 'w') as f:
        f.write('%d' % int(deploy_flag))

    scores["current model"] = score
    perf_comparison_plot = pd.DataFrame(scores, index=["r2 score"]).plot(
        kind='bar', figsize=(15, 10)
    )
    perf_comparison_plot.figure.savefig("perf_comparison.png")
    perf_comparison_plot.figure.savefig(Path(args.score_report) / "perf_comparison.png")

    mlflow.log_metric("deploy flag", bool(deploy_flag))
    mlflow.log_artifact("perf_comparison.png")

    # -------------------- FAIRNESS ------------------- #
    # Fairness metrics over the sensitive features, for the current model and
    # every registered model scored above.
    sf = {col: X_test[[col]] for col in SENSITIVE_COLS}
    # Same key as in ``scores`` (was misspelled "currrent model"); reuses the
    # predictions already computed instead of predicting a second time.
    predictions["current model"] = list(yhat_test)

    dash_dict_all = _create_group_metric_set(
        y_true=y_test,
        predictions=predictions,
        sensitive_features=sf,
        prediction_type='regression',
    )

    # Upload the dashboard to Azure Machine Learning.
    dashboard_title = "Fairness insights Comparison of Models"

    # validate_model_ids is False because the current model has not been
    # registered at this point.
    upload_id = upload_dashboard_dictionary(
        run,
        dash_dict_all,
        dashboard_name=dashboard_title,
        validate_model_ids=False,
    )
    print("\nUploaded to id: {0}\n".format(upload_id))

    # -------------------- Explainability ------------------- #
    tabular_explainer = TabularExplainer(
        model,
        initialization_examples=X_train,
        features=X_train.columns,
    )

    # Global feature importance, computed on the test data.
    global_explanation = tabular_explainer.explain_global(X_test)

    print("Explainability feature importance:")
    # Print the importances announced by the heading (previously discarded).
    print(global_explanation.get_feature_importance_dict())

    client = ExplanationClient.from_run(run)
    client.upload_model_explanation(
        global_explanation, comment='global explanation: all features'
    )


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,78 @@
|
|||
import argparse
|
||||
|
||||
from pathlib import Path
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import mlflow
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the data preparation script.

    Returns:
        argparse.Namespace with the string attributes ``raw_data``,
        ``prepared_data``, ``enable_monitoring`` and ``table_name``.
        ``table_name`` defaults to ``"mlmonitoring"``; the others are
        ``None`` when not supplied.
    """
    # Per-option keyword arguments; every option is a string.
    option_specs = {
        "--raw_data": {"help": "Path to raw data"},
        "--prepared_data": {"help": "Path of prepared data"},
        "--enable_monitoring": {"help": "enable logging to ADX"},
        "--table_name": {
            "default": "mlmonitoring",
            "help": "Table name in ADX for logging",
        },
    }

    cli = argparse.ArgumentParser("prep")
    for flag, extra in option_specs.items():
        cli.add_argument(flag, type=str, **extra)

    return cli.parse_args()
|
||||
|
||||
def log_training_data(df, table_name):
    """Send *df* to the monitoring collector created for *table_name*.

    The collector module is imported inside the function, so it is only
    loaded when this function is actually called.
    """
    from obs.collector import Online_Collector

    Online_Collector(table_name).batch_collect(df)
|
||||
|
||||
def _monitoring_enabled(flag):
    """Return True when *flag* is 'true', '1' or 'yes' (case-insensitive).

    ``None`` (option not supplied) and any other value count as disabled.
    """
    return str(flag or "").strip().lower() in {"true", "1", "yes"}


def main():
    """Split the raw taxi data into train/val/test CSV files.

    Reads ``taxi-data.csv`` from ``--raw_data``, assigns each row at random
    to train (about 70%), validation (about 15%) or test (about 15%), logs
    the three sizes to MLflow and writes ``train.csv``, ``val.csv`` and
    ``test.csv`` into ``--prepared_data``. When ``--enable_monitoring`` is
    truthy the full dataset is also sent to ``log_training_data``.
    """
    # ---------- Parse Arguments ----------- #
    args = parse_args()

    for line in (
        f"Raw data path: {args.raw_data}",
        f"Data output path: {args.prepared_data}",
    ):
        print(line)

    # ------------ Reading Data ------------ #
    print("mounted_path files: ")
    arr = os.listdir(args.raw_data)
    print(arr)

    data = pd.read_csv(Path(args.raw_data) / 'taxi-data.csv')

    # ------------- Split Data ------------- #
    # One uniform draw per row decides which split the row belongs to.
    # NOTE(review): the generator is not seeded, so the split differs from
    # run to run.
    random_data = np.random.rand(len(data))

    msk_train = random_data < 0.7
    msk_val = (random_data >= 0.7) & (random_data < 0.85)
    msk_test = random_data >= 0.85

    train = data[msk_train]
    val = data[msk_val]
    test = data[msk_test]

    mlflow.log_metric('train size', train.shape[0])
    mlflow.log_metric('val size', val.shape[0])
    mlflow.log_metric('test size', test.shape[0])

    output_dir = Path(args.prepared_data)
    train.to_csv(output_dir / "train.csv")
    val.to_csv(output_dir / "val.csv")
    test.to_csv(output_dir / "test.csv")

    # The previous check compared the bound method ``str.lower`` (never
    # called) with a string, so 'true' and 'yes' could never enable
    # monitoring, and a missing option raised AttributeError.
    if _monitoring_enabled(args.enable_monitoring):
        log_training_data(data, args.table_name)


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,56 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
import pickle
|
||||
|
||||
import mlflow
|
||||
|
||||
from azureml.core import Run
|
||||
|
||||
# Handle to the Azure ML run executing this script; fetched once at import time.
|
||||
run = Run.get_context()
|
||||
# Id of that run; main() embeds it in the ``runs:/<run_id>/<model_name>`` URI
# used to register the model.
run_id = run.get_details()["runId"]
|
||||
print(run_id)
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the registration script.

    Unrecognised arguments are ignored. The parsed namespace is printed
    before it is returned.

    Returns:
        argparse.Namespace with the string attributes ``model_name``,
        ``model_path`` and ``deploy_flag`` (``None`` when not supplied).
    """
    option_table = (
        ('--model_name', 'Name under which model will be registered'),
        ('--model_path', 'Model directory'),
        ('--deploy_flag', 'A deploy flag whether to deploy or no'),
    )

    cli = argparse.ArgumentParser()
    for flag, description in option_table:
        cli.add_argument(flag, type=str, help=description)

    # Only the known options are of interest; the leftovers are dropped.
    known, _unknown = cli.parse_known_args()
    print(f'Arguments: {known}')
    return known
|
||||
|
||||
|
||||
def main():
    """Register the trained model with MLflow when the deploy flag is 1.

    Reads the integer stored in the ``deploy_flag`` file under
    ``--deploy_flag``. When it equals 1, ``model.pkl`` from ``--model_path``
    is logged to the current run as an MLflow sklearn model and registered
    under ``--model_name``; otherwise nothing is registered.
    """
    # Import the sklearn flavor explicitly instead of relying on the
    # ``mlflow`` package exposing it as an attribute.
    import mlflow.sklearn

    args = parse_args()

    model_name = args.model_name
    model_path = args.model_path

    # int() tolerates surrounding whitespace such as a trailing newline.
    deploy_flag = int((Path(args.deploy_flag) / "deploy_flag").read_text())

    if deploy_flag == 1:
        print("Registering ", model_name)

        # NOTE(review): unpickling can execute arbitrary code; this is only
        # safe for model artifacts that come from trusted sources.
        # ``with`` closes the file handle, which was previously leaked.
        with open(Path(model_path) / "model.pkl", "rb") as model_file:
            model = pickle.load(model_file)

        # Log the model to the current run using mlflow.
        mlflow.sklearn.log_model(model, model_name)

        # Register the logged model from the run's artifact URI.
        model_uri = f'runs:/{run_id}/{model_name}'
        mlflow.register_model(model_uri, model_name)
    else:
        print("Model will not be registered!")


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,191 @@
|
|||
|
||||
import argparse
|
||||
|
||||
from pathlib import Path
|
||||
import os
|
||||
import pickle
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
from sklearn.pipeline import Pipeline
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
from sklearn.impute import SimpleImputer
|
||||
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
||||
from sklearn.preprocessing import OneHotEncoder
|
||||
from sklearn.compose import ColumnTransformer
|
||||
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
|
||||
|
||||
import mlflow
|
||||
import mlflow.sklearn
|
||||
|
||||
# Label column the model is trained to predict.
TARGET_COL = "cost"

# Numeric feature columns; main() selects them in this order, ahead of the
# nominal and ordinal columns below.
NUMERIC_COLS = [
    "distance", "dropoff_latitude", "dropoff_longitude", "passengers",
    "pickup_latitude", "pickup_longitude",
    "pickup_weekday", "pickup_month", "pickup_monthday",
    "pickup_hour", "pickup_minute", "pickup_second",
    "dropoff_weekday", "dropoff_month", "dropoff_monthday",
    "dropoff_hour", "dropoff_minute", "dropoff_second",
]

# Nominal (unordered) categorical feature columns.
CAT_NOM_COLS = ["store_forward", "vendor"]

# Ordinal categorical feature columns (none at present).
CAT_ORD_COLS = []
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line options of the training script.

    Returns:
        argparse.Namespace with ``prepared_data`` and ``model_output``
        (strings, ``None`` when not supplied) plus the
        ``regressor__*`` hyperparameters with their defaults.
    """
    cli = argparse.ArgumentParser("train")
    cli.add_argument("--prepared_data", type=str, help="Path to training data")
    cli.add_argument("--model_output", type=str, help="Path of output model")

    # Regressor-specific arguments: (name suffix, type, default, help text).
    hyperparameters = (
        ("n_estimators", int, 500, 'Number of trees'),
        ("bootstrap", int, 1,
         'Method of selecting samples for training each tree'),
        ("max_depth", int, 10, ' Maximum number of levels in tree'),
        ("max_features", str, 'auto',
         'Number of features to consider at every split'),
        ("min_samples_leaf", int, 4,
         'Minimum number of samples required at each leaf node'),
        ("min_samples_split", int, 5,
         'Minimum number of samples required to split a node'),
    )
    for suffix, kind, fallback, description in hyperparameters:
        cli.add_argument(
            f"--regressor__{suffix}",
            type=kind,
            default=fallback,
            help=description,
        )

    return cli.parse_args()
|
||||
|
||||
def main():
    """Train a random forest regressor and save it as ``model.pkl``.

    Reads ``train.csv`` from ``--prepared_data``, fits a
    ``RandomForestRegressor`` on the raw feature columns (no preprocessing
    is applied), logs the hyperparameters, the training metrics and a
    real-vs-predicted plot to MLflow, and pickles the fitted model into
    ``--model_output``.
    """
    args = parse_args()

    for line in (
        f"Training data path: {args.prepared_data}",
        f"Model output path: {args.model_output}",
    ):
        print(line)

    print("mounted_path files: ")
    arr = os.listdir(args.prepared_data)
    print(arr)

    train_data = pd.read_csv(Path(args.prepared_data) / "train.csv")

    # Split the data into input (X) and output (y).
    y_train = train_data[TARGET_COL]
    X_train = train_data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]

    # Train a random forest regressor on the train set. The unused
    # preprocessing pipelines that used to be built here were removed: they
    # were never attached to the model.
    model = RandomForestRegressor(
        n_estimators=args.regressor__n_estimators,
        # The CLI option is an int (0/1); the estimator documents a boolean.
        bootstrap=bool(args.regressor__bootstrap),
        max_depth=args.regressor__max_depth,
        max_features=args.regressor__max_features,
        min_samples_leaf=args.regressor__min_samples_leaf,
        min_samples_split=args.regressor__min_samples_split,
        random_state=0,
    )

    mlflow.log_param("model", "RandomForestRegressor")
    mlflow.log_param("n_estimators", args.regressor__n_estimators)
    mlflow.log_param("bootstrap", args.regressor__bootstrap)
    mlflow.log_param("max_depth", args.regressor__max_depth)
    mlflow.log_param("max_features", args.regressor__max_features)
    mlflow.log_param("min_samples_leaf", args.regressor__min_samples_leaf)
    mlflow.log_param("min_samples_split", args.regressor__min_samples_split)

    model.fit(X_train, y_train)

    # Predict on the training data with the fitted model.
    yhat_train = model.predict(X_train)

    # Evaluate regression performance on the train set.
    r2 = r2_score(y_train, yhat_train)
    mse = mean_squared_error(y_train, yhat_train)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_train, yhat_train)

    mlflow.log_metric("train r2", r2)
    mlflow.log_metric("train mse", mse)
    mlflow.log_metric("train rmse", rmse)
    mlflow.log_metric("train mae", mae)

    # Visualize results.
    plt.scatter(y_train, yhat_train, color='black')
    plt.plot(y_train, y_train, color='blue', linewidth=3)
    plt.xlabel("Real value")
    plt.ylabel("Predicted value")
    plt.savefig("regression_results.png")
    mlflow.log_artifact("regression_results.png")

    # Save the model; ``with`` guarantees the file is flushed and closed.
    with open(Path(args.model_output) / "model.pkl", "wb") as model_file:
        pickle.dump(model, model_file)


if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,23 +1,21 @@
|
|||
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
|
||||
type: pipeline
|
||||
experiment_name: taxi-fare-training
|
||||
experiment_name: taxi-fare-training-gha
|
||||
description: Training Pipeline to train a model that predicts taxi fare price
|
||||
|
||||
# <inputs_and_outputs>
|
||||
inputs:
|
||||
input: #using local data, will create an anonymous data asset
|
||||
input: #using local data, will create an anonymous data asset
|
||||
type: uri_folder
|
||||
path: ../../../data/
|
||||
enable_monitoring: "true"
|
||||
table_name: 'taximonitoring'
|
||||
enable_monitoring: 'false'
|
||||
|
||||
outputs:
|
||||
train_data:
|
||||
val_data:
|
||||
test_data:
|
||||
prepared_data:
|
||||
trained_model:
|
||||
evaluation_output:
|
||||
model_info_output_path:
|
||||
predictions:
|
||||
score_report:
|
||||
deploy_flag:
|
||||
# </inputs_and_outputs>
|
||||
|
||||
# <jobs>
|
||||
|
@ -30,137 +28,68 @@ jobs:
|
|||
prep_data:
|
||||
name: prep_data
|
||||
display_name: prep-data
|
||||
code: ../../../data-science/src/prep
|
||||
code: ../../../data-science/src
|
||||
command: >-
|
||||
python prep.py
|
||||
--raw_data ${{inputs.raw_data}}
|
||||
--train_data ${{outputs.train_data}}
|
||||
--val_data ${{outputs.val_data}}
|
||||
--test_data ${{outputs.test_data}}
|
||||
--prepared_data ${{outputs.prepared_data}}
|
||||
--enable_monitoring ${{inputs.enable_monitoring}}
|
||||
--table_name ${{inputs.table_name}}
|
||||
environment: azureml:taxi-train-env@latest
|
||||
inputs:
|
||||
raw_data: ${{parent.inputs.input}}
|
||||
enable_monitoring: ${{parent.inputs.enable_monitoring}}
|
||||
table_name: ${{parent.inputs.table_name}}
|
||||
outputs:
|
||||
train_data: ${{parent.outputs.train_data}}
|
||||
val_data: ${{parent.outputs.val_data}}
|
||||
test_data: ${{parent.outputs.test_data}}
|
||||
prepared_data: ${{parent.outputs.prepared_data}}
|
||||
|
||||
train_model:
|
||||
name: train_model
|
||||
display_name: train-model
|
||||
code: ../../../data-science/src/train
|
||||
code: ../../../data-science/src
|
||||
command: >-
|
||||
python train.py
|
||||
--train_data ${{inputs.train_data}}
|
||||
--prepared_data ${{inputs.prepared_data}}
|
||||
--model_output ${{outputs.model_output}}
|
||||
environment: azureml:taxi-train-env@latest
|
||||
inputs:
|
||||
train_data: ${{parent.jobs.prep_data.outputs.train_data}}
|
||||
prepared_data: ${{parent.jobs.prep_data.outputs.prepared_data}}
|
||||
outputs:
|
||||
model_output: ${{parent.outputs.trained_model}}
|
||||
|
||||
evaluate_model:
|
||||
name: evaluate_model
|
||||
display_name: evaluate-model
|
||||
code: ../../../data-science/src/evaluate
|
||||
code: ../../../data-science/src
|
||||
command: >-
|
||||
python evaluate.py
|
||||
--model_name ${{inputs.model_name}}
|
||||
--model_input ${{inputs.model_input}}
|
||||
--test_data ${{inputs.test_data}}
|
||||
--evaluation_output ${{outputs.evaluation_output}}
|
||||
--prepared_data ${{inputs.prepared_data}}
|
||||
--predictions ${{outputs.predictions}}
|
||||
--score_report ${{outputs.score_report}}
|
||||
--deploy_flag ${{outputs.deploy_flag}}
|
||||
environment: azureml:taxi-train-env@latest
|
||||
inputs:
|
||||
model_name: "taxi-model"
|
||||
model_name: "taxi-model-gha"
|
||||
model_input: ${{parent.jobs.train_model.outputs.model_output}}
|
||||
test_data: ${{parent.jobs.prep_data.outputs.test_data}}
|
||||
prepared_data: ${{parent.jobs.prep_data.outputs.prepared_data}}
|
||||
outputs:
|
||||
evaluation_output: ${{parent.outputs.evaluation_output}}
|
||||
predictions: ${{parent.outputs.predictions}}
|
||||
score_report: ${{parent.outputs.score_report}}
|
||||
deploy_flag: ${{parent.outputs.deploy_flag}}
|
||||
|
||||
register_model:
|
||||
name: register_model
|
||||
display_name: register-model
|
||||
code: ../../../data-science/src/register
|
||||
code: ../../../data-science/src
|
||||
command: >-
|
||||
python register.py
|
||||
--model_name ${{inputs.model_name}}
|
||||
--model_path ${{inputs.model_path}}
|
||||
--evaluation_output ${{inputs.evaluation_output}}
|
||||
--model_info_output_path ${{outputs.model_info_output_path}}
|
||||
--deploy_flag ${{inputs.deploy_flag}}
|
||||
environment: azureml:taxi-train-env@latest
|
||||
inputs:
|
||||
model_name: "taxi-model"
|
||||
model_name: "taxi-model-gha"
|
||||
model_path: ${{parent.jobs.train_model.outputs.model_output}}
|
||||
evaluation_output: ${{parent.jobs.evaluate_model.outputs.evaluation_output}}
|
||||
outputs:
|
||||
model_info_output_path: ${{parent.outputs.model_info_output_path}}
|
||||
deploy_flag: ${{parent.jobs.evaluate_model.outputs.deploy_flag}}
|
||||
|
||||
create_rai_job:
|
||||
type: command
|
||||
component: azureml:rai_insights_constructor@latest
|
||||
limits:
|
||||
timeout: 120
|
||||
inputs:
|
||||
title: Responsible AI for Taxi Fare Prediction
|
||||
task_type: regression
|
||||
model_info_path: ${{parent.jobs.register_model.outputs.model_info_output_path}}
|
||||
train_dataset: ${{parent.jobs.prep_data.outputs.train_data}}
|
||||
test_dataset: ${{parent.jobs.prep_data.outputs.test_data}}
|
||||
target_column_name: "cost"
|
||||
categorical_column_names: '["store_forward", "vendor"]'
|
||||
|
||||
explain_01:
|
||||
type: command
|
||||
component: azureml:rai_insights_explanation@latest
|
||||
limits:
|
||||
timeout: 120
|
||||
inputs:
|
||||
comment: Some random string
|
||||
rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
|
||||
|
||||
causal_01:
|
||||
type: command
|
||||
component: azureml:rai_insights_causal@latest
|
||||
limits:
|
||||
timeout: 120
|
||||
inputs:
|
||||
rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
|
||||
treatment_features: '["distance", "passengers"]'
|
||||
heterogeneity_features: '["store_forward", "vendor"]'
|
||||
|
||||
counterfactual_01:
|
||||
type: command
|
||||
component: azureml:rai_insights_counterfactual@latest
|
||||
limits:
|
||||
timeout: 600
|
||||
inputs:
|
||||
rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
|
||||
total_CFs: 10
|
||||
desired_range: '[16, 30]'
|
||||
feature_importance: True
|
||||
|
||||
error_analysis_01:
|
||||
type: command
|
||||
component: azureml:rai_insights_erroranalysis@latest
|
||||
limits:
|
||||
timeout: 120
|
||||
inputs:
|
||||
rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
|
||||
filter_features: '["distance", "passengers"]'
|
||||
|
||||
gather_01:
|
||||
type: command
|
||||
component: azureml:rai_insights_gather@latest
|
||||
limits:
|
||||
timeout: 120
|
||||
inputs:
|
||||
constructor: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
|
||||
insight_1: ${{parent.jobs.causal_01.outputs.causal}}
|
||||
insight_2: ${{parent.jobs.counterfactual_01.outputs.counterfactual}}
|
||||
insight_3: ${{parent.jobs.error_analysis_01.outputs.error_analysis}}
|
||||
insight_4: ${{parent.jobs.explain_01.outputs.explanation}}
|
||||
# </jobs>
|
||||
# </jobs>
|
||||
|
|
|
@ -11,7 +11,7 @@ variables:
|
|||
- name: version
|
||||
value: aml-cli-v2
|
||||
- name: endpoint_name
|
||||
value: taxi-batch-$(namespace)$(postfix)$(environment)
|
||||
value: taxi2-batch-$(namespace)$(postfix)$(environment)
|
||||
- name: endpoint_type
|
||||
value: batch
|
||||
|
||||
|
@ -28,7 +28,7 @@ resources:
|
|||
name: Azure/mlops-templates # need to change org name from "Azure" to your own org
|
||||
endpoint: github-connection # need to set up and hardcode
|
||||
type: github
|
||||
ref: main
|
||||
ref: main-jul31
|
||||
|
||||
|
||||
stages:
|
||||
|
|
|
@ -37,13 +37,11 @@ stages:
|
|||
displayName: Deploy Training Pipeline
|
||||
jobs:
|
||||
- job: DeployTrainingPipeline
|
||||
timeoutInMinutes: 120 # how long to run the job before automatically cancelling
|
||||
steps:
|
||||
- checkout: self
|
||||
path: s/
|
||||
- checkout: mlops-templates
|
||||
path: s/templates/
|
||||
- template: templates/tests/unit-tests.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
|
||||
|
@ -53,16 +51,9 @@ stages:
|
|||
environment_name: taxi-train-env
|
||||
environment_file: mlops/azureml/train/train-env.yml
|
||||
enable_monitoring: $(enable_monitoring)
|
||||
- checkout: rai-vnext-preview
|
||||
path: s/
|
||||
- template: register-rai-components.yml
|
||||
- checkout: self
|
||||
path: s/
|
||||
- checkout: mlops-templates
|
||||
path: s/templates/
|
||||
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
|
||||
parameters:
|
||||
pipeline_file: mlops/azureml/train/pipeline.yml
|
||||
experiment_name: $(environment)_taxi_fare_train_$(Build.SourceBranchName)
|
||||
display_name: $(environment)_taxi_fare_run_$(Build.BuildID)
|
||||
enable_monitoring: $(enable_monitoring)
|
||||
enable_monitoring: $(enable_monitoring)
|
|
@ -7,11 +7,11 @@ variables:
|
|||
- template: ../../config-infra-prod.yml
|
||||
- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
|
||||
# 'develop' or feature branches: DEV environment
|
||||
- template: ../../config-infra-dev.yml
|
||||
- template: ../../../../config-infra-dev.yml
|
||||
- name: version
|
||||
value: aml-cli-v2
|
||||
- name: endpoint_name
|
||||
value: taxi-online-$(namespace)$(postfix)$(environment)
|
||||
value: taxi2-online-$(namespace)$(postfix)$(environment)
|
||||
- name: endpoint_type
|
||||
value: online
|
||||
|
||||
|
@ -29,7 +29,7 @@ resources:
|
|||
name: Azure/mlops-templates # need to change org name from "Azure" to your own org
|
||||
endpoint: github-connection # need to set up and hardcode
|
||||
type: github
|
||||
ref: main
|
||||
ref: main-jul31
|
||||
|
||||
stages:
|
||||
- stage: CreateOnlineEndpoint
|
||||
|
|
|
@ -26,7 +26,7 @@ jobs:
|
|||
resource_group: ${{ needs.get-config.outputs.resource_group }}
|
||||
workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
|
||||
endpoint_file: mlops/azureml/deploy/batch/batch-endpoint.yml
|
||||
endpoint_name: ${{ format('taxi-batch-{0}', needs.get-config.outputs.bep) }}
|
||||
endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.bep) }}
|
||||
endpoint_type: batch
|
||||
secrets:
|
||||
creds: ${{secrets.AZURE_CREDENTIALS}}
|
||||
|
@ -37,7 +37,7 @@ jobs:
|
|||
resource_group: ${{ needs.get-config.outputs.resource_group }}
|
||||
workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
|
||||
endpoint_file: mlops/azureml/deploy/batch/batch-deployment.yml
|
||||
endpoint_name: ${{ format('taxi-batch-{0}', needs.get-config.outputs.bep) }}
|
||||
endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.bep) }}
|
||||
endpoint_type: batch
|
||||
deployment_name: eptestdeploy
|
||||
secrets:
|
||||
|
|
|
@ -14,7 +14,7 @@ jobs:
|
|||
resource_group: ${{ needs.get-config.outputs.resource_group }}
|
||||
workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
|
||||
endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml
|
||||
endpoint_name: ${{ format('taxi-online-{0}', needs.get-config.outputs.oep) }}
|
||||
endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.oep) }}
|
||||
endpoint_type: online
|
||||
secrets:
|
||||
creds: ${{secrets.AZURE_CREDENTIALS}}
|
||||
|
@ -25,7 +25,7 @@ jobs:
|
|||
resource_group: ${{ needs.get-config.outputs.resource_group }}
|
||||
workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
|
||||
endpoint_file: mlops/azureml/deploy/online/online-deployment.yml
|
||||
endpoint_name: ${{ format('taxi-online-{0}', needs.get-config.outputs.oep) }}
|
||||
endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.oep) }}
|
||||
endpoint_type: online
|
||||
deployment_name: taxi-online-dp
|
||||
secrets:
|
||||
|
@ -37,6 +37,6 @@ jobs:
|
|||
resource_group: ${{ needs.get-config.outputs.resource_group }}
|
||||
workspace_name: ${{ needs.get-config.outputs.aml_workspace }}
|
||||
traffic_allocation: taxi-online-dp=100
|
||||
endpoint_name: ${{ format('taxi-online-{0}', needs.get-config.outputs.oep) }}
|
||||
endpoint_name: ${{ format('taxi-gha-{0}', needs.get-config.outputs.oep) }}
|
||||
secrets:
|
||||
creds: ${{secrets.AZURE_CREDENTIALS}}
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
channels:
|
||||
- defaults
|
||||
- anaconda
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- python=3.7.5
|
||||
- pip
|
||||
- pip:
|
||||
- azureml-mlflow==1.38.0
|
||||
- azureml-sdk==1.38.0
|
||||
- scikit-learn==0.24.1
|
||||
- pandas==1.2.1
|
||||
- joblib==1.0.0
|
||||
- matplotlib==3.3.3
|
||||
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
|
||||
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
|
|
@ -0,0 +1,16 @@
|
|||
channels:
|
||||
- defaults
|
||||
- anaconda
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- python=3.7.5
|
||||
- pip
|
||||
- pip:
|
||||
- azureml-mlflow==1.38.0
|
||||
- azureml-sdk==1.38.0
|
||||
- scikit-learn==0.24.1
|
||||
- pandas==1.2.1
|
||||
- joblib==1.0.0
|
||||
- matplotlib==3.3.3
|
||||
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
|
||||
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,3 @@
|
|||
{"input_data": [[2.86,40.66551971,-73.98258972,1,40.69801331,-73.97357178,0,2,1,1,19,21,3,56,1,1,19,21,21,57],
|
||||
[3.98,40.68072128,-73.931633,1,40.6909523,-73.99185181,0,2,0,1,4,21,44,11,0,1,4,21,59,35]]
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
$schema: https://azuremlschemas.azureedge.net/latest/batchDeployment.schema.json
|
||||
name: batch-dp
|
||||
endpoint_name: taxi-fare-batch
|
||||
model: azureml:taxi-model@latest
|
||||
compute: azureml:batch-cluster
|
||||
resources:
|
||||
instance_count: 1
|
||||
max_concurrency_per_instance: 2
|
||||
mini_batch_size: 10
|
||||
output_action: append_row
|
||||
output_file_name: predictions.csv
|
||||
retry_settings:
|
||||
max_retries: 3
|
||||
timeout: 30
|
||||
error_threshold: -1
|
||||
logging_level: info
|
|
@ -0,0 +1,4 @@
|
|||
$schema: https://azuremlschemas.azureedge.net/latest/batchEndpoint.schema.json
|
||||
name: taxi-fare-batch
|
||||
description: taxi cost batch endpoint
|
||||
auth_mode: aad_token
|
|
@ -0,0 +1,6 @@
|
|||
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
|
||||
name: blue
|
||||
endpoint_name: taxi-fare-online
|
||||
model: azureml:taxi-model@latest
|
||||
instance_type: Standard_DS2_v2
|
||||
instance_count: 1
|
|
@ -0,0 +1,4 @@
|
|||
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineEndpoint.schema.json
|
||||
name: taxi-fare-online
|
||||
description: taxi cost online endpoint
|
||||
auth_mode: key
|
|
@ -0,0 +1,166 @@
|
|||
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
|
||||
type: pipeline
|
||||
experiment_name: taxi-fare-training
|
||||
description: Training Pipeline to train a model that predicts taxi fare price
|
||||
|
||||
# <inputs_and_outputs>
|
||||
inputs:
|
||||
input: # uses local data; an anonymous data asset will be created from it
|
||||
type: uri_folder
|
||||
path: ../../../data/
|
||||
enable_monitoring: "true"
|
||||
table_name: 'taximonitoring'
|
||||
|
||||
outputs:
|
||||
train_data:
|
||||
val_data:
|
||||
test_data:
|
||||
trained_model:
|
||||
evaluation_output:
|
||||
model_info_output_path:
|
||||
# </inputs_and_outputs>
|
||||
|
||||
# <jobs>
|
||||
settings:
|
||||
default_datastore: azureml:workspaceblobstore
|
||||
default_compute: azureml:cpu-cluster
|
||||
continue_on_step_failure: false
|
||||
|
||||
jobs:
|
||||
prep_data:
|
||||
name: prep_data
|
||||
display_name: prep-data
|
||||
code: ../../../data-science/src/prep
|
||||
command: >-
|
||||
python prep.py
|
||||
--raw_data ${{inputs.raw_data}}
|
||||
--train_data ${{outputs.train_data}}
|
||||
--val_data ${{outputs.val_data}}
|
||||
--test_data ${{outputs.test_data}}
|
||||
--enable_monitoring ${{inputs.enable_monitoring}}
|
||||
--table_name ${{inputs.table_name}}
|
||||
environment: azureml:taxi-train-env@latest
|
||||
inputs:
|
||||
raw_data: ${{parent.inputs.input}}
|
||||
enable_monitoring: ${{parent.inputs.enable_monitoring}}
|
||||
table_name: ${{parent.inputs.table_name}}
|
||||
outputs:
|
||||
train_data: ${{parent.outputs.train_data}}
|
||||
val_data: ${{parent.outputs.val_data}}
|
||||
test_data: ${{parent.outputs.test_data}}
|
||||
|
||||
train_model:
|
||||
name: train_model
|
||||
display_name: train-model
|
||||
code: ../../../data-science/src/train
|
||||
command: >-
|
||||
python train.py
|
||||
--train_data ${{inputs.train_data}}
|
||||
--model_output ${{outputs.model_output}}
|
||||
environment: azureml:taxi-train-env@latest
|
||||
inputs:
|
||||
train_data: ${{parent.jobs.prep_data.outputs.train_data}}
|
||||
outputs:
|
||||
model_output: ${{parent.outputs.trained_model}}
|
||||
|
||||
evaluate_model:
|
||||
name: evaluate_model
|
||||
display_name: evaluate-model
|
||||
code: ../../../data-science/src/evaluate
|
||||
command: >-
|
||||
python evaluate.py
|
||||
--model_name ${{inputs.model_name}}
|
||||
--model_input ${{inputs.model_input}}
|
||||
--test_data ${{inputs.test_data}}
|
||||
--evaluation_output ${{outputs.evaluation_output}}
|
||||
environment: azureml:taxi-train-env@latest
|
||||
inputs:
|
||||
model_name: "taxi-model"
|
||||
model_input: ${{parent.jobs.train_model.outputs.model_output}}
|
||||
test_data: ${{parent.jobs.prep_data.outputs.test_data}}
|
||||
outputs:
|
||||
evaluation_output: ${{parent.outputs.evaluation_output}}
|
||||
|
||||
register_model:
|
||||
name: register_model
|
||||
display_name: register-model
|
||||
code: ../../../data-science/src/register
|
||||
command: >-
|
||||
python register.py
|
||||
--model_name ${{inputs.model_name}}
|
||||
--model_path ${{inputs.model_path}}
|
||||
--evaluation_output ${{inputs.evaluation_output}}
|
||||
--model_info_output_path ${{outputs.model_info_output_path}}
|
||||
environment: azureml:taxi-train-env@latest
|
||||
inputs:
|
||||
model_name: "taxi-model"
|
||||
model_path: ${{parent.jobs.train_model.outputs.model_output}}
|
||||
evaluation_output: ${{parent.jobs.evaluate_model.outputs.evaluation_output}}
|
||||
outputs:
|
||||
model_info_output_path: ${{parent.outputs.model_info_output_path}}
|
||||
|
||||
create_rai_job:
|
||||
type: command
|
||||
component: azureml:rai_insights_constructor@latest
|
||||
limits:
|
||||
timeout: 120
|
||||
inputs:
|
||||
title: Responsible AI for Taxi Fare Prediction
|
||||
task_type: regression
|
||||
model_info_path: ${{parent.jobs.register_model.outputs.model_info_output_path}}
|
||||
train_dataset: ${{parent.jobs.prep_data.outputs.train_data}}
|
||||
test_dataset: ${{parent.jobs.prep_data.outputs.test_data}}
|
||||
target_column_name: "cost"
|
||||
categorical_column_names: '["store_forward", "vendor"]'
|
||||
|
||||
explain_01:
|
||||
type: command
|
||||
component: azureml:rai_insights_explanation@latest
|
||||
limits:
|
||||
timeout: 120
|
||||
inputs:
|
||||
comment: Some random string
|
||||
rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
|
||||
|
||||
causal_01:
|
||||
type: command
|
||||
component: azureml:rai_insights_causal@latest
|
||||
limits:
|
||||
timeout: 120
|
||||
inputs:
|
||||
rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
|
||||
treatment_features: '["distance", "passengers"]'
|
||||
heterogeneity_features: '["store_forward", "vendor"]'
|
||||
|
||||
counterfactual_01:
|
||||
type: command
|
||||
component: azureml:rai_insights_counterfactual@latest
|
||||
limits:
|
||||
timeout: 600
|
||||
inputs:
|
||||
rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
|
||||
total_CFs: 10
|
||||
desired_range: '[16, 30]'
|
||||
feature_importance: True
|
||||
|
||||
error_analysis_01:
|
||||
type: command
|
||||
component: azureml:rai_insights_erroranalysis@latest
|
||||
limits:
|
||||
timeout: 120
|
||||
inputs:
|
||||
rai_insights_dashboard: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
|
||||
filter_features: '["distance", "passengers"]'
|
||||
|
||||
gather_01:
|
||||
type: command
|
||||
component: azureml:rai_insights_gather@latest
|
||||
limits:
|
||||
timeout: 120
|
||||
inputs:
|
||||
constructor: ${{parent.jobs.create_rai_job.outputs.rai_insights_dashboard}}
|
||||
insight_1: ${{parent.jobs.causal_01.outputs.causal}}
|
||||
insight_2: ${{parent.jobs.counterfactual_01.outputs.counterfactual}}
|
||||
insight_3: ${{parent.jobs.error_analysis_01.outputs.error_analysis}}
|
||||
insight_4: ${{parent.jobs.explain_01.outputs.explanation}}
|
||||
# </jobs>
|
|
@ -0,0 +1,5 @@
|
|||
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
|
||||
name: taxi-train-env
|
||||
image: mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04
|
||||
conda_file: ../../../data-science/environment/train-conda.yml
|
||||
description: Environment created from a Docker image plus Conda environment to train taxi model.
|
|
@ -0,0 +1,66 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
variables:
|
||||
- ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
|
||||
# 'main' branch: PRD environment
|
||||
- template: ../../config-infra-prod.yml
|
||||
- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
|
||||
# 'develop' or feature branches: DEV environment
|
||||
- template: ../../config-infra-dev.yml
|
||||
- name: version
|
||||
value: aml-cli-v2
|
||||
- name: endpoint_name
|
||||
value: taxi-batch-$(namespace)$(postfix)$(environment)
|
||||
- name: endpoint_type
|
||||
value: batch
|
||||
|
||||
trigger:
|
||||
- none
|
||||
|
||||
pool:
|
||||
vmImage: ubuntu-20.04
|
||||
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: mlops-templates # Template Repo
|
||||
name: Azure/mlops-templates # need to change org name from "Azure" to your own org
|
||||
endpoint: github-connection # name of the GitHub service connection; must be set up beforehand and hardcoded here
|
||||
type: github
|
||||
ref: main
|
||||
|
||||
|
||||
stages:
|
||||
- stage: CreateBatchEndpoint
|
||||
displayName: Create/Update Batch Endpoint
|
||||
jobs:
|
||||
- job: DeployBatchEndpoint
|
||||
steps:
|
||||
- checkout: self
|
||||
path: s/
|
||||
- checkout: mlops-templates
|
||||
path: s/templates/
|
||||
- template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
|
||||
parameters:
|
||||
cluster_name: batch-cluster # name must match cluster name in deployment file below
|
||||
size: STANDARD_DS3_V2
|
||||
min_instances: 0
|
||||
max_instances: 5
|
||||
cluster_tier: dedicated
|
||||
- template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
|
||||
parameters:
|
||||
endpoint_file: mlops/azureml/deploy/batch/batch-endpoint.yml
|
||||
- template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
|
||||
parameters:
|
||||
deployment_name: taxi-batch-dp
|
||||
deployment_file: mlops/azureml/deploy/batch/batch-deployment.yml
|
||||
- template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
|
||||
parameters:
|
||||
deployment_name: taxi-batch-dp
|
||||
sample_request: data/taxi-batch.csv
|
||||
request_type: uri_file # either uri_folder or uri_file
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
variables:
|
||||
- ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
|
||||
# 'main' branch: PRD environment
|
||||
- template: ../../config-infra-prod.yml
|
||||
- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
|
||||
# 'develop' or feature branches: DEV environment
|
||||
- template: ../../config-infra-dev.yml
|
||||
- name: version
|
||||
value: aml-cli-v2
|
||||
|
||||
|
||||
trigger:
|
||||
- none
|
||||
|
||||
pool:
|
||||
vmImage: ubuntu-20.04
|
||||
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: mlops-templates # Template Repo
|
||||
name: Azure/mlops-templates # need to change org name from "Azure" to your own org
|
||||
endpoint: github-connection # name of the GitHub service connection; must be set up beforehand and hardcoded here
|
||||
type: github
|
||||
ref: main
|
||||
- repository: rai-vnext-preview # Responsible AI (RAI) components repo
|
||||
name: Azure/rai-vnext-preview # need to change org name from "Azure" to your own org
|
||||
endpoint: github-connection # name of the GitHub service connection; must be set up beforehand and hardcoded here
|
||||
type: github
|
||||
ref: main
|
||||
|
||||
stages:
|
||||
- stage: DeployTrainingPipeline
|
||||
displayName: Deploy Training Pipeline
|
||||
jobs:
|
||||
- job: DeployTrainingPipeline
|
||||
timeoutInMinutes: 120 # how long to run the job before automatically cancelling
|
||||
steps:
|
||||
- checkout: self
|
||||
path: s/
|
||||
- checkout: mlops-templates
|
||||
path: s/templates/
|
||||
- template: templates/tests/unit-tests.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
|
||||
parameters:
|
||||
build_type: conda
|
||||
environment_name: taxi-train-env
|
||||
environment_file: mlops/azureml/train/train-env.yml
|
||||
enable_monitoring: $(enable_monitoring)
|
||||
- checkout: rai-vnext-preview
|
||||
path: s/
|
||||
- template: register-rai-components.yml
|
||||
- checkout: self
|
||||
path: s/
|
||||
- checkout: mlops-templates
|
||||
path: s/templates/
|
||||
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
|
||||
parameters:
|
||||
pipeline_file: mlops/azureml/train/pipeline.yml
|
||||
experiment_name: $(environment)_taxi_fare_train_$(Build.SourceBranchName)
|
||||
display_name: $(environment)_taxi_fare_run_$(Build.BuildID)
|
||||
enable_monitoring: $(enable_monitoring)
|
|
@ -0,0 +1,61 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
variables:
|
||||
- ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
|
||||
# 'main' branch: PRD environment
|
||||
- template: ../../config-infra-prod.yml
|
||||
- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
|
||||
# 'develop' or feature branches: DEV environment
|
||||
- template: ../../config-infra-dev.yml
|
||||
- name: version
|
||||
value: aml-cli-v2
|
||||
- name: endpoint_name
|
||||
value: taxi-online-$(namespace)$(postfix)$(environment)
|
||||
- name: endpoint_type
|
||||
value: online
|
||||
|
||||
|
||||
trigger:
|
||||
- none
|
||||
|
||||
pool:
|
||||
vmImage: ubuntu-20.04
|
||||
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: mlops-templates # Template Repo
|
||||
name: Azure/mlops-templates # need to change org name from "Azure" to your own org
|
||||
endpoint: github-connection # name of the GitHub service connection; must be set up beforehand and hardcoded here
|
||||
type: github
|
||||
ref: main
|
||||
|
||||
stages:
|
||||
- stage: CreateOnlineEndpoint
|
||||
displayName: Create/Update Online Endpoint
|
||||
jobs:
|
||||
- job: DeployOnlineEndpoint
|
||||
steps:
|
||||
- checkout: self
|
||||
path: s/
|
||||
- checkout: mlops-templates
|
||||
path: s/templates/
|
||||
- template: templates/${{ variables.version }}/install-az-cli.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/install-aml-cli.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/connect-to-workspace.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/create-endpoint.yml@mlops-templates
|
||||
parameters:
|
||||
endpoint_file: mlops/azureml/deploy/online/online-endpoint.yml
|
||||
- template: templates/${{ variables.version }}/create-deployment.yml@mlops-templates
|
||||
parameters:
|
||||
deployment_name: taxi-online-dp
|
||||
deployment_file: mlops/azureml/deploy/online/online-deployment.yml
|
||||
- template: templates/${{ variables.version }}/allocate-traffic.yml@mlops-templates
|
||||
parameters:
|
||||
traffic_allocation: taxi-online-dp=100
|
||||
- template: templates/${{ variables.version }}/test-deployment.yml@mlops-templates
|
||||
parameters:
|
||||
deployment_name: taxi-online-dp
|
||||
sample_request: data/taxi-request.json
|
||||
request_type: json
|
Загрузка…
Ссылка в новой задаче