Merge pull request #61 from Azure/monitoring

Monitoring
This commit is contained in:
Cindy Weng 2022-07-29 13:12:35 +01:00 коммит произвёл GitHub
Родитель 88735db28f 1fb64e2538
Коммит 2177fdb2b8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
19 изменённых файлов: 150 добавлений и 10 удалений

Просмотреть файл

@ -0,0 +1,19 @@
# Conda environment: Python 3.7.5 with the packages below installed through pip.
channels:
  - defaults
  - anaconda
  - conda-forge
dependencies:
  - python=3.7.5
  - pip
  # Entries nested under `pip:` are handed to pip rather than resolved by conda,
  # which is why they use pip's `==` pin syntax.
  - pip:
      - azureml-mlflow==1.38.0
      - azureml-sdk==1.38.0
      - scikit-learn==0.24.1
      - pandas==1.2.1
      - joblib==1.0.0
      - matplotlib==3.3.3
      - fairlearn==0.7.0
      - azureml-contrib-fairness==1.38.0
      - interpret-community==0.24.1
      - interpret-core==0.2.7
      - azureml-interpret==1.38.0

Просмотреть файл

@ -13,11 +13,17 @@ def parse_args():
parser = argparse.ArgumentParser("prep")
parser.add_argument("--raw_data", type=str, help="Path to raw data")
parser.add_argument("--prepared_data", type=str, help="Path of prepared data")
parser.add_argument("--enable_monitoring", type=str, help="enable logging to ADX")
parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging")
args = parser.parse_args()
return args
def log_training_data(df, table_name):
    """Pass ``df`` to ``batch_collect`` on an ``Online_Collector`` built for ``table_name``.

    Args:
        df: Data handed unchanged to ``Online_Collector.batch_collect``.
        table_name: Argument used to construct the ``Online_Collector``.
    """
    # Function-scope import: `obs.collector` is loaded only when this runs.
    from obs.collector import Online_Collector

    Online_Collector(table_name).batch_collect(df)
def main():
# ---------- Parse Arguments ----------- #
@ -65,5 +71,8 @@ def main():
val.to_csv((Path(args.prepared_data) / "val.csv"))
test.to_csv((Path(args.prepared_data) / "test.csv"))
# Log the data only when monitoring was requested ('true', '1' or 'yes').
# `.lower()` has to be *called*: comparing the bound method `.lower` to a
# string is always False, so only the literal '1' used to enable monitoring.
# str() keeps the check safe if the flag was omitted and the value is None.
if str(args.enable_monitoring).lower() in ('true', '1', 'yes'):
    log_training_data(data, args.table_name)
if __name__ == "__main__":
main()

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -8,6 +8,7 @@ inputs:
input: #using local data, will create an anonymous data asset
type: uri_folder
path: ../../../data/
enable_monitoring: 'false'
outputs:
prepared_data:
@ -32,9 +33,11 @@ jobs:
python prep.py
--raw_data ${{inputs.raw_data}}
--prepared_data ${{outputs.prepared_data}}
--enable_monitoring ${{inputs.enable_monitoring}}
environment: azureml:taxi-train-env@latest
inputs:
raw_data: ${{parent.inputs.input}}
enable_monitoring: ${{parent.inputs.enable_monitoring}}
outputs:
prepared_data: ${{parent.outputs.prepared_data}}

Просмотреть файл

@ -9,7 +9,7 @@ variables:
# 'develop' or feature branches: DEV environment
- template: ../../config-infra-dev.yml
- name: version
value: aml-cli-v2
value: aml-cli-v2
trigger:
@ -45,6 +45,7 @@ stages:
build_type: conda
environment_name: taxi-train-env
environment_file: mlops/azureml/train/train-env.yml
enable_monitoring: $(enable_monitoring)
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
parameters:
pipeline_file: mlops/azureml/train/pipeline.yml

Просмотреть файл

@ -80,3 +80,7 @@ variables:
batch_process_count_per_node: 1
batch_node_count: 1
# Monitoring settings
scoring_table_name: scoringdata
training_table_name: mlmonitoring

Просмотреть файл

@ -0,0 +1,16 @@
# Conda environment `batch-monitoring`: Python 3.8.1 with the packages below
# installed through pip.
name: batch-monitoring
channels:
  - anaconda
  - conda-forge
dependencies:
  - python=3.8.1
  # Entries nested under `pip:` are handed to pip rather than resolved by conda.
  - pip:
      - azureml-defaults==1.41.0
      - azureml-mlflow==1.41.0
      - azureml-sdk==1.41.0
      - azureml-interpret==1.41.0
      - scikit-learn==0.24.1
      - pandas==1.4.1
      - joblib==1.0.0
      - matplotlib==3.3.3
      # Installed from the GitHub repository; no tag or commit is pinned.
      - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector

Просмотреть файл

@ -0,0 +1,26 @@
# Conda environment `train`: Python 3.8.1 with the packages below installed
# through pip.
name: train
channels:
  - anaconda
  - conda-forge
dependencies:
  - python=3.8.1
  # Entries nested under `pip:` are handed to pip rather than resolved by conda.
  - pip:
      - azureml-core==1.41.0
      - azureml-mlflow==1.41.0
      - azure-identity==1.9.0
      - azure-mgmt-kusto==2.2.0
      - azure-kusto-data==3.1.2
      - azure-kusto-ingest==3.1.2
      - jupyter-dash==0.4.2
      - dash==2.3.1
      - plotly==5.7.0
      - azureml-defaults==1.41.0
      - pandas==1.4.1
      - scikit-learn==0.24.1
      - fairlearn==0.7.0
      - azureml-contrib-fairness==1.41.0
      - interpret-core==0.2.7
      - azureml-interpret==1.41.0
      # Installed from the GitHub repository; no tag or commit is pinned.
      - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
      - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
      - matplotlib==3.3.3

Просмотреть файл

@ -22,8 +22,16 @@ def parse_args():
parser = argparse.ArgumentParser(description="UCI Credit example")
parser.add_argument("--uci-credit", type=str, default='data/', help="Directory path to training data")
parser.add_argument("--prepared_data_path", type=str, default='prepared_data/', help="prepared data directory")
parser.add_argument("--enabling_monitoring", type=str, default="false", help="enable logging to ADX")
parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging")
return parser.parse_args()
def log_training_data(df, table_name):
    """Forward ``df`` to an ``Online_Collector`` created for ``table_name``.

    Args:
        df: Object passed as-is to ``batch_collect``.
        table_name: Value given to the ``Online_Collector`` constructor.
    """
    # Imported inside the function, so `obs.collector` is only needed when
    # this function is actually called.
    from obs.collector import Online_Collector

    sink = Online_Collector(table_name)
    sink.batch_collect(df)
def main():
# Parse command-line arguments
args = parse_args()
@ -64,6 +72,9 @@ def main():
train.to_csv(TRAIN_PATH, index=False)
val.to_csv(VAL_PATH, index=False)
test.to_csv(TEST_PATH, index=False)
# Log the data only when monitoring was requested ('true', '1' or 'yes').
# Accept either attribute spelling (`enable_monitoring` / `enabling_monitoring`)
# so a mismatch with the parser's option name cannot raise AttributeError.
monitoring_flag = getattr(args, "enable_monitoring", getattr(args, "enabling_monitoring", "false"))
# `.lower()` has to be *called*: comparing the bound method `.lower` to a
# string is always False, so 'true' and 'yes' never matched before.
if str(monitoring_flag).lower() in ('true', '1', 'yes'):
    log_training_data(df, args.table_name)
if __name__ == '__main__':
main()

Просмотреть файл

@ -13,13 +13,16 @@ from azureml.core.model import Model
model = None
explainer = None
collector = None
def init():
global model, explainer
global model, explainer, collector
print("Started batch scoring by running init()")
parser = argparse.ArgumentParser('batch_scoring')
parser.add_argument('--model_name', type=str, help='Model to use for batch scoring')
parser.add_argument('--enable_monitoring', type=str, help="Enable Monitoring", default="false")
parser.add_argument('--table_name', type=str, help="Table Name for logging data")
args, _ = parser.parse_known_args()
model_path = Model.get_model_path(args.model_name)
@ -30,11 +33,15 @@ def init():
explainer_path = os.path.join(Model.get_model_path(args.model_name), "explainer")
#explainer = joblib.load(explainer_path)
# Create the module-level collector only when monitoring was requested
# ('true', '1' or 'yes'); otherwise `collector` keeps its previous value.
# `.lower()` has to be *called*: comparing the bound method `.lower` to a
# string is always False, so 'true' and 'yes' never matched before.
if str(args.enable_monitoring).lower() in ('true', '1', 'yes'):
    # Function-scope import: `obs.collector` is needed only with monitoring on.
    from obs.collector import Online_Collector
    collector = Online_Collector(args.table_name)
def run(file_list):
print(f"Files to process: {file_list}")
results = pd.DataFrame(columns=["Sno", "ProbaGoodCredit", "ProbaBadCredit", "FeatureImportance"])
all_results = []
for filename in file_list:
df = pd.read_csv(filename)
@ -57,5 +64,11 @@ def run(file_list):
#result = pd.concat([sno, proba, explanation], axis=1)
result = pd.concat([sno, proba], axis=1)
results = results.append(result)
all_results.append(pd.concat([df, proba], axis=1))
print(f"Batch scored: {filename}")
if collector:
full_results = pd.concat(all_results)
collector.batch_collect(full_results)
return results

Просмотреть файл

@ -0,0 +1,31 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# Pipeline with a single stage that runs the `deploy-drift-detection.yml`
# template taken from the `mlops-templates` repository.
variables:
  - template: ../../config-aml.yml
  - ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
      # 'main' branch: PRD environment
      - template: ../../config-infra-prod.yml
  - ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
      # 'develop' or feature branches: DEV environment
      - template: ../../config-infra-dev.yml
  # Selects the template sub-folder used in the `stages` section.
  - name: version
    value: python-sdk

# CI trigger disabled.
trigger:
  - none

pool:
  vmImage: $(ap_vm_image)

# NOTE(review): `mlops-templates` is referenced below, but no
# `resources.repositories` entry declaring it is visible here - confirm.
stages:
  - stage: DeployDriftJob
    displayName: Deploy Drift Job
    jobs:
      - job: DeployDriftJob
        steps:
          - checkout: self
            path: s/
          - checkout: mlops-templates
            path: s/templates/
          - template: templates/${{ variables.version }}/deploy-drift-detection.yml@mlops-templates

Просмотреть файл

@ -44,9 +44,12 @@ stages:
environment_name: $(batch_env_name)
build_type: 'conda'
environment_file: $(batch_env_conda_yaml)
enable_monitoring: $(enable_monitoring)
- template: templates/${{ variables.version }}/register-dataset.yml@mlops-templates
parameters:
data_type: scoring
- template: templates/${{ variables.version }}/deploy-batch-scoring-pipeline.yml@mlops-templates
parameters:
enable_monitoring: $(enable_monitoring)
- template: templates/${{ variables.version }}/add-pipeline-to-endpoint.yml@mlops-templates
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates

Просмотреть файл

@ -44,6 +44,7 @@ stages:
environment_name: $(training_env_name)
build_type: 'conda'
environment_file: $(training_env_conda_yaml)
enable_monitoring: $(enable_monitoring)
- template: templates/${{ variables.version }}/register-dataset.yml@mlops-templates
parameters:
data_type: training
@ -51,5 +52,7 @@ stages:
parameters:
compute_type: training
- template: templates/${{ variables.version }}/deploy-training-pipeline.yml@mlops-templates
parameters:
enable_monitoring: $(enable_monitoring)
- template: templates/${{ variables.version }}/add-pipeline-to-endpoint.yml@mlops-templates
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates

Просмотреть файл

@ -7,8 +7,9 @@ variables:
ap_vm_image: ubuntu-20.04
namespace: mlopsv2
postfix: 0688
location: northeurope
postfix: 0659
location: westus
environment: dev
enable_aml_computecluster: true
enable_aml_secure_workspace: true

Просмотреть файл

@ -131,4 +131,4 @@ module "data_explorer" {
client_secret = var.client_secret
tags = local.tags
}
}

Просмотреть файл

@ -8,4 +8,4 @@ output "uri" {
output "name" {
value = azurerm_kusto_database.database[0].name
}
}

Просмотреть файл

@ -66,4 +66,4 @@ stages :
- template: templates/infra/run-terraform-apply.yml@mlops-templates
parameters:
jumphost_username: ${{parameters.jumphost_username}}
jumphost_password: ${{parameters.jumphost_password}}
jumphost_password: ${{parameters.jumphost_password}}

Просмотреть файл

@ -44,4 +44,4 @@ variable "enable_monitoring" {
variable "client_secret" {
description = "Service Principal Secret"
}
}