Коммит
2177fdb2b8
|
@ -0,0 +1,19 @@
|
|||
channels:
|
||||
- defaults
|
||||
- anaconda
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- python=3.7.5
|
||||
- pip
|
||||
- pip:
|
||||
- azureml-mlflow==1.38.0
|
||||
- azureml-sdk==1.38.0
|
||||
- scikit-learn==0.24.1
|
||||
- pandas==1.2.1
|
||||
- joblib==1.0.0
|
||||
- matplotlib==3.3.3
|
||||
- fairlearn==0.7.0
|
||||
- azureml-contrib-fairness==1.38.0
|
||||
- interpret-community==0.24.1
|
||||
- interpret-core==0.2.7
|
||||
- azureml-interpret==1.38.0
|
|
@ -13,11 +13,17 @@ def parse_args():
|
|||
parser = argparse.ArgumentParser("prep")
|
||||
parser.add_argument("--raw_data", type=str, help="Path to raw data")
|
||||
parser.add_argument("--prepared_data", type=str, help="Path of prepared data")
|
||||
|
||||
parser.add_argument("--enable_monitoring", type=str, help="enable logging to ADX")
|
||||
parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging")
|
||||
args = parser.parse_args()
|
||||
|
||||
return args
|
||||
|
||||
def log_training_data(df, table_name):
    """Hand the given DataFrame to the observability collector in one batch.

    Args:
        df: Data to log; it is passed unchanged to ``batch_collect``.
        table_name: Name given to ``Online_Collector`` (the CLI help text
            describes it as the ADX table used for logging).
    """
    # The collector is imported at call time rather than at module import.
    # NOTE(review): presumably so the obs package is only required when
    # monitoring is enabled - confirm.
    from obs.collector import Online_Collector

    Online_Collector(table_name).batch_collect(df)
|
||||
|
||||
def main():
|
||||
|
||||
# ---------- Parse Arguments ----------- #
|
||||
|
@ -65,5 +71,8 @@ def main():
|
|||
val.to_csv((Path(args.prepared_data) / "val.csv"))
|
||||
test.to_csv((Path(args.prepared_data) / "test.csv"))
|
||||
|
||||
# Monitoring is on when the flag is 'true', '1' or 'yes' in any letter case.
# str.lower has to be *called*: comparing the bound method itself to a string
# is always False, which silently ignored 'true' and 'yes'.
# str() keeps an omitted flag (None, since the argument has no default) from
# raising AttributeError; it simply reads as "disabled".
if str(args.enable_monitoring).lower() in ("true", "1", "yes"):
    log_training_data(data, args.table_name)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -8,6 +8,7 @@ inputs:
|
|||
input: #using local data, will create an anonymous data asset
|
||||
type: uri_folder
|
||||
path: ../../../data/
|
||||
enable_monitoring: 'false'
|
||||
|
||||
outputs:
|
||||
prepared_data:
|
||||
|
@ -32,9 +33,11 @@ jobs:
|
|||
python prep.py
|
||||
--raw_data ${{inputs.raw_data}}
|
||||
--prepared_data ${{outputs.prepared_data}}
|
||||
--enable_monitoring ${{inputs.enable_monitoring}}
|
||||
environment: azureml:taxi-train-env@latest
|
||||
inputs:
|
||||
raw_data: ${{parent.inputs.input}}
|
||||
enable_monitoring: ${{parent.inputs.enable_monitoring}}
|
||||
outputs:
|
||||
prepared_data: ${{parent.outputs.prepared_data}}
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ variables:
|
|||
# 'develop' or feature branches: DEV environment
|
||||
- template: ../../config-infra-dev.yml
|
||||
- name: version
|
||||
value: aml-cli-v2
|
||||
value: aml-cli-v2
|
||||
|
||||
|
||||
trigger:
|
||||
|
@ -45,6 +45,7 @@ stages:
|
|||
build_type: conda
|
||||
environment_name: taxi-train-env
|
||||
environment_file: mlops/azureml/train/train-env.yml
|
||||
enable_monitoring: $(enable_monitoring)
|
||||
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
|
||||
parameters:
|
||||
pipeline_file: mlops/azureml/train/pipeline.yml
|
||||
|
|
|
@ -80,3 +80,7 @@ variables:
|
|||
batch_process_count_per_node: 1
|
||||
batch_node_count: 1
|
||||
|
||||
# Monitoring settings
|
||||
scoring_table_name: scoringdata
|
||||
training_table_name: mlmonitoring
|
||||
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
name: batch-monitoring
|
||||
channels:
|
||||
- anaconda
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- python=3.8.1
|
||||
- pip:
|
||||
- azureml-defaults==1.41.0
|
||||
- azureml-mlflow==1.41.0
|
||||
- azureml-sdk==1.41.0
|
||||
- azureml-interpret==1.41.0
|
||||
- scikit-learn==0.24.1
|
||||
- pandas==1.4.1
|
||||
- joblib==1.0.0
|
||||
- matplotlib==3.3.3
|
||||
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
|
|
@ -0,0 +1,26 @@
|
|||
name: train
|
||||
channels:
|
||||
- anaconda
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- python=3.8.1
|
||||
- pip:
|
||||
- azureml-core==1.41.0
|
||||
- azureml-mlflow==1.41.0
|
||||
- azure-identity==1.9.0
|
||||
- azure-mgmt-kusto==2.2.0
|
||||
- azure-kusto-data==3.1.2
|
||||
- azure-kusto-ingest==3.1.2
|
||||
- jupyter-dash==0.4.2
|
||||
- dash==2.3.1
|
||||
- plotly==5.7.0
|
||||
- azureml-defaults==1.41.0
|
||||
- pandas==1.4.1
|
||||
- scikit-learn==0.24.1
|
||||
- fairlearn==0.7.0
|
||||
- azureml-contrib-fairness==1.41.0
|
||||
- interpret-core==0.2.7
|
||||
- azureml-interpret==1.41.0
|
||||
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
|
||||
- git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
|
||||
- matplotlib==3.3.3
|
|
@ -22,8 +22,16 @@ def parse_args():
|
|||
parser = argparse.ArgumentParser(description="UCI Credit example")
|
||||
parser.add_argument("--uci-credit", type=str, default='data/', help="Directory path to training data")
|
||||
parser.add_argument("--prepared_data_path", type=str, default='prepared_data/', help="prepared data directory")
|
||||
# The rest of the script reads args.enable_monitoring, so register the flag
# under that name. The earlier "--enabling_monitoring" spelling is kept as an
# alias so existing invocations keep working; both store to the same dest.
parser.add_argument("--enable_monitoring", "--enabling_monitoring", dest="enable_monitoring", type=str, default="false", help="enable logging to ADX")
|
||||
parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging")
|
||||
return parser.parse_args()
|
||||
|
||||
def log_training_data(df, table_name):
    """Send a DataFrame to the monitoring collector as a single batch.

    Args:
        df: Data to log; forwarded as-is to ``batch_collect``.
        table_name: Name used to construct the ``Online_Collector`` (per the
            CLI help text, the ADX table that receives the logged data).
    """
    # Deferred import: obs.collector is only loaded when this function runs.
    from obs.collector import Online_Collector

    Online_Collector(table_name).batch_collect(df)
|
||||
|
||||
|
||||
def main():
|
||||
# Parse command-line arguments
|
||||
args = parse_args()
|
||||
|
@ -64,6 +72,9 @@ def main():
|
|||
train.to_csv(TRAIN_PATH, index=False)
|
||||
val.to_csv(VAL_PATH, index=False)
|
||||
test.to_csv(TEST_PATH, index=False)
|
||||
|
||||
# parse_args registers this option as "--enabling_monitoring", so the parsed
# namespace may expose it under either attribute name; accept both instead of
# failing with AttributeError on args.enable_monitoring.
monitoring_flag = getattr(args, "enable_monitoring", None)
if monitoring_flag is None:
    monitoring_flag = getattr(args, "enabling_monitoring", "false")

# Monitoring is on for 'true', '1' or 'yes' in any letter case. str.lower must
# be called: comparing the bound method to a string is always False, which
# silently ignored 'true' and 'yes'.
if str(monitoring_flag).lower() in ("true", "1", "yes"):
    log_training_data(df, args.table_name)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -13,13 +13,16 @@ from azureml.core.model import Model
|
|||
|
||||
model = None
|
||||
explainer = None
|
||||
collector = None
|
||||
|
||||
def init():
|
||||
global model, explainer
|
||||
global model, explainer, collector
|
||||
print("Started batch scoring by running init()")
|
||||
|
||||
parser = argparse.ArgumentParser('batch_scoring')
|
||||
parser.add_argument('--model_name', type=str, help='Model to use for batch scoring')
|
||||
parser.add_argument('--enable_monitoring', type=str, help="Enable Monitoring", default="false")
|
||||
parser.add_argument('--table_name', type=str, help="Table Name for logging data")
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
model_path = Model.get_model_path(args.model_name)
|
||||
|
@ -30,11 +33,15 @@ def init():
|
|||
explainer_path = os.path.join(Model.get_model_path(args.model_name), "explainer")
|
||||
#explainer = joblib.load(explainer_path)
|
||||
|
||||
# Create the collector only when monitoring is switched on ('true', '1' or
# 'yes', any letter case). str.lower must be called: comparing the bound
# method to a string is always False, so only '1' used to enable monitoring.
if str(args.enable_monitoring).lower() in ("true", "1", "yes"):
    # Imported here so obs.collector is only loaded when monitoring is on.
    from obs.collector import Online_Collector
    collector = Online_Collector(args.table_name)
|
||||
|
||||
def run(file_list):
|
||||
|
||||
print(f"Files to process: {file_list}")
|
||||
results = pd.DataFrame(columns=["Sno", "ProbaGoodCredit", "ProbaBadCredit", "FeatureImportance"])
|
||||
|
||||
all_results = []
|
||||
for filename in file_list:
|
||||
|
||||
df = pd.read_csv(filename)
|
||||
|
@ -57,5 +64,11 @@ def run(file_list):
|
|||
#result = pd.concat([sno, proba, explanation], axis=1)
|
||||
result = pd.concat([sno, proba], axis=1)
|
||||
results = results.append(result)
|
||||
all_results.append(pd.concat([df, proba], axis=1))
|
||||
print(f"Batch scored: {filename}")
|
||||
|
||||
if collector:
|
||||
full_results = pd.concat(all_results)
|
||||
collector.batch_collect(full_results)
|
||||
|
||||
return results
|
|
@ -0,0 +1,31 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
variables:
|
||||
- template: ../../config-aml.yml
|
||||
- ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
|
||||
# 'main' branch: PRD environment
|
||||
- template: ../../config-infra-prod.yml
|
||||
- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
|
||||
# 'develop' or feature branches: DEV environment
|
||||
- template: ../../config-infra-dev.yml
|
||||
- name: version
|
||||
value: python-sdk
|
||||
|
||||
trigger:
|
||||
- none
|
||||
|
||||
pool:
|
||||
vmImage: $(ap_vm_image)
|
||||
|
||||
stages:
|
||||
- stage: DeployDriftJob
|
||||
displayName: Deploy Drift Job
|
||||
jobs:
|
||||
- job: DeployDriftJob
|
||||
steps:
|
||||
- checkout: self
|
||||
path: s/
|
||||
- checkout: mlops-templates
|
||||
path: s/templates/
|
||||
- template: templates/${{ variables.version }}/deploy-drift-detection.yml@mlops-templates
|
|
@ -44,9 +44,12 @@ stages:
|
|||
environment_name: $(batch_env_name)
|
||||
build_type: 'conda'
|
||||
environment_file: $(batch_env_conda_yaml)
|
||||
enable_monitoring: $(enable_monitoring)
|
||||
- template: templates/${{ variables.version }}/register-dataset.yml@mlops-templates
|
||||
parameters:
|
||||
data_type: scoring
|
||||
- template: templates/${{ variables.version }}/deploy-batch-scoring-pipeline.yml@mlops-templates
|
||||
parameters:
|
||||
enable_monitoring: $(enable_monitoring)
|
||||
- template: templates/${{ variables.version }}/add-pipeline-to-endpoint.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
|
||||
|
|
|
@ -44,6 +44,7 @@ stages:
|
|||
environment_name: $(training_env_name)
|
||||
build_type: 'conda'
|
||||
environment_file: $(training_env_conda_yaml)
|
||||
enable_monitoring: $(enable_monitoring)
|
||||
- template: templates/${{ variables.version }}/register-dataset.yml@mlops-templates
|
||||
parameters:
|
||||
data_type: training
|
||||
|
@ -51,5 +52,7 @@ stages:
|
|||
parameters:
|
||||
compute_type: training
|
||||
- template: templates/${{ variables.version }}/deploy-training-pipeline.yml@mlops-templates
|
||||
parameters:
|
||||
enable_monitoring: $(enable_monitoring)
|
||||
- template: templates/${{ variables.version }}/add-pipeline-to-endpoint.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
|
||||
|
|
|
@ -7,8 +7,9 @@ variables:
|
|||
ap_vm_image: ubuntu-20.04
|
||||
|
||||
namespace: mlopsv2
|
||||
postfix: 0688
|
||||
location: northeurope
|
||||
postfix: 0659
|
||||
location: westus
|
||||
|
||||
environment: dev
|
||||
enable_aml_computecluster: true
|
||||
enable_aml_secure_workspace: true
|
||||
|
|
|
@ -131,4 +131,4 @@ module "data_explorer" {
|
|||
client_secret = var.client_secret
|
||||
|
||||
tags = local.tags
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,4 +8,4 @@ output "uri" {
|
|||
|
||||
output "name" {
|
||||
value = azurerm_kusto_database.database[0].name
|
||||
}
|
||||
}
|
||||
|
|
|
@ -66,4 +66,4 @@ stages :
|
|||
- template: templates/infra/run-terraform-apply.yml@mlops-templates
|
||||
parameters:
|
||||
jumphost_username: ${{parameters.jumphost_username}}
|
||||
jumphost_password: ${{parameters.jumphost_password}}
|
||||
jumphost_password: ${{parameters.jumphost_password}}
|
||||
|
|
|
@ -44,4 +44,4 @@ variable "enable_monitoring" {
|
|||
|
||||
variable "client_secret" {
|
||||
description = "Service Principal Secret"
|
||||
}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче