Merge pull request #61 from Azure/monitoring

Monitoring
2022-07-29 13:12:35 +01:00 · 2022-07-29 13:12:35 +01:00 · 2177fdb2b8
--- a/classical/aml-cli-v2/data-science/environment/train-monitor-conda.yml
+++ b/classical/aml-cli-v2/data-science/environment/train-monitor-conda.yml
@ -0,0 +1,19 @@
+channels:
+  - defaults
+  - anaconda
+  - conda-forge
+dependencies:
+  - python=3.7.5
+  - pip
+  - pip:
+      - azureml-mlflow==1.38.0
+      - azureml-sdk==1.38.0
+      - scikit-learn==0.24.1
+      - pandas==1.2.1
+      - joblib==1.0.0
+      - matplotlib==3.3.3
+      - fairlearn==0.7.0
+      - azureml-contrib-fairness==1.38.0
+      - interpret-community==0.24.1
+      - interpret-core==0.2.7
+      - azureml-interpret==1.38.0
--- a/classical/aml-cli-v2/data-science/src/prep.py
+++ b/classical/aml-cli-v2/data-science/src/prep.py
@ -13,11 +13,17 @@ def parse_args():
    parser = argparse.ArgumentParser("prep")
    parser.add_argument("--raw_data", type=str, help="Path to raw data")
    parser.add_argument("--prepared_data", type=str, help="Path of prepared data")
-
+    parser.add_argument("--enable_monitoring", type=str, help="enable logging to ADX")
+    parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging")
    args = parser.parse_args()

    return args

+def log_training_data(df, table_name):  # pass df to an Online_Collector constructed with table_name
+    from obs.collector import Online_Collector  # function-scope import: obs is only loaded when this is called
+    collector = Online_Collector(table_name)  # table_name is the ADX table, per the --table_name help text
+    collector.batch_collect(df)  # NOTE(review): presumably ingests the whole frame in one call; confirm in obs.collector
+
 def main():

    # ---------- Parse Arguments ----------- #
@ -65,5 +71,8 @@ def main():
    val.to_csv((Path(args.prepared_data) / "val.csv"))
    test.to_csv((Path(args.prepared_data) / "test.csv"))

+    if str(args.enable_monitoring).lower() in ('true', '1', 'yes'):  # case-insensitive; str() tolerates an omitted flag (None)
+        log_training_data(data, args.table_name)
+
 if __name__ == "__main__":
    main()
--- a/classical/aml-cli-v2/mlops/azureml/deploy/batch/score.py
+++ b/classical/aml-cli-v2/mlops/azureml/deploy/batch/score.py
--- a/classical/aml-cli-v2/mlops/azureml/deploy/online/score.py
+++ b/classical/aml-cli-v2/mlops/azureml/deploy/online/score.py
--- a/classical/aml-cli-v2/mlops/azureml/train/pipeline.yml
+++ b/classical/aml-cli-v2/mlops/azureml/train/pipeline.yml
@ -8,6 +8,7 @@ inputs:
  input: #using local data, will crate an anonymous data asset
    type: uri_folder
    path: ../../../data/
+  enable_monitoring: 'false'

 outputs: 
  prepared_data:
@ -32,9 +33,11 @@ jobs:
      python prep.py 
      --raw_data ${{inputs.raw_data}} 
      --prepared_data ${{outputs.prepared_data}}
+      --enable_monitoring ${{inputs.enable_monitoring}}
    environment: azureml:taxi-train-env@latest
    inputs:
      raw_data: ${{parent.inputs.input}}
+      enable_monitoring: ${{parent.inputs.enable_monitoring}}
    outputs:
      prepared_data: ${{parent.outputs.prepared_data}}

--- a/classical/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml
+++ b/classical/aml-cli-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml
@ -9,7 +9,7 @@ variables:
    # 'develop' or feature branches: DEV environment
    - template: ../../config-infra-dev.yml
 - name: version
-  value: aml-cli-v2 
+  value: aml-cli-v2


 trigger:
@ -45,6 +45,7 @@ stages:
          build_type: conda
          environment_name: taxi-train-env
          environment_file: mlops/azureml/train/train-env.yml
+          enable_monitoring: $(enable_monitoring)
      - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
        parameters: 
          pipeline_file: mlops/azureml/train/pipeline.yml
--- a/classical/python-sdk/config-aml.yml
+++ b/classical/python-sdk/config-aml.yml
@ -80,3 +80,7 @@ variables:
  batch_process_count_per_node: 1
  batch_node_count: 1

+  # Monitoring settings
+  scoring_table_name: scoringdata
+  training_table_name: mlmonitoring
+
--- a/classical/python-sdk/data-science/environment/batch_monitor.yml
+++ b/classical/python-sdk/data-science/environment/batch_monitor.yml
@ -0,0 +1,16 @@
+name: batch-monitoring
+channels:
+- anaconda
+- conda-forge
+dependencies:
+- python=3.8.1
+- pip:
+    - azureml-defaults==1.41.0
+    - azureml-mlflow==1.41.0
+    - azureml-sdk==1.41.0
+    - azureml-interpret==1.41.0
+    - scikit-learn==0.24.1
+    - pandas==1.4.1
+    - joblib==1.0.0
+    - matplotlib==3.3.3
+    - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
--- a/classical/python-sdk/data-science/environment/train_monitor.yml
+++ b/classical/python-sdk/data-science/environment/train_monitor.yml
@ -0,0 +1,26 @@
+name: train
+channels:
+- anaconda
+- conda-forge
+dependencies:
+- python=3.8.1
+- pip:
+    - azureml-core==1.41.0
+    - azureml-mlflow==1.41.0
+    - azure-identity==1.9.0
+    - azure-mgmt-kusto==2.2.0
+    - azure-kusto-data==3.1.2
+    - azure-kusto-ingest==3.1.2
+    - jupyter-dash==0.4.2
+    - dash==2.3.1
+    - plotly==5.7.0
+    - azureml-defaults==1.41.0
+    - pandas==1.4.1
+    - scikit-learn==0.24.1
+    - fairlearn==0.7.0
+    - azureml-contrib-fairness==1.41.0
+    - interpret-core==0.2.7
+    - azureml-interpret==1.41.0
+    - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-client
+    - git+https://github.com/microsoft/AzureML-Observability#subdirectory=aml-obs-collector
+    - matplotlib==3.3.3
--- a/classical/python-sdk/data-science/src/prep.py
+++ b/classical/python-sdk/data-science/src/prep.py
@ -22,8 +22,16 @@ def parse_args():
    parser = argparse.ArgumentParser(description="UCI Credit example")
    parser.add_argument("--uci-credit", type=str, default='data/', help="Directory path to training data")
    parser.add_argument("--prepared_data_path", type=str, default='prepared_data/', help="prepared data directory")
+    parser.add_argument("--enable_monitoring", "--enabling_monitoring", dest="enable_monitoring", type=str, default="false", help="enable logging to ADX")
+    parser.add_argument("--table_name", type=str, default="mlmonitoring", help="Table name in ADX for logging")
    return parser.parse_args()

+def log_training_data(df, table_name):  # pass df to an Online_Collector constructed with table_name
+    from obs.collector import Online_Collector  # function-scope import: obs is only loaded when this is called
+    collector = Online_Collector(table_name)
+    collector.batch_collect(df)
+
+
 def main():
    # Parse command-line arguments
    args = parse_args()
@ -64,6 +72,9 @@ def main():
    train.to_csv(TRAIN_PATH, index=False)
    val.to_csv(VAL_PATH, index=False)
    test.to_csv(TEST_PATH, index=False)
+
+    if str(args.enable_monitoring).lower() in ('true', '1', 'yes'):  # flag arrives as a string; compare case-insensitively
+        log_training_data(df, args.table_name)
    
 if __name__ == '__main__':
    main()
--- a/classical/python-sdk/data-science/src/score.py
+++ b/classical/python-sdk/data-science/src/score.py
@ -13,13 +13,16 @@ from azureml.core.model import Model

 model = None
 explainer = None
+collector = None

 def init():
-    global model, explainer
+    global model, explainer, collector
    print("Started batch scoring by running init()")
    
    parser = argparse.ArgumentParser('batch_scoring')
    parser.add_argument('--model_name', type=str, help='Model to use for batch scoring')
+    parser.add_argument('--enable_monitoring', type=str, help="Enable Monitoring", default="false")
+    parser.add_argument('--table_name', type=str, help="Table Name for logging data")
    args, _ = parser.parse_known_args()
    
    model_path = Model.get_model_path(args.model_name)
@ -30,11 +33,15 @@ def init():
    explainer_path = os.path.join(Model.get_model_path(args.model_name), "explainer")
    #explainer = joblib.load(explainer_path)

+    if str(args.enable_monitoring).lower() in ('true', '1', 'yes'):  # flag arrives as a string; compare case-insensitively
+        from obs.collector import Online_Collector  # imported only when monitoring is enabled
+        collector = Online_Collector(args.table_name)
+        
 def run(file_list):
    
    print(f"Files to process: {file_list}")
    results = pd.DataFrame(columns=["Sno", "ProbaGoodCredit", "ProbaBadCredit", "FeatureImportance"])
-    
+    all_results = []
    for filename in file_list:
        
        df = pd.read_csv(filename)
@ -57,5 +64,11 @@ def run(file_list):
        #result = pd.concat([sno, proba, explanation], axis=1)
        result = pd.concat([sno, proba], axis=1)
        results = results.append(result)
+        all_results.append(pd.concat([df, proba], axis=1))
        print(f"Batch scored: {filename}")
+
+    if collector and all_results:  # pd.concat raises ValueError on an empty list, so skip when nothing was scored
+        full_results = pd.concat(all_results)
+        collector.batch_collect(full_results)
+
    return results
--- a/classical/python-sdk/mlops/devops-pipelines/deploy-drift-detection.yml
+++ b/classical/python-sdk/mlops/devops-pipelines/deploy-drift-detection.yml
@ -0,0 +1,31 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+variables:
+- template: ../../config-aml.yml
+- ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
+    # 'main' branch: PRD environment
+    - template: ../../config-infra-prod.yml
+- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:  
+    # 'develop' or feature branches: DEV environment
+    - template: ../../config-infra-dev.yml
+- name: version
+  value: python-sdk
+
+trigger:
+- none
+
+pool:
+  vmImage: $(ap_vm_image)
+
+stages:
+- stage: DeployDriftJob
+  displayName: Deploy Drift Job
+  jobs:
+    - job: DeployDriftJob
+      steps:
+      - checkout: self
+        path: s/
+      - checkout: mlops-templates
+        path: s/templates/
+      - template: templates/${{ variables.version }}/deploy-drift-detection.yml@mlops-templates
--- a/classical/python-sdk/mlops/devops-pipelines/deploy-model-batch-scoring.yml
+++ b/classical/python-sdk/mlops/devops-pipelines/deploy-model-batch-scoring.yml
@ -44,9 +44,12 @@ stages:
          environment_name: $(batch_env_name)
          build_type: 'conda'
          environment_file: $(batch_env_conda_yaml)
+          enable_monitoring: $(enable_monitoring)
      - template: templates/${{ variables.version }}/register-dataset.yml@mlops-templates
        parameters:
          data_type: scoring
      - template: templates/${{ variables.version }}/deploy-batch-scoring-pipeline.yml@mlops-templates
+        parameters:
+          enable_monitoring: $(enable_monitoring)
      - template: templates/${{ variables.version }}/add-pipeline-to-endpoint.yml@mlops-templates
      - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
--- a/classical/python-sdk/mlops/devops-pipelines/deploy-model-training-pipeline.yml
+++ b/classical/python-sdk/mlops/devops-pipelines/deploy-model-training-pipeline.yml
@ -44,6 +44,7 @@ stages:
          environment_name: $(training_env_name)
          build_type: 'conda'
          environment_file: $(training_env_conda_yaml)
+          enable_monitoring: $(enable_monitoring)
      - template: templates/${{ variables.version }}/register-dataset.yml@mlops-templates
        parameters:
          data_type: training
@ -51,5 +52,7 @@ stages:
        parameters:
          compute_type: training
      - template: templates/${{ variables.version }}/deploy-training-pipeline.yml@mlops-templates
+        parameters:
+          enable_monitoring: $(enable_monitoring)
      - template: templates/${{ variables.version }}/add-pipeline-to-endpoint.yml@mlops-templates
      - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
--- a/config-infra-dev.yml
+++ b/config-infra-dev.yml
@ -7,8 +7,9 @@ variables:
  ap_vm_image: ubuntu-20.04

  namespace: mlopsv2
-  postfix: 0688
-  location: northeurope
+  postfix: 0659
+  location: westus
+
  environment: dev
  enable_aml_computecluster: true
  enable_aml_secure_workspace: true
--- a/infrastructure/terraform/aml_deploy.tf
+++ b/infrastructure/terraform/aml_deploy.tf
@ -131,4 +131,4 @@ module "data_explorer" {
  client_secret = var.client_secret

  tags = local.tags
-}
+}
--- a/infrastructure/terraform/modules/data-explorer/outputs.tf
+++ b/infrastructure/terraform/modules/data-explorer/outputs.tf
@ -8,4 +8,4 @@ output "uri" {

 output "name" {
  value = azurerm_kusto_database.database[0].name
-}
+}
--- a/infrastructure/terraform/pipelines/tf-ado-deploy-infra.yml
+++ b/infrastructure/terraform/pipelines/tf-ado-deploy-infra.yml
@ -66,4 +66,4 @@ stages :
      - template: templates/infra/run-terraform-apply.yml@mlops-templates
        parameters:
            jumphost_username: ${{parameters.jumphost_username}}
-            jumphost_password: ${{parameters.jumphost_password}}
+            jumphost_password: ${{parameters.jumphost_password}}
--- a/infrastructure/terraform/variables.tf
+++ b/infrastructure/terraform/variables.tf
@ -44,4 +44,4 @@ variable "enable_monitoring" {

 variable "client_secret" {
  description = "Service Principal Secret"
-}
+}