Merge pull request #115 from jomedinagomez/jfomhover/nlpsdk

Jfomhover/nlpsdk
2022-12-01 13:40:27 -05:00 · 2022-12-01 13:40:27 -05:00 · 00124d77d8
--- a/nlp/aml-cli-v2/data-science/environments/training/requirements.txt
+++ b/nlp/aml-cli-v2/data-science/environments/training/requirements.txt
@ -16,3 +16,13 @@ psutil==5.9.0

 # for unit testing
 pytest==7.1.2
+
+# for Azure ML SDK v2
+azure-ai-ml==1.1.0
+azure-common==1.1.28
+azure-core==1.26.1
+azure-identity==1.10.0
+azure-mgmt-core==1.3.0 
+azure-storage-blob==12.14.1
+azure-storage-file-datalake==12.9.1
+azure-storage-file-share==12.7.0
--- a/nlp/python-sdk-v2/data-science/environments/training/requirements.txt
+++ b/nlp/python-sdk-v2/data-science/environments/training/requirements.txt
@ -16,3 +16,6 @@ psutil==5.9.0

 # for unit testing
 pytest==7.1.2
+
+# for Azure ML SDK v2
+azure-ai-ml==1.1.0
--- a/nlp/python-sdk-v2/mlops/azureml/register-env.py
+++ b/nlp/python-sdk-v2/mlops/azureml/register-env.py
@ -1,123 +0,0 @@
-"""MLOps v2 NLP Python SDK register environment script."""
-import os
-import argparse
-import traceback
-
-# Azure ML sdk v2 imports
-from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
-from azure.ai.ml import MLClient
-from azure.ai.ml.entities import Environment, BuildContext
-from azure.core.exceptions import ResourceExistsError
-
-
-def get_config_parger(parser: argparse.ArgumentParser = None):
-    """Builds the argument parser for the script."""
-    if parser is None:
-        parser = argparse.ArgumentParser(description=__doc__)
-
-    parser.add_argument(
-        "--subscription_id",
-        type=str,
-        required=True,
-        help="Subscription ID",
-    )
-    parser.add_argument(
-        "--resource_group",
-        type=str,
-        required=True,
-        help="Resource group name",
-    )
-    parser.add_argument(
-        "--workspace_name",
-        type=str,
-        required=True,
-        help="Workspace name",
-    )
-    parser.add_argument(
-        "--exists_ok",
-        default=False,
-        action="store_true",
-        help="if True, will not fail if environment already exists",
-    )
-
-    parser.add_argument(
-        "--environment_name",
-        default="nlp_summarization_train",
-        type=str,
-    )
-    parser.add_argument(
-        "--environment_version",
-        default="mlopsv2-july2022",
-        type=str,
-    )
-    parser.add_argument(
-        "--environment_context_path",
-        default=os.path.join(
-            os.path.dirname(__file__),
-            "..",
-            "..",
-            "data-science",
-            "environments",
-            "training",
-        ),
-        type=str,
-    )
-    return parser
-
-
-def connect_to_aml(args):
-    """Connect to Azure ML workspace using provided cli arguments."""
-    try:
-        credential = DefaultAzureCredential()
-        # Check if given credential can get token successfully.
-        credential.get_token("https://management.azure.com/.default")
-    except Exception as ex:
-        # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential not work
-        credential = InteractiveBrowserCredential()
-
-    # Get a handle to workspace
-    try:
-        # ml_client to connect using local config.json
-        ml_client = MLClient.from_config(credential=credential)
-
-    except Exception as ex:
-        print(
-            "Could not find config.json, using config.yaml refs to Azure ML workspace instead."
-        )
-
-        # tries to connect using cli args if provided else using config.yaml
-        ml_client = MLClient(
-            subscription_id=args.subscription_id,
-            resource_group_name=args.resource_group,
-            workspace_name=args.workspace_name,
-            credential=credential,
-        )
-    return ml_client
-
-
-def main():
-    """Main entry point for the script."""
-    parser = get_config_parger()
-    args, _ = parser.parse_known_args()
-    ml_client = connect_to_aml(args)
-
-    custom_env = Environment(
-        name=args.environment_name,
-        build=BuildContext(path=args.environment_context_path),
-        tags={"project": "mlopsv2", "url": "https://github.com/Azure/mlops-v2"},
-        version=args.environment_version,
-    )
-
-    try:
-        custom_env_create_job = ml_client.environments.create_or_update(custom_env)
-        print(
-            f"Environment with name {custom_env_create_job.name} is registered to workspace, the environment version is {custom_env_create_job.version}"
-        )
-    except ResourceExistsError as ex:
-        print(f"Failed to create environment: {traceback.format_exc()}")
-        if not args.exists_ok:
-            raise
-
-
-if __name__ == "__main__":
-    main()
--- a/nlp/python-sdk-v2/mlops/azureml/train/pipeline-train.py
+++ b/nlp/python-sdk-v2/mlops/azureml/train/pipeline-train.py
@ -9,29 +9,34 @@ from azure.ai.ml import command
 from azure.ai.ml import Input, Output
 from azure.ai.ml import dsl, Input, Output

-
 def get_config_parger(parser: argparse.ArgumentParser = None):
    """Builds the argument parser for the script."""
    if parser is None:
        parser = argparse.ArgumentParser(description=__doc__)

    group = parser.add_argument_group("Azure ML references")
+    group.add_argument(
+        "--config_location",
+        type=str,
+        required=False,
+        help="Path to the Azure ML workspace config file",
+    )
    group.add_argument(
        "--subscription_id",
        type=str,
-        required=True,
+        required=False,
        help="Subscription ID",
    )
    group.add_argument(
        "--resource_group",
        type=str,
-        required=True,
+        required=False,
        help="Resource group name",
    )
    group.add_argument(
        "--workspace_name",
        type=str,
-        required=True,
+        required=False,
        help="Workspace name",
    )
    group.add_argument(
@ -123,7 +128,7 @@ def connect_to_aml(args):
    # Get a handle to workspace
    try:
        # ml_client to connect using local config.json
-        ml_client = MLClient.from_config(credential=credential)
+        ml_client = ml_client = MLClient.from_config(credential, path='config.json')

    except Exception as ex:
        print(
@ -143,7 +148,7 @@ def connect_to_aml(args):
 def build_components(args):
    """Builds the components for the pipeline."""
    DATA_SCIENCE_FOLDER = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "..", "..", "data-science", "src"
+        os.path.dirname(os.path.abspath(__file__)), "..","..", "..", "data-science", "src"
    )

    prep_finetuning_dataset = command(
@ -396,4 +401,4 @@ def main():


 if __name__ == "__main__":
-    main()
+    main()
--- a/nlp/python-sdk-v2/mlops/azureml/train/train-env.yml
+++ b/nlp/python-sdk-v2/mlops/azureml/train/train-env.yml
@ -0,0 +1,6 @@
+
+$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
+name: nlp_summarization_train
+version: mlopsv2-july2022
+build:
+  path: ../../../data-science/environments/training/
--- a/nlp/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml
+++ b/nlp/python-sdk-v2/mlops/devops-pipelines/deploy-model-training-pipeline.yml
@ -0,0 +1,71 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+variables:
+- ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
+    # 'main' branch: PRD environment
+    - template: ../../config-infra-prod.yml
+- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:  
+    # 'develop' or feature branches: DEV environment
+    - template: ../../config-infra-dev.yml
+- name: version
+  value: python-sdk-v2 
+
+
+trigger:
+- none
+
+pool:
+  vmImage: ubuntu-20.04
+
+
+resources:
+  repositories:
+    - repository: mlops-templates  # Template Repo
+      name: Azure/mlops-templates # need to change org name from "Azure" to your own org
+      endpoint: github-connection # name of the GitHub service connection; create it in your Azure DevOps project and hard-code its name here
+      type: github
+      ref: main-dec31
+
+stages:
+- stage: DeployTrainingPipeline
+  displayName: Deploy Training Pipeline
+  jobs:
+    - job: DeployTrainingPipeline
+      steps:
+      - checkout: self
+        path: s/
+      - checkout: mlops-templates
+        path: s/templates/
+      - template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates
+      - template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates
+      - template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates
+      - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
+        parameters:
+          cluster_name: cpu-cluster
+          size: STANDARD_DS3_V2
+          min_instances: 0
+          max_instances: 1
+          cluster_tier: dedicated
+      - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
+        parameters:
+          cluster_name: cpu-cluster-lg
+          size: Standard_D14_v2
+          min_instances: 0
+          max_instances: 1
+          cluster_tier: dedicated
+      - template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
+        parameters:
+          cluster_name: gpu-cluster
+          size: Standard_NV6
+          min_instances: 0
+          max_instances: 1
+          cluster_tier: dedicated
+      - template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
+        parameters:
+          build_type: docker
+          environment_name: nlp_summarization_train
+      - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
+        parameters: 
+          pipeline_file: mlops/azureml/train/pipeline-train.py
+          experiment_name: $(environment)_nlp_summarization_$(Build.SourceBranchName)