Merge pull request #115 from jomedinagomez/jfomhover/nlpsdk

Jfomhover/nlpsdk
This commit is contained in:
Jose Medina 2022-12-01 13:40:27 -05:00 коммит произвёл GitHub
Родитель 463b234344 70694f9dfe
Коммит 00124d77d8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 102 добавлений и 130 удалений

Просмотреть файл

@ -16,3 +16,13 @@ psutil==5.9.0
# for unit testing
pytest==7.1.2
# for azure ml SDK v2
azure-ai-ml==1.1.0
azure-common==1.1.28
azure-core==1.26.1
azure-identity==1.10.0
azure-mgmt-core==1.3.0
azure-storage-blob==12.14.1
azure-storage-file-datalake==12.9.1
azure-storage-file-share==12.7.0

Просмотреть файл

@ -16,3 +16,6 @@ psutil==5.9.0
# for unit testing
pytest==7.1.2
# for azure ml SDK v2
azure-ai-ml==1.1.0

Просмотреть файл

@ -1,123 +0,0 @@
"""MLOps v2 NLP Python SDK register environment script."""
import os
import argparse
import traceback
# Azure ML sdk v2 imports
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Environment, BuildContext
from azure.core.exceptions import ResourceExistsError
def get_config_parger(parser: argparse.ArgumentParser = None):
    """Build the command-line argument parser for this script.

    Args:
        parser: An existing parser to extend. When ``None``, a new
            ``argparse.ArgumentParser`` is created using the module
            docstring as its description.

    Returns:
        The parser, with the workspace reference arguments and the
        environment registration arguments added.
    """
    if parser is None:
        parser = argparse.ArgumentParser(description=__doc__)

    # References to the Azure ML workspace; all three are mandatory.
    parser.add_argument(
        "--subscription_id",
        type=str,
        required=True,
        help="Subscription ID",
    )
    parser.add_argument(
        "--resource_group",
        type=str,
        required=True,
        help="Resource group name",
    )
    parser.add_argument(
        "--workspace_name",
        type=str,
        required=True,
        help="Workspace name",
    )

    parser.add_argument(
        "--exists_ok",
        default=False,
        action="store_true",
        help="if True, will not fail if environment already exists",
    )

    # Description of the environment to register.
    parser.add_argument(
        "--environment_name",
        default="nlp_summarization_train",
        type=str,
        help="Name of the environment to register (default: %(default)s)",
    )
    parser.add_argument(
        "--environment_version",
        default="mlopsv2-july2022",
        type=str,
        help="Version of the environment to register (default: %(default)s)",
    )
    parser.add_argument(
        "--environment_context_path",
        # Anchor on the absolute location of this file so the default does
        # not depend on the working directory the script is launched from.
        default=os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            "..",
            "..",
            "data-science",
            "environments",
            "training",
        ),
        type=str,
        help="Path to the environment build context folder (default: %(default)s)",
    )
    return parser
def connect_to_aml(args):
    """Connect to the Azure ML workspace and return a client for it.

    The credential is a ``DefaultAzureCredential`` when it can obtain a
    management token, otherwise an ``InteractiveBrowserCredential``.
    The client is first built with ``MLClient.from_config``; if that raises,
    it is built from the workspace references carried by ``args``.

    Args:
        args: Parsed arguments exposing ``subscription_id``,
            ``resource_group`` and ``workspace_name``.

    Returns:
        The ``MLClient`` handle to the workspace.
    """
    try:
        credential = DefaultAzureCredential()
        # Check if given credential can get token successfully.
        credential.get_token("https://management.azure.com/.default")
    except Exception:
        # Deliberately broad: any failure here falls back to
        # InteractiveBrowserCredential.
        credential = InteractiveBrowserCredential()

    # Get a handle to workspace
    try:
        # ml_client to connect using local config.json
        ml_client = MLClient.from_config(credential=credential)
    except Exception as ex:
        print(
            "Could not find config.json, using config.yaml refs to Azure ML workspace instead."
        )
        # Surface the actual failure: the message above is only a guess, since
        # any exception raised by from_config lands here.
        print(f"Reason: {ex}")
        # Connect using the workspace references provided in args.
        ml_client = MLClient(
            subscription_id=args.subscription_id,
            resource_group_name=args.resource_group,
            workspace_name=args.workspace_name,
            credential=credential,
        )
    return ml_client
def main():
    """Parse the CLI arguments and register the environment in the workspace.

    A ``ResourceExistsError`` raised during registration is printed with its
    traceback and re-raised unless ``--exists_ok`` was given.
    """
    args, _ = get_config_parger().parse_known_args()
    ml_client = connect_to_aml(args)

    # Describe the environment: built from a local context folder.
    build_context = BuildContext(path=args.environment_context_path)
    environment_tags = {
        "project": "mlopsv2",
        "url": "https://github.com/Azure/mlops-v2",
    }
    custom_env = Environment(
        name=args.environment_name,
        build=build_context,
        tags=environment_tags,
        version=args.environment_version,
    )

    try:
        custom_env_create_job = ml_client.environments.create_or_update(custom_env)
        registered_message = (
            f"Environment with name {custom_env_create_job.name} is registered to workspace, "
            f"the environment version is {custom_env_create_job.version}"
        )
        print(registered_message)
    except ResourceExistsError:
        print(f"Failed to create environment: {traceback.format_exc()}")
        if args.exists_ok:
            return
        raise


if __name__ == "__main__":
    main()

Просмотреть файл

@ -9,29 +9,34 @@ from azure.ai.ml import command
from azure.ai.ml import Input, Output
from azure.ai.ml import dsl, Input, Output
def get_config_parger(parser: argparse.ArgumentParser = None):
"""Builds the argument parser for the script."""
if parser is None:
parser = argparse.ArgumentParser(description=__doc__)
group = parser.add_argument_group("Azure ML references")
group.add_argument(
"--config_location",
type=str,
required=False,
help="Subscription ID",
)
group.add_argument(
"--subscription_id",
type=str,
required=True,
required=False,
help="Subscription ID",
)
group.add_argument(
"--resource_group",
type=str,
required=True,
required=False,
help="Resource group name",
)
group.add_argument(
"--workspace_name",
type=str,
required=True,
required=False,
help="Workspace name",
)
group.add_argument(
@ -123,7 +128,7 @@ def connect_to_aml(args):
# Get a handle to workspace
try:
# ml_client to connect using local config.json
ml_client = MLClient.from_config(credential=credential)
ml_client = ml_client = MLClient.from_config(credential, path='config.json')
except Exception as ex:
print(
@ -143,7 +148,7 @@ def connect_to_aml(args):
def build_components(args):
"""Builds the components for the pipeline."""
DATA_SCIENCE_FOLDER = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "..", "..", "data-science", "src"
os.path.dirname(os.path.abspath(__file__)), "..","..", "..", "data-science", "src"
)
prep_finetuning_dataset = command(
@ -396,4 +401,4 @@ def main():
if __name__ == "__main__":
main()
main()

Просмотреть файл

@ -0,0 +1,6 @@
# Azure ML environment specification, declared against the environment schema below.
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
# Name and version identifying the environment.
name: nlp_summarization_train
version: mlopsv2-july2022
# The environment is built from a context folder rather than a prebuilt image.
# NOTE(review): the path looks relative to this file's location — confirm it resolves.
build:
path: ../../../data-science/environments/training/

Просмотреть файл

@ -0,0 +1,71 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# Pick the infrastructure config template from the source branch name:
# the prod template for 'main', the dev template for any other branch.
variables:
- ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
# 'main' branch: PRD environment
- template: ../../config-infra-prod.yml
- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
# 'develop' or feature branches: DEV environment
- template: ../../config-infra-dev.yml
# 'version' is interpolated into the template paths below (templates/${{ variables.version }}/...).
- name: version
value: python-sdk-v2
# No CI trigger is configured for this pipeline.
trigger:
- none
pool:
vmImage: ubuntu-20.04
# External repository providing the step templates referenced with @mlops-templates.
resources:
repositories:
- repository: mlops-templates # Template Repo
name: Azure/mlops-templates # need to change org name from "Azure" to your own org
endpoint: github-connection # need to set up and hardcode
type: github
ref: main-dec31
stages:
- stage: DeployTrainingPipeline
displayName: Deploy Training Pipeline
jobs:
- job: DeployTrainingPipeline
steps:
# Check out this repository and the templates repository.
- checkout: self
path: s/
- checkout: mlops-templates
path: s/templates/
# These three setup steps come from the aml-cli-v2 template folder, not the ${{ variables.version }} folder.
- template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates
- template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates
- template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates
# Create three compute clusters, each scaling between 0 and 1 instances.
- template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
parameters:
cluster_name: cpu-cluster
size: STANDARD_DS3_V2
min_instances: 0
max_instances: 1
cluster_tier: dedicated
- template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
parameters:
cluster_name: cpu-cluster-lg
size: Standard_D14_v2
min_instances: 0
max_instances: 1
cluster_tier: dedicated
- template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
parameters:
cluster_name: gpu-cluster
size: Standard_NV6
min_instances: 0
max_instances: 1
cluster_tier: dedicated
# Register the training environment, then run the training pipeline script.
- template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
parameters:
build_type: docker
environment_name: nlp_summarization_train
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
parameters:
pipeline_file: mlops/azureml/train/pipeline-train.py
experiment_name: $(environment)_nlp_summarization_$(Build.SourceBranchName)