Merge pull request #115 from jomedinagomez/jfomhover/nlpsdk
Jfomhover/nlpsdk
This commit is contained in:
Коммит
00124d77d8
|
@ -16,3 +16,13 @@ psutil==5.9.0
|
|||
|
||||
# for unit testing
|
||||
pytest==7.1.2
|
||||
|
||||
# for azure ml SDK v2
|
||||
azure-ai-ml==1.1.0
|
||||
azure-common==1.1.28
|
||||
azure-core==1.26.1
|
||||
azure-identity==1.10.0
|
||||
azure-mgmt-core==1.3.0
|
||||
azure-storage-blob==12.14.1
|
||||
azure-storage-file-datalake==12.9.1
|
||||
azure-storage-file-share==12.7.0
|
|
@ -16,3 +16,6 @@ psutil==5.9.0
|
|||
|
||||
# for unit testing
|
||||
pytest==7.1.2
|
||||
|
||||
# for azure ml SDK v2
|
||||
azure-ai-ml==1.1.0
|
|
@ -1,123 +0,0 @@
|
|||
"""MLOps v2 NLP Python SDK register environment script."""
|
||||
import os
|
||||
import argparse
|
||||
import traceback
|
||||
|
||||
# Azure ML sdk v2 imports
|
||||
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
|
||||
from azure.ai.ml import MLClient
|
||||
from azure.ai.ml.entities import Environment, BuildContext
|
||||
from azure.core.exceptions import ResourceExistsError
|
||||
|
||||
|
||||
def get_config_parger(parser: argparse.ArgumentParser = None):
    """Build the argument parser for the environment registration script.

    Args:
        parser: an existing parser to extend. When None, a new
            ``argparse.ArgumentParser`` is created with the module docstring
            as its description.

    Returns:
        The parser, with the workspace reference options and the environment
        options added to it.
    """
    if parser is None:
        parser = argparse.ArgumentParser(description=__doc__)

    # Workspace references: all three are mandatory on the command line.
    parser.add_argument(
        "--subscription_id",
        type=str,
        required=True,
        help="Subscription ID",
    )
    parser.add_argument(
        "--resource_group",
        type=str,
        required=True,
        help="Resource group name",
    )
    parser.add_argument(
        "--workspace_name",
        type=str,
        required=True,
        help="Workspace name",
    )
    parser.add_argument(
        "--exists_ok",
        default=False,
        action="store_true",
        help="if True, will not fail if environment already exists",
    )

    # Environment definition: every option has a default, so none is required.
    parser.add_argument(
        "--environment_name",
        default="nlp_summarization_train",
        type=str,
        help="Name of the environment to register (default: %(default)s)",
    )
    parser.add_argument(
        "--environment_version",
        default="mlopsv2-july2022",
        type=str,
        help="Version of the environment to register (default: %(default)s)",
    )
    parser.add_argument(
        "--environment_context_path",
        # Default is resolved relative to this script's own directory.
        default=os.path.join(
            os.path.dirname(__file__),
            "..",
            "..",
            "data-science",
            "environments",
            "training",
        ),
        type=str,
        help="Path to the build context folder of the environment (default: %(default)s)",
    )
    return parser
|
||||
|
||||
|
||||
def connect_to_aml(args):
    """Connect to the Azure ML workspace using the provided CLI arguments.

    A credential is obtained first: ``DefaultAzureCredential`` is tried and,
    if it cannot fetch a token, ``InteractiveBrowserCredential`` is used
    instead. The client is then built with ``MLClient.from_config``; if that
    fails for any reason, the client is built from the workspace references
    carried by ``args``.

    Args:
        args: parsed arguments exposing ``subscription_id``,
            ``resource_group`` and ``workspace_name``.

    Returns:
        The ``MLClient`` handle to the workspace.
    """
    try:
        credential = DefaultAzureCredential()
        # Check if given credential can get token successfully.
        credential.get_token("https://management.azure.com/.default")
    except Exception as ex:
        # Fall back to InteractiveBrowserCredential in case
        # DefaultAzureCredential does not work; report why so the failure
        # is not silently hidden.
        print(
            f"DefaultAzureCredential could not get a token ({ex}), "
            "falling back to InteractiveBrowserCredential."
        )
        credential = InteractiveBrowserCredential()

    # Get a handle to workspace
    try:
        # ml_client to connect using local config.json
        ml_client = MLClient.from_config(credential=credential)

    except Exception as ex:
        print(
            "Could not find config.json, using config.yaml refs to Azure ML workspace instead."
        )
        # The handler catches every exception, not only a missing file,
        # so surface the actual cause as well.
        print(f"Reason: {ex}")

        # connect using the workspace references passed on the command line
        ml_client = MLClient(
            subscription_id=args.subscription_id,
            resource_group_name=args.resource_group,
            workspace_name=args.workspace_name,
            credential=credential,
        )
    return ml_client
|
||||
|
||||
|
||||
def main():
    """Script entry point: parse the CLI, connect, and register the environment.

    Builds an ``Environment`` from the parsed options and submits it with
    ``create_or_update``. A ``ResourceExistsError`` is printed with its
    traceback and re-raised unless ``--exists_ok`` was given.
    """
    # Unknown command line arguments are tolerated and ignored.
    cli_args, _ = get_config_parger().parse_known_args()
    workspace_client = connect_to_aml(cli_args)

    env_tags = {"project": "mlopsv2", "url": "https://github.com/Azure/mlops-v2"}
    environment_spec = Environment(
        name=cli_args.environment_name,
        build=BuildContext(path=cli_args.environment_context_path),
        tags=env_tags,
        version=cli_args.environment_version,
    )

    try:
        registered = workspace_client.environments.create_or_update(environment_spec)
        print(
            f"Environment with name {registered.name} is registered to workspace, the environment version is {registered.version}"
        )
    except ResourceExistsError:
        print(f"Failed to create environment: {traceback.format_exc()}")
        if cli_args.exists_ok:
            # Caller asked to tolerate an already existing environment.
            return
        raise


if __name__ == "__main__":
    main()
|
|
@ -9,29 +9,34 @@ from azure.ai.ml import command
|
|||
from azure.ai.ml import Input, Output
|
||||
from azure.ai.ml import dsl, Input, Output
|
||||
|
||||
|
||||
def get_config_parger(parser: argparse.ArgumentParser = None):
|
||||
"""Builds the argument parser for the script."""
|
||||
if parser is None:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
|
||||
group = parser.add_argument_group("Azure ML references")
|
||||
group.add_argument(
|
||||
"--config_location",
|
||||
type=str,
|
||||
required=False,
|
||||
help="Subscription ID",
|
||||
)
|
||||
group.add_argument(
|
||||
"--subscription_id",
|
||||
type=str,
|
||||
required=True,
|
||||
required=False,
|
||||
help="Subscription ID",
|
||||
)
|
||||
group.add_argument(
|
||||
"--resource_group",
|
||||
type=str,
|
||||
required=True,
|
||||
required=False,
|
||||
help="Resource group name",
|
||||
)
|
||||
group.add_argument(
|
||||
"--workspace_name",
|
||||
type=str,
|
||||
required=True,
|
||||
required=False,
|
||||
help="Workspace name",
|
||||
)
|
||||
group.add_argument(
|
||||
|
@ -123,7 +128,7 @@ def connect_to_aml(args):
|
|||
# Get a handle to workspace
|
||||
try:
|
||||
# ml_client to connect using local config.json
|
||||
ml_client = MLClient.from_config(credential=credential)
|
||||
ml_client = ml_client = MLClient.from_config(credential, path='config.json')
|
||||
|
||||
except Exception as ex:
|
||||
print(
|
||||
|
@ -143,7 +148,7 @@ def connect_to_aml(args):
|
|||
def build_components(args):
|
||||
"""Builds the components for the pipeline."""
|
||||
DATA_SCIENCE_FOLDER = os.path.join(
|
||||
os.path.dirname(os.path.abspath(__file__)), "..", "..", "data-science", "src"
|
||||
os.path.dirname(os.path.abspath(__file__)), "..","..", "..", "data-science", "src"
|
||||
)
|
||||
|
||||
prep_finetuning_dataset = command(
|
||||
|
@ -396,4 +401,4 @@ def main():
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
|
@ -0,0 +1,6 @@
|
|||
|
||||
$schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json
|
||||
name: nlp_summarization_train
|
||||
version: mlopsv2-july2022
|
||||
build:
|
||||
path: ../../../data-science/environments/training/
|
|
@ -0,0 +1,71 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
variables:
|
||||
- ${{ if eq(variables['Build.SourceBranchName'], 'main') }}:
|
||||
# 'main' branch: PRD environment
|
||||
- template: ../../config-infra-prod.yml
|
||||
- ${{ if ne(variables['Build.SourceBranchName'], 'main') }}:
|
||||
# 'develop' or feature branches: DEV environment
|
||||
- template: ../../config-infra-dev.yml
|
||||
- name: version
|
||||
value: python-sdk-v2
|
||||
|
||||
|
||||
trigger:
|
||||
- none
|
||||
|
||||
pool:
|
||||
vmImage: ubuntu-20.04
|
||||
|
||||
|
||||
resources:
|
||||
repositories:
|
||||
- repository: mlops-templates # Template Repo
|
||||
name: Azure/mlops-templates # need to change org name from "Azure" to your own org
|
||||
endpoint: github-connection # need to set up and hardcode
|
||||
type: github
|
||||
ref: main-dec31
|
||||
|
||||
stages:
|
||||
- stage: DeployTrainingPipeline
|
||||
displayName: Deploy Training Pipeline
|
||||
jobs:
|
||||
- job: DeployTrainingPipeline
|
||||
steps:
|
||||
- checkout: self
|
||||
path: s/
|
||||
- checkout: mlops-templates
|
||||
path: s/templates/
|
||||
- template: templates/aml-cli-v2/install-az-cli.yml@mlops-templates
|
||||
- template: templates/aml-cli-v2/install-aml-cli.yml@mlops-templates
|
||||
- template: templates/aml-cli-v2/connect-to-workspace.yml@mlops-templates
|
||||
- template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
|
||||
parameters:
|
||||
cluster_name: cpu-cluster
|
||||
size: STANDARD_DS3_V2
|
||||
min_instances: 0
|
||||
max_instances: 1
|
||||
cluster_tier: dedicated
|
||||
- template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
|
||||
parameters:
|
||||
cluster_name: cpu-cluster-lg
|
||||
size: Standard_D14_v2
|
||||
min_instances: 0
|
||||
max_instances: 1
|
||||
cluster_tier: dedicated
|
||||
- template: templates/${{ variables.version }}/create-compute.yml@mlops-templates
|
||||
parameters:
|
||||
cluster_name: gpu-cluster
|
||||
size: Standard_NV6
|
||||
min_instances: 0
|
||||
max_instances: 1
|
||||
cluster_tier: dedicated
|
||||
- template: templates/${{ variables.version }}/register-environment.yml@mlops-templates
|
||||
parameters:
|
||||
build_type: docker
|
||||
environment_name: nlp_summarization_train
|
||||
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
|
||||
parameters:
|
||||
pipeline_file: mlops/azureml/train/pipeline-train.py
|
||||
experiment_name: $(environment)_nlp_summarization_$(Build.SourceBranchName)
|
Загрузка…
Ссылка в новой задаче