deploy-arm-templates api version update (#1458)

* updating api versions

* addressing comments

* black formatting

Co-authored-by: Raghu Ramaswamy <raghura@microsoft.com>
Raghu Ramaswamy authored 2022-07-05 15:36:27 -07:00, committed by GitHub
Parent 6429cbea2b
Commit d75447d655
46 changed files: 105 additions and 266 deletions
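Taken together, the diff below makes three kinds of changes: it bumps the Microsoft.MachineLearningServices API version pinned in the ARM templates and REST deployment scripts from 2021-10-01 to 2022-05-01, it fixes repository-relative paths in the shell scripts (prefixing endpoints/online/model-1/... with cli/), and it reformats the Python samples with Black. As a rough sketch (not the exact script text) of how the version pin is consumed, the deploy-via-REST samples define a single API_VERSION variable and pass it on every management-plane call; variable names below match the scripts, the URL is illustrative:

# Sketch of the pattern used by the deploy-via-REST scripts in this commit.
TOKEN=$(az account get-access-token --query accessToken -o tsv)
SUBSCRIPTION_ID=$(az account show --query id | tr -d '\r"')
RESOURCE_GROUP=$(az group show --query name | tr -d '\r"')
WORKSPACE=$(az configure -l | jq -r '.[] | select(.name=="workspace") | .value')

API_VERSION="2022-05-01"   # previously 2021-10-01

# Every ARM/REST call passes the pinned version as the api-version query parameter.
curl --header "Authorization: Bearer $TOKEN" \
  "https://management.azure.com/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RESOURCE_GROUP/providers/Microsoft.MachineLearningServices/workspaces/$WORKSPACE?api-version=$API_VERSION"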

.github/kubernetes-compute/convert.py (vendored, 7 changes)
View file

@ -98,12 +98,7 @@ if __name__ == "__main__":
required=False,
help="Output AMLARC-compatible file, if not provides, " "replace file inplace",
)
parser.add_argument(
"-c",
"--compute-target",
required=False,
help="Compute target",
)
parser.add_argument("-c", "--compute-target", required=False, help="Compute target")
parser.add_argument("-it", "--instance-type", required=False, help="Instance type")
parser.add_argument(
"-cr",

View file

@ -41,12 +41,7 @@ def check_test_case(input_file):
if __name__ == "__main__":
# Parse command line arguments
parser = argparse.ArgumentParser(description="Check all papermill output files.")
parser.add_argument(
"-i",
"--input",
required=True,
help="job ipynb file list",
)
parser.add_argument("-i", "--input", required=True, help="job ipynb file list")
args = parser.parse_args()

View file

@ -34,10 +34,7 @@ if __name__ == "__main__":
)
parser.add_argument("-r", "--regex", required=True, help="test case name selector")
parser.add_argument(
"-o",
"--output",
required=False,
help="the file selected test case send to",
"-o", "--output", required=False, help="the file selected test case send to"
)
args = parser.parse_args()

View file

@ -42,7 +42,7 @@
"resources": [
{
"type": "Microsoft.MachineLearningServices/workspaces/codes/versions",
"apiVersion": "2021-10-01",
"apiVersion": "2022-05-01",
"name": "[concat(parameters('workspaceName'), '/', parameters('codeAssetName'), '/', parameters('codeAssetVersion'))]",
"properties": {
"description": "[parameters('codeAssetDescription')]",

View file

@ -38,7 +38,7 @@
"resources": [
{
"type": "Microsoft.MachineLearningServices/workspaces/environments/versions",
"apiVersion": "2021-10-01",
"apiVersion": "2022-05-01",
"name": "[concat(parameters('workspaceName'), '/', parameters('environmentAssetName'), '/', parameters('environmentAssetVersion'))]",
"properties": {
"isAnonymous": "[parameters('isAnonymous')]",

View file

@ -39,7 +39,7 @@
"resources": [
{
"type": "Microsoft.MachineLearningServices/workspaces/models/versions",
"apiVersion": "2021-10-01",
"apiVersion": "2022-05-01",
"name": "[concat(parameters('workspaceName'), '/', parameters('modelAssetName'), '/', parameters('modelAssetVersion'))]",
"properties": {
"description": "[parameters('modelDescription')]",

View file

@ -82,7 +82,7 @@
"resources": [
{
"type": "Microsoft.MachineLearningServices/workspaces/onlineEndpoints/deployments",
"apiVersion": "2021-10-01",
"apiVersion": "2022-05-01",
"name": "[concat(parameters('workspaceName'), '/', parameters('onlineEndpointName'),'/', parameters('onlineDeploymentName'))]",
"location": "[parameters('location')]",
"tags": "[parameters('onlineDeploymentTags')]",

View file

@ -55,7 +55,7 @@
"resources": [
{
"type": "Microsoft.MachineLearningServices/workspaces/onlineEndpoints",
"apiVersion": "2021-10-01",
"apiVersion": "2022-05-01",
"name": "[concat(parameters('workspaceName'), '/', parameters('onlineEndpointName'))]",
"location": "[parameters('location')]",
"tags": "[parameters('onlineEndpointTags')]",

View file

@ -29,10 +29,7 @@ def create_jsonl_files(uri_folder_data_path):
)
# Baseline of json line dictionary
json_line_sample = {
"image_url": uri_folder_data_path,
"label": "",
}
json_line_sample = {"image_url": uri_folder_data_path, "label": ""}
index = 0
# Scan each sub directary and generate a jsonl line per image, distributed on train and valid JSONL files

View file

@ -29,10 +29,7 @@ def create_jsonl_files(uri_folder_data_path):
)
# Baseline of json line dictionary
json_line_sample = {
"image_url": uri_folder_data_path,
"label": [],
}
json_line_sample = {"image_url": uri_folder_data_path, "label": []}
# Path to the labels file.
labelFile = os.path.join(src_images, "labels.csv")

View file

@ -235,8 +235,9 @@ def get_dali_train_loader(dali_cpu=False):
pipe, size=int(pipe.epoch_size("Reader") / world_size)
)
return DALIWrapper(train_loader, num_classes, one_hot), int(
pipe.epoch_size("Reader") / (world_size * batch_size)
return (
DALIWrapper(train_loader, num_classes, one_hot),
int(pipe.epoch_size("Reader") / (world_size * batch_size)),
)
return gdtl
@ -275,8 +276,9 @@ def get_dali_val_loader():
pipe, size=int(pipe.epoch_size("Reader") / world_size)
)
return DALIWrapper(val_loader, num_classes, one_hot), int(
pipe.epoch_size("Reader") / (world_size * batch_size)
return (
DALIWrapper(val_loader, num_classes, one_hot),
int(pipe.epoch_size("Reader") / (world_size * batch_size)),
)
return gdvl
@ -385,10 +387,7 @@ def get_pytorch_train_loader(
train_dataset = datasets.ImageFolder(
traindir,
transforms.Compose(
[
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
]
[transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip()]
),
)
@ -409,8 +408,9 @@ def get_pytorch_train_loader(
drop_last=True,
)
return PrefetchedWrapper(train_loader, num_classes, fp16, one_hot), len(
train_loader
return (
PrefetchedWrapper(train_loader, num_classes, fp16, one_hot),
len(train_loader),
)
@ -425,13 +425,7 @@ def get_pytorch_val_loader(
):
valdir = os.path.join(data_path, "val")
val_dataset = datasets.ImageFolder(
valdir,
transforms.Compose(
[
transforms.Resize(256),
transforms.CenterCrop(224),
]
),
valdir, transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224)])
)
if torch.distributed.is_initialized():

View file

@ -257,7 +257,7 @@ def lr_exponential_policy(
lr = base_lr * (epoch + 1) / warmup_length
else:
e = epoch - warmup_length
lr = base_lr * (epoch_decay**e)
lr = base_lr * (epoch_decay ** e)
return lr
return lr_policy(_lr_fn, logger=logger)

View file

@ -17,10 +17,7 @@ args = parser.parse_args()
print("hello training world...")
lines = [
f"Raw data path: {args.raw_data}",
f"Data output path: {args.prep_data}",
]
lines = [f"Raw data path: {args.raw_data}", f"Data output path: {args.prep_data}"]
for line in lines:
print(line)

View file

@ -72,12 +72,7 @@ def main(args):
create_rai_tool_directories(incoming_dir)
_logger.info("Saved empty RAI Insights input to temporary directory")
insight_paths = [
args.insight_1,
args.insight_2,
args.insight_3,
args.insight_4,
]
insight_paths = [args.insight_1, args.insight_2, args.insight_3, args.insight_4]
included_tools: Dict[str, bool] = {
RAIToolType.CAUSAL: False,

View file

@ -160,10 +160,7 @@ def copy_insight_to_raiinsights(
src_dir = insight_dir / tool_dir_name / tool_dir_items[0].parts[-1]
dst_dir = rai_insights_dir / tool_dir_name / tool_dir_items[0].parts[-1]
shutil.copytree(
src=src_dir,
dst=dst_dir,
)
shutil.copytree(src=src_dir, dst=dst_dir)
_logger.info("Copy complete")
return tool_type
@ -186,9 +183,7 @@ def save_to_output_port(rai_i: RAIInsights, output_port_path: str, tool_type: st
_logger.info("Starting copy")
shutil.copytree(
pathlib.Path(tmpdirname) / tool_dir_name,
target_path,
dirs_exist_ok=True,
pathlib.Path(tmpdirname) / tool_dir_name, target_path, dirs_exist_ok=True
)
_logger.info("Copied to output")

View file

@ -17,10 +17,7 @@ args = parser.parse_args()
print("hello training world...")
lines = [
f"Raw data path: {args.raw_data}",
f"Data output path: {args.prep_data}",
]
lines = [f"Raw data path: {args.raw_data}", f"Data output path: {args.prep_data}"]
for line in lines:
print(line)

View file

@ -31,8 +31,7 @@ mlflow.log_text(str(c), "dask_cluster1")
# read in the data from the provided file dataset (which is mounted at the same
# location on all nodes of the job)
df = dd.read_csv(
f"{dataset}/*.csv",
parse_dates=["tpep_pickup_datetime", "tpep_dropoff_datetime"],
f"{dataset}/*.csv", parse_dates=["tpep_pickup_datetime", "tpep_dropoff_datetime"]
)
# as an alternative, the below would be using abfs

View file

@ -7,17 +7,8 @@ import argparse
# define constants
EXCLUDED_JOBS = ["java"]
EXCLUDED_ENDPOINTS = ["batch", "online", "amlarc"]
EXCLUDED_RESOURCES = [
"workspace",
"datastore",
"vm-attach",
"instance",
"connections",
]
EXCLUDED_ASSETS = [
"conda-yamls",
"mlflow-models",
]
EXCLUDED_RESOURCES = ["workspace", "datastore", "vm-attach", "instance", "connections"]
EXCLUDED_ASSETS = ["conda-yamls", "mlflow-models"]
EXCLUDED_SCRIPTS = ["setup", "cleanup", "run-job"]
BRANCH = "main" # default - do not change
# BRANCH = "sdk-preview" # this should be deleted when this branch is merged to main

View file

@ -6,9 +6,9 @@ TOKEN=$(az account get-access-token --query accessToken -o tsv)
# <create_variables>
SUBSCRIPTION_ID=$(az account show --query id | tr -d '\r"')
LOCATION=$(az ml workspace show --query location | tr -d '\r"')
RESOURCE_GROUP=$(az group show --query name | tr -d '\r"')
WORKSPACE=$(az configure -l | jq -r '.[] | select(.name=="workspace") | .value')
LOCATION=$(az ml workspace show| jq -r '.location')
#</create_variables>
# <set_endpoint_name>
@ -16,7 +16,7 @@ export ENDPOINT_NAME=endpoint-`echo $RANDOM`
# </set_endpoint_name>
#<api_version>
API_VERSION="2021-10-01"
API_VERSION="2022-05-01"
#</api_version>
echo -e "Using:\nSUBSCRIPTION_ID=$SUBSCRIPTION_ID\nLOCATION=$LOCATION\nRESOURCE_GROUP=$RESOURCE_GROUP\nWORKSPACE=$WORKSPACE"
@ -58,7 +58,7 @@ export AZURE_STORAGE_ACCOUNT=$(echo $response | jq -r '.value[0].properties.acco
# </get_storage_details>
# <upload_code>
az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/score -s endpoints/online/model-1/onlinescoring --account-name $AZURE_STORAGE_ACCOUNT
az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/score -s cli/endpoints/online/model-1/onlinescoring --account-name $AZURE_STORAGE_ACCOUNT
# </upload_code>
# <create_code>
@ -71,7 +71,7 @@ codeUri="https://$AZURE_STORAGE_ACCOUNT.blob.core.windows.net/$AZUREML_DEFAULT_C
# </create_code>
# <upload_model>
az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/model -s endpoints/online/model-1/model --account-name $AZURE_STORAGE_ACCOUNT
az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/model -s cli/endpoints/online/model-1/model --account-name $AZURE_STORAGE_ACCOUNT
# </upload_model>
# <create_model>
@ -84,7 +84,7 @@ modelUri="azureml://subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RESOURCE_GROU
# </create_model>
# <read_condafile>
CONDA_FILE=$(cat endpoints/online/model-1/environment/conda.yml)
CONDA_FILE=$(cat cli/endpoints/online/model-1/environment/conda.yml)
# <read_condafile>
# <create_environment>
@ -131,7 +131,7 @@ az deployment group create -g $RESOURCE_GROUP \
codeId="$resourceScope/workspaces/$WORKSPACE/codes/score-sklearn/versions/1" \
scoringScript=score.py \
environmentId="$resourceScope/workspaces/$WORKSPACE/environments/sklearn-env/versions/$ENV_VERSION" \
model="$resourceScope/workspaces/$WORKSPACE/models/sklearn/versions/1" \
model="$resourceScope/workspaces/$WORKSPACE/models/score-sklearn/versions/1" \
endpointComputeType=Managed \
skuName=Standard_F2s_v2 \
skuCapacity=1
@ -158,7 +158,7 @@ accessToken=$(echo $response | jq -r '.accessToken')
curl --location --request POST $scoringUri \
--header "Authorization: Bearer $accessToken" \
--header "Content-Type: application/json" \
--data-raw @endpoints/online/model-1/sample-request.json
--data-raw @cli/endpoints/online/model-1/sample-request.json
# </score_endpoint>
# <get_deployment_logs>

View file

@ -11,7 +11,7 @@ schema='$schema'
echo -e "Using:\nSUBSCRIPTION_ID=$SUBSCRIPTION_ID\nLOCATION=$LOCATION\nRESOURCE_GROUP=$RESOURCE_GROUP\nWORKSPACE=$WORKSPACE"
# <read_condafile>
CONDA_FILE=$(< model/environment/conda.yml)
CONDA_FILE=$(< cli/endpoints/online/model-1/environment/conda.yml)
# <read_condafile>
#<get_access_token>
@ -23,7 +23,7 @@ export ENDPOINT_NAME=endpt-`echo $RANDOM`
# </set_endpoint_name>
#<api_version>
API_VERSION="2021-10-01"
API_VERSION="2022-05-01"
#</api_version>
# define how to wait
@ -62,7 +62,7 @@ export AZURE_STORAGE_ACCOUNT=$(echo $response | jq -r '.value[0].properties.acco
# </get_storage_details>
# <upload_code>
az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/score -s endpoints/online/model-1/onlinescoring --account-name $AZURE_STORAGE_ACCOUNT
az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/score -s cli/endpoints/online/model-1/onlinescoring --account-name $AZURE_STORAGE_ACCOUNT
# </upload_code>
# <create_code>
@ -117,7 +117,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
[
{
\"type\": \"Microsoft.MachineLearningServices/workspaces/codes/versions\",
\"apiVersion\": \"2021-10-01\",
\"apiVersion\": \"$API_VERSION\",
\"name\": \"[concat(parameters(\'workspaceName\'), \'/\', parameters(\'codeAssetName\'), \'/\', parameters(\'codeAssetVersion\'))]\",
\"properties\": {
\"description\": \"[parameters(\'codeAssetDescription\')]\",
@ -148,7 +148,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
# <\create_code>
# <upload_model>
az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/model -s endpoints/online/model-1/model --account-name $AZURE_STORAGE_ACCOUNT
az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/model -s cli/endpoints/online/model-1/model --account-name $AZURE_STORAGE_ACCOUNT
# <\upload_model>
# <create_model>
@ -234,7 +234,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
# <\create_model>
# <read_condafile>
CONDA_FILE=$(cat endpoints/online/model-1/environment/conda.yml)
CONDA_FILE=$(cat cli/endpoints/online/model-1/environment/conda.yml)
# <read_condafile>
# <create_environment>
@ -296,7 +296,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
[
{
\"type\": \"Microsoft.MachineLearningServices/workspaces/environments/versions\",
\"apiVersion\": \"2021-10-01\",
\"apiVersion\": \"$API_VERSION\",
\"name\": \"[concat(parameters(\'workspaceName\'), \'/\', parameters(\'environmentAssetName\'), \'/\', parameters(\'environmentAssetVersion\'))]\",
\"properties\": {
\"description\": \"[parameters(\'environmentAssetDescription\')]\",
@ -392,7 +392,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
[
{
\"type\": \"Microsoft.MachineLearningServices/workspaces/onlineEndpoints\",
\"apiVersion\": \"2021-10-01\",
\"apiVersion\": \"$API_VERSION\",
\"name\": \"[concat(parameters(\'workspaceName\'), \'/\', parameters(\'onlineEndpointName\'))]\",
\"location\": \"[parameters(\'location\')]\",
\"tags\": \"[parameters(\'onlineEndpointTags\')]\",
@ -527,7 +527,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
\"resources\": [
{
\"type\": \"Microsoft.MachineLearningServices/workspaces/onlineEndpoints/deployments\",
\"apiVersion\": \"2021-10-01\",
\"apiVersion\": \"$API_VERSION\",
\"name\": \"[concat(parameters(\'workspaceName\'), \'/\', parameters(\'onlineEndpointName\'), \'/\', parameters(\'onlineDeploymentName\'))]\",
\"location\": \"[parameters(\'location\')]\",
\"tags\": \"[parameters(\'onlineDeploymentTags\')]\",
@ -572,7 +572,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
\"value\": \"$resourceScope/workspaces/$WORKSPACE/environments/sklearn-env/versions/$ENV_VERSION\"
},
\"model\": {
\"value\": \"$resourceScope/workspaces/$WORKSPACE/models/sklearn/versions/1\"
\"value\": \"$resourceScope/workspaces/$WORKSPACE/models/score-sklearn/versions/1\"
},
\"endpointComputeType\": {
\"value\": \"Managed\"
@ -610,7 +610,7 @@ accessToken=$(echo $response | jq -r '.accessToken')
curl --location --request POST $scoringUri \
--header "Authorization: Bearer $accessToken" \
--header "Content-Type: application/json" \
--data @endpoints/online/model-1/sample-request.json
--data @cli/endpoints/online/model-1/sample-request.json
# </score_endpoint>
# <get_deployment_logs>

View file

@ -27,15 +27,11 @@ input_sample = pd.DataFrame(
"slope": 3,
"ca": 0,
"thal": 2,
},
}
]
)
output_sample = np.ndarray(
[
1,
]
)
output_sample = np.ndarray([1])
MODEL = None

View file

@ -100,16 +100,8 @@ def driver():
test(args, model, device, test_loader)
if args.save_model:
input_schema = Schema(
[
TensorSpec(np.dtype(np.uint8), (-1, 28, 28)),
]
)
output_schema = Schema(
[
TensorSpec(np.dtype(np.float32), (-1, 10)),
]
)
input_schema = Schema([TensorSpec(np.dtype(np.uint8), (-1, 28, 28))])
output_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 10))])
signature = ModelSignature(inputs=input_schema, outputs=output_schema)
mlflow.pytorch.log_model(model, "model", signature=signature)

View file

@ -40,10 +40,7 @@ if __name__ == "__main__":
}
file_name = os.path.join(
os.path.abspath(os.path.dirname(__file__)),
"..",
"data",
args.data_file,
os.path.abspath(os.path.dirname(__file__)), "..", "data", args.data_file
)
test_sample = open(file_name, "rb").read()
resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)

View file

@ -756,11 +756,7 @@ class InferenceServerClient:
"""
request_uri = "v2/systemsharedmemory/region/{}/register".format(quote(name))
register_request = {
"key": key,
"offset": offset,
"byte_size": byte_size,
}
register_request = {"key": key, "offset": offset, "byte_size": byte_size}
request_body = json.dumps(register_request)
response = self._post(
@ -864,13 +860,7 @@ class InferenceServerClient:
return json.loads(content)
def register_cuda_shared_memory(
self,
name,
raw_handle,
device_id,
byte_size,
headers=None,
query_params=None,
self, name, raw_handle, device_id, byte_size, headers=None, query_params=None
):
"""Request the server to register a system shared memory with the
following specification.

View file

@ -25,12 +25,9 @@ def get_model_info():
for model in repo_index:
model_name = model["name"]
model_version = model["version"]
(
input_meta,
input_config,
output_meta,
output_config,
) = parse_model_http(model_name=model_name, model_version=model_version)
(input_meta, input_config, output_meta, output_config) = parse_model_http(
model_name=model_name, model_version=model_version
)
print(
f"Found model: {model_name}, version: {model_version}, \
input meta: {input_meta}, input config: {input_config}, \
@ -69,11 +66,7 @@ def parse_model_http(model_name, model_version=""):
def triton_infer(
input_mapping,
model_name,
binary_data=False,
binary_output=False,
class_count=0,
input_mapping, model_name, binary_data=False, binary_output=False, class_count=0
):
"""Helper function for setting Triton inputs and executing a request

View file

@ -65,14 +65,8 @@ v100computes = {
}
akscomputes = {
"aks-cpu-deploy": {
"vm_size": "STANDARD_DS3_V2",
"agent_count": 3,
},
"aks-gpu-deploy": {
"vm_size": "STANDARD_NC6S_V3",
"agent_count": 3,
},
"aks-cpu-deploy": {"vm_size": "STANDARD_DS3_V2", "agent_count": 3},
"aks-gpu-deploy": {"vm_size": "STANDARD_NC6S_V3", "agent_count": 3},
}
# create or get Workspace

View file

@ -149,12 +149,7 @@ def get_backtest_pipeline(
inputs=[forecasts.as_mount()],
outputs=[data_results],
source_directory=PROJECT_FOLDER,
arguments=[
"--forecasts",
forecasts,
"--output-dir",
data_results,
],
arguments=["--forecasts", forecasts, "--output-dir", data_results],
runconfig=run_config,
compute_target=compute_target,
allow_reuse=False,

View file

@ -23,11 +23,7 @@ except ImportError:
def infer_forecasting_dataset_tcn(
X_test,
y_test,
model,
output_path,
output_dataset_name="results",
X_test, y_test, model, output_path, output_dataset_name="results"
):
y_pred, df_all = model.forecast(X_test, y_test)
@ -71,10 +67,7 @@ def get_model(model_path, model_file_name):
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_name",
type=str,
dest="model_name",
help="Model to be loaded",
"--model_name", type=str, dest="model_name", help="Model to be loaded"
)
parser.add_argument(
@ -108,12 +101,7 @@ def get_args():
return args
def get_data(
run,
fitted_model,
target_column_name,
test_dataset_name,
):
def get_data(run, fitted_model, target_column_name, test_dataset_name):
# get input dataset by name
test_dataset = Dataset.get_by_name(run.experiment.workspace, test_dataset_name)
@ -159,10 +147,7 @@ if __name__ == "__main__":
fitted_model = get_model(model_path, model_file_name)
X_test_df, y_test = get_data(
run,
fitted_model,
target_column_name,
test_dataset_name,
run, fitted_model, target_column_name, test_dataset_name
)
infer_forecasting_dataset_tcn(

View file

@ -68,10 +68,7 @@ key1, Key2 = webservice.get_keys()
# access_token, refresh_after = webservice.get_token()
# If (key) auth is enabled, don't forget to add key to the HTTP header.
headers = {
"Content-Type": "application/json",
"Authorization": "Bearer " + key1,
}
headers = {"Content-Type": "application/json", "Authorization": "Bearer " + key1}
# # If token auth is enabled, don't forget to add token to the HTTP header.
# headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + access_token}

View file

@ -10,11 +10,7 @@ from azureml.core import Workspace, ScriptRunConfig, Environment, Experiment
from azureml.core.runconfig import PyTorchConfiguration
TARGET_GPU_COUNT = {
"gpu-V100-1": 1,
"gpu-V100-2": 2,
"gpu-V100-4": 4,
}
TARGET_GPU_COUNT = {"gpu-V100-1": 1, "gpu-V100-2": 2, "gpu-V100-4": 4}
@dataclass
@ -92,7 +88,7 @@ if __name__ == "__main__":
target_names = [
# "gpu-V100-1", # single GPU
# "gpu-V100-2", # two GPUs
"gpu-V100-4", # four GPUs
"gpu-V100-4" # four GPUs
]
# https://huggingface.co/transformers/pretrained_models.html

View file

@ -13,10 +13,7 @@ from typing import Any, Union, Dict, Callable
from datasets import load_dataset, load_metric
from datasets import DatasetDict, Dataset, Metric # used for typing
from torch.utils.data.dataset import Dataset
from transformers import (
PreTrainedTokenizerBase,
AutoTokenizer,
)
from transformers import PreTrainedTokenizerBase, AutoTokenizer
logger = logging.getLogger(__name__)
@ -113,8 +110,7 @@ def load_raw_glue_dataset(task: str) -> Union[DatasetDict, Dataset]:
def load_encoded_glue_dataset(
task: str,
tokenizer: PreTrainedTokenizerBase,
task: str, tokenizer: PreTrainedTokenizerBase
) -> Union[DatasetDict, Dataset]:
"""Load GLUE data, apply tokenizer and split into train/validation."""
tokenizer_func = construct_tokenizer_function(tokenizer=tokenizer, task=task)

View file

@ -154,9 +154,7 @@ def main():
kwargs = {"batch_size": args.batch_size}
if use_cuda:
kwargs.update(
{"num_workers": 1, "pin_memory": True, "shuffle": True},
)
kwargs.update({"num_workers": 1, "pin_memory": True, "shuffle": True})
transform = transforms.Compose(
[transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]

View file

@ -61,8 +61,7 @@ def transformers_environment(use_gpu=True):
env_name = "transformers-gpu"
env = Environment.from_pip_requirements(
name=env_name,
file_path=pip_requirements_path,
name=env_name, file_path=pip_requirements_path
)
env.docker.base_image = (
"mcr.microsoft.com/azureml/intelmpi2018.3-cuda10.0-cudnn7-ubuntu16.04"
@ -72,8 +71,7 @@ def transformers_environment(use_gpu=True):
env_name = "transformers-cpu"
env = Environment.from_pip_requirements(
name=env_name,
file_path=pip_requirements_path,
name=env_name, file_path=pip_requirements_path
)
return env

View file

@ -53,8 +53,7 @@ def transformers_environment(use_gpu=True):
env_name = "transformers-gpu"
env = Environment.from_pip_requirements(
name=env_name,
file_path=pip_requirements_path,
name=env_name, file_path=pip_requirements_path
)
env.docker.base_image = (
"mcr.microsoft.com/azureml/intelmpi2018.3-cuda10.0-cudnn7-ubuntu16.04"
@ -64,8 +63,7 @@ def transformers_environment(use_gpu=True):
env_name = "transformers-cpu"
env = Environment.from_pip_requirements(
name=env_name,
file_path=pip_requirements_path,
name=env_name, file_path=pip_requirements_path
)
return env

View file

@ -50,8 +50,7 @@ def transformers_environment(use_gpu=True):
env_name = "transformers-gpu"
env = Environment.from_pip_requirements(
name=env_name,
file_path=pip_requirements_path,
name=env_name, file_path=pip_requirements_path
)
env.docker.base_image = (
"mcr.microsoft.com/azureml/intelmpi2018.3-cuda10.0-cudnn7-ubuntu16.04"
@ -61,8 +60,7 @@ def transformers_environment(use_gpu=True):
env_name = "transformers-cpu"
env = Environment.from_pip_requirements(
name=env_name,
file_path=pip_requirements_path,
name=env_name, file_path=pip_requirements_path
)
return env

View file

@ -13,10 +13,7 @@ from typing import Any, Union, Dict, Callable
from datasets import load_dataset, load_metric
from datasets import DatasetDict, Dataset, Metric # used for typing
from torch.utils.data.dataset import Dataset
from transformers import (
PreTrainedTokenizerBase,
AutoTokenizer,
)
from transformers import PreTrainedTokenizerBase, AutoTokenizer
logger = logging.getLogger(__name__)
@ -113,8 +110,7 @@ def load_raw_glue_dataset(task: str) -> Union[DatasetDict, Dataset]:
def load_encoded_glue_dataset(
task: str,
tokenizer: PreTrainedTokenizerBase,
task: str, tokenizer: PreTrainedTokenizerBase
) -> Union[DatasetDict, Dataset]:
"""Load GLUE data, apply tokenizer and split into train/validation."""
tokenizer_func = construct_tokenizer_function(tokenizer=tokenizer, task=task)

View file

@ -15,10 +15,7 @@ prefix = Path(__file__).parent
script_dir = str(prefix.joinpath("src"))
script_name = "train.py"
arguments = [
"--compute",
"CPU", # set to GPU for accelerated training
]
arguments = ["--compute", "CPU"] # set to GPU for accelerated training
# environment file
environment_file = str(prefix.joinpath("requirements.txt"))

View file

@ -174,15 +174,11 @@ def checkVisionTrainRun(child_runs, expected_min_score, expected_max_score):
)
print("Primary metric value of {}: {}".format(hd_run.id, best_metric))
lower_err_msg = (
"Primary metric value was lower than the expected min value of {}".format(
expected_min_score
)
lower_err_msg = "Primary metric value was lower than the expected min value of {}".format(
expected_min_score
)
higher_err_msg = (
"Primary metric value was higher than the expected max value of {}".format(
expected_max_score
)
higher_err_msg = "Primary metric value was higher than the expected max value of {}".format(
expected_max_score
)
assert best_metric >= expected_min_score, lower_err_msg
assert best_metric <= expected_max_score, higher_err_msg

View file

@ -79,13 +79,9 @@ def score_data(
(Path(score_output) / "score").write_text("scored with {}".format(model))
@command_component(
display_name="Evaluate",
environment=conda_env,
)
@command_component(display_name="Evaluate", environment=conda_env)
def eval_model(
scoring_result: Input(type="uri_folder"),
eval_output: Output(type="uri_folder"),
scoring_result: Input(type="uri_folder"), eval_output: Output(type="uri_folder")
):
"""A dummy evaluate component."""

View file

@ -1,13 +1,9 @@
from mldesigner import command_component, Input, Output
@command_component(
display_name="Eval Model",
version="0.0.9",
)
@command_component(display_name="Eval Model", version="0.0.9")
def eval_model(
scoring_result: Input(type="uri_folder"),
eval_output: Output(type="uri_folder"),
scoring_result: Input(type="uri_folder"), eval_output: Output(type="uri_folder")
):
"""A dummy eval component defined by dsl component."""

View file

@ -17,10 +17,7 @@ args = parser.parse_args()
print("hello training world...")
lines = [
f"Raw data path: {args.raw_data}",
f"Data output path: {args.prep_data}",
]
lines = [f"Raw data path: {args.raw_data}", f"Data output path: {args.prep_data}"]
for line in lines:
print(line)

View file

@ -235,8 +235,9 @@ def get_dali_train_loader(dali_cpu=False):
pipe, size=int(pipe.epoch_size("Reader") / world_size)
)
return DALIWrapper(train_loader, num_classes, one_hot), int(
pipe.epoch_size("Reader") / (world_size * batch_size)
return (
DALIWrapper(train_loader, num_classes, one_hot),
int(pipe.epoch_size("Reader") / (world_size * batch_size)),
)
return gdtl
@ -275,8 +276,9 @@ def get_dali_val_loader():
pipe, size=int(pipe.epoch_size("Reader") / world_size)
)
return DALIWrapper(val_loader, num_classes, one_hot), int(
pipe.epoch_size("Reader") / (world_size * batch_size)
return (
DALIWrapper(val_loader, num_classes, one_hot),
int(pipe.epoch_size("Reader") / (world_size * batch_size)),
)
return gdvl
@ -385,10 +387,7 @@ def get_pytorch_train_loader(
train_dataset = datasets.ImageFolder(
traindir,
transforms.Compose(
[
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
]
[transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip()]
),
)
@ -409,8 +408,9 @@ def get_pytorch_train_loader(
drop_last=True,
)
return PrefetchedWrapper(train_loader, num_classes, fp16, one_hot), len(
train_loader
return (
PrefetchedWrapper(train_loader, num_classes, fp16, one_hot),
len(train_loader),
)
@ -425,13 +425,7 @@ def get_pytorch_val_loader(
):
valdir = os.path.join(data_path, "val")
val_dataset = datasets.ImageFolder(
valdir,
transforms.Compose(
[
transforms.Resize(256),
transforms.CenterCrop(224),
]
),
valdir, transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224)])
)
if torch.distributed.is_initialized():

View file

@ -257,7 +257,7 @@ def lr_exponential_policy(
lr = base_lr * (epoch + 1) / warmup_length
else:
e = epoch - warmup_length
lr = base_lr * (epoch_decay**e)
lr = base_lr * (epoch_decay ** e)
return lr
return lr_policy(_lr_fn, logger=logger)

View file

@ -44,9 +44,7 @@ def find_image_subfolder(current_root):
def build_image_datasets(
train_images_dir: str,
valid_images_dir: str,
input_size: int = 224,
train_images_dir: str, valid_images_dir: str, input_size: int = 224
):
"""
Args:

View file

@ -10,9 +10,7 @@ import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
MODEL_ARCH_LIST = [
"resnet18",
]
MODEL_ARCH_LIST = ["resnet18"]
def load_model(model_arch: str, output_dimension: int = 1, pretrained: bool = True):

View file

@ -613,8 +613,7 @@ def run(args):
# PROFILER: here we use a helper class to enable profiling
# see profiling.py for the implementation details
training_profiler = PyTorchProfilerHandler(
enabled=bool(args.enable_profiling),
rank=training_handler.world_rank,
enabled=bool(args.enable_profiling), rank=training_handler.world_rank
)
# PROFILER: set profiler in trainer to call profiler.step() during training
training_handler.profiler = training_profiler.start_profiler()