deploy-arm-templates api version update (#1458)
* updating api versions
* addressing comments
* black formatting

Co-authored-by: Raghu Ramaswamy <raghura@microsoft.com>
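Summary of the change set below: the Azure ML ARM templates and REST deployment scripts move from apiVersion 2021-10-01 to 2022-05-01, the sample Python scripts are re-run through Black, and several sample asset paths gain a cli/ prefix. As an illustrative sketch only (not a literal file from this commit; parameter names follow the templates shown in the diff), each affected ARM resource ends up shaped like:

    {
        "type": "Microsoft.MachineLearningServices/workspaces/onlineEndpoints",
        "apiVersion": "2022-05-01",
        "name": "[concat(parameters('workspaceName'), '/', parameters('onlineEndpointName'))]",
        "location": "[parameters('location')]"
    }

The curl-based scripts keep the version in one place, API_VERSION="2022-05-01", and splice it into their inline templates as \"$API_VERSION\".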
This commit is contained in:
Parent
6429cbea2b
Commit
d75447d655
@@ -98,12 +98,7 @@ if __name__ == "__main__":
         required=False,
         help="Output AMLARC-compatible file, if not provides, " "replace file inplace",
     )
-    parser.add_argument(
-        "-c",
-        "--compute-target",
-        required=False,
-        help="Compute target",
-    )
+    parser.add_argument("-c", "--compute-target", required=False, help="Compute target")
     parser.add_argument("-it", "--instance-type", required=False, help="Instance type")
     parser.add_argument(
         "-cr",

@@ -41,12 +41,7 @@ def check_test_case(input_file):
 if __name__ == "__main__":
     # Parse command line arguments
     parser = argparse.ArgumentParser(description="Check all papermill output files.")
-    parser.add_argument(
-        "-i",
-        "--input",
-        required=True,
-        help="job ipynb file list",
-    )
+    parser.add_argument("-i", "--input", required=True, help="job ipynb file list")

     args = parser.parse_args()

@@ -34,10 +34,7 @@ if __name__ == "__main__":
     )
     parser.add_argument("-r", "--regex", required=True, help="test case name selector")
     parser.add_argument(
-        "-o",
-        "--output",
-        required=False,
-        help="the file selected test case send to",
+        "-o", "--output", required=False, help="the file selected test case send to"
     )

     args = parser.parse_args()

@@ -42,7 +42,7 @@
     "resources": [
         {
             "type": "Microsoft.MachineLearningServices/workspaces/codes/versions",
-            "apiVersion": "2021-10-01",
+            "apiVersion": "2022-05-01",
             "name": "[concat(parameters('workspaceName'), '/', parameters('codeAssetName'), '/', parameters('codeAssetVersion'))]",
             "properties": {
                 "description": "[parameters('codeAssetDescription')]",

@@ -38,7 +38,7 @@
     "resources": [
         {
             "type": "Microsoft.MachineLearningServices/workspaces/environments/versions",
-            "apiVersion": "2021-10-01",
+            "apiVersion": "2022-05-01",
             "name": "[concat(parameters('workspaceName'), '/', parameters('environmentAssetName'), '/', parameters('environmentAssetVersion'))]",
             "properties": {
                 "isAnonymous": "[parameters('isAnonymous')]",

@@ -39,7 +39,7 @@
     "resources": [
         {
             "type": "Microsoft.MachineLearningServices/workspaces/models/versions",
-            "apiVersion": "2021-10-01",
+            "apiVersion": "2022-05-01",
             "name": "[concat(parameters('workspaceName'), '/', parameters('modelAssetName'), '/', parameters('modelAssetVersion'))]",
             "properties": {
                 "description": "[parameters('modelDescription')]",

@@ -82,7 +82,7 @@
     "resources": [
         {
             "type": "Microsoft.MachineLearningServices/workspaces/onlineEndpoints/deployments",
-            "apiVersion": "2021-10-01",
+            "apiVersion": "2022-05-01",
             "name": "[concat(parameters('workspaceName'), '/', parameters('onlineEndpointName'),'/', parameters('onlineDeploymentName'))]",
             "location": "[parameters('location')]",
             "tags": "[parameters('onlineDeploymentTags')]",

@@ -55,7 +55,7 @@
     "resources": [
         {
             "type": "Microsoft.MachineLearningServices/workspaces/onlineEndpoints",
-            "apiVersion": "2021-10-01",
+            "apiVersion": "2022-05-01",
             "name": "[concat(parameters('workspaceName'), '/', parameters('onlineEndpointName'))]",
             "location": "[parameters('location')]",
             "tags": "[parameters('onlineEndpointTags')]",

@@ -29,10 +29,7 @@ def create_jsonl_files(uri_folder_data_path):
     )

     # Baseline of json line dictionary
-    json_line_sample = {
-        "image_url": uri_folder_data_path,
-        "label": "",
-    }
+    json_line_sample = {"image_url": uri_folder_data_path, "label": ""}

     index = 0
     # Scan each sub directary and generate a jsonl line per image, distributed on train and valid JSONL files

@@ -29,10 +29,7 @@ def create_jsonl_files(uri_folder_data_path):
     )

     # Baseline of json line dictionary
-    json_line_sample = {
-        "image_url": uri_folder_data_path,
-        "label": [],
-    }
+    json_line_sample = {"image_url": uri_folder_data_path, "label": []}

     # Path to the labels file.
     labelFile = os.path.join(src_images, "labels.csv")

@@ -235,8 +235,9 @@ def get_dali_train_loader(dali_cpu=False):
             pipe, size=int(pipe.epoch_size("Reader") / world_size)
         )

-        return DALIWrapper(train_loader, num_classes, one_hot), int(
-            pipe.epoch_size("Reader") / (world_size * batch_size)
+        return (
+            DALIWrapper(train_loader, num_classes, one_hot),
+            int(pipe.epoch_size("Reader") / (world_size * batch_size)),
         )

     return gdtl
@@ -275,8 +276,9 @@ def get_dali_val_loader():
             pipe, size=int(pipe.epoch_size("Reader") / world_size)
         )

-        return DALIWrapper(val_loader, num_classes, one_hot), int(
-            pipe.epoch_size("Reader") / (world_size * batch_size)
+        return (
+            DALIWrapper(val_loader, num_classes, one_hot),
+            int(pipe.epoch_size("Reader") / (world_size * batch_size)),
         )

     return gdvl
@@ -385,10 +387,7 @@ def get_pytorch_train_loader(
     train_dataset = datasets.ImageFolder(
         traindir,
         transforms.Compose(
-            [
-                transforms.RandomResizedCrop(224),
-                transforms.RandomHorizontalFlip(),
-            ]
+            [transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip()]
         ),
     )

@@ -409,8 +408,9 @@ def get_pytorch_train_loader(
         drop_last=True,
     )

-    return PrefetchedWrapper(train_loader, num_classes, fp16, one_hot), len(
-        train_loader
+    return (
+        PrefetchedWrapper(train_loader, num_classes, fp16, one_hot),
+        len(train_loader),
     )

@@ -425,13 +425,7 @@ def get_pytorch_val_loader(
 ):
     valdir = os.path.join(data_path, "val")
     val_dataset = datasets.ImageFolder(
-        valdir,
-        transforms.Compose(
-            [
-                transforms.Resize(256),
-                transforms.CenterCrop(224),
-            ]
-        ),
+        valdir, transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224)])
     )

     if torch.distributed.is_initialized():

@@ -257,7 +257,7 @@ def lr_exponential_policy(
             lr = base_lr * (epoch + 1) / warmup_length
         else:
             e = epoch - warmup_length
-            lr = base_lr * (epoch_decay**e)
+            lr = base_lr * (epoch_decay ** e)
         return lr

     return lr_policy(_lr_fn, logger=logger)

@@ -17,10 +17,7 @@ args = parser.parse_args()

 print("hello training world...")

-lines = [
-    f"Raw data path: {args.raw_data}",
-    f"Data output path: {args.prep_data}",
-]
+lines = [f"Raw data path: {args.raw_data}", f"Data output path: {args.prep_data}"]

 for line in lines:
     print(line)

@@ -72,12 +72,7 @@ def main(args):
     create_rai_tool_directories(incoming_dir)
     _logger.info("Saved empty RAI Insights input to temporary directory")

-    insight_paths = [
-        args.insight_1,
-        args.insight_2,
-        args.insight_3,
-        args.insight_4,
-    ]
+    insight_paths = [args.insight_1, args.insight_2, args.insight_3, args.insight_4]

     included_tools: Dict[str, bool] = {
         RAIToolType.CAUSAL: False,

@@ -160,10 +160,7 @@ def copy_insight_to_raiinsights(

     src_dir = insight_dir / tool_dir_name / tool_dir_items[0].parts[-1]
     dst_dir = rai_insights_dir / tool_dir_name / tool_dir_items[0].parts[-1]
-    shutil.copytree(
-        src=src_dir,
-        dst=dst_dir,
-    )
+    shutil.copytree(src=src_dir, dst=dst_dir)

     _logger.info("Copy complete")
     return tool_type
@@ -186,9 +183,7 @@ def save_to_output_port(rai_i: RAIInsights, output_port_path: str, tool_type: st

         _logger.info("Starting copy")
         shutil.copytree(
-            pathlib.Path(tmpdirname) / tool_dir_name,
-            target_path,
-            dirs_exist_ok=True,
+            pathlib.Path(tmpdirname) / tool_dir_name, target_path, dirs_exist_ok=True
         )
         _logger.info("Copied to output")

@@ -17,10 +17,7 @@ args = parser.parse_args()

 print("hello training world...")

-lines = [
-    f"Raw data path: {args.raw_data}",
-    f"Data output path: {args.prep_data}",
-]
+lines = [f"Raw data path: {args.raw_data}", f"Data output path: {args.prep_data}"]

 for line in lines:
     print(line)

@@ -31,8 +31,7 @@ mlflow.log_text(str(c), "dask_cluster1")
 # read in the data from the provided file dataset (which is mounted at the same
 # location on all nodes of the job)
 df = dd.read_csv(
-    f"{dataset}/*.csv",
-    parse_dates=["tpep_pickup_datetime", "tpep_dropoff_datetime"],
+    f"{dataset}/*.csv", parse_dates=["tpep_pickup_datetime", "tpep_dropoff_datetime"]
 )

 # as an alternative, the below would be using abfs

@@ -7,17 +7,8 @@ import argparse
 # define constants
 EXCLUDED_JOBS = ["java"]
 EXCLUDED_ENDPOINTS = ["batch", "online", "amlarc"]
-EXCLUDED_RESOURCES = [
-    "workspace",
-    "datastore",
-    "vm-attach",
-    "instance",
-    "connections",
-]
-EXCLUDED_ASSETS = [
-    "conda-yamls",
-    "mlflow-models",
-]
+EXCLUDED_RESOURCES = ["workspace", "datastore", "vm-attach", "instance", "connections"]
+EXCLUDED_ASSETS = ["conda-yamls", "mlflow-models"]
 EXCLUDED_SCRIPTS = ["setup", "cleanup", "run-job"]
 BRANCH = "main" # default - do not change
 # BRANCH = "sdk-preview" # this should be deleted when this branch is merged to main

@@ -6,9 +6,9 @@ TOKEN=$(az account get-access-token --query accessToken -o tsv)

 # <create_variables>
 SUBSCRIPTION_ID=$(az account show --query id | tr -d '\r"')
-LOCATION=$(az ml workspace show --query location | tr -d '\r"')
 RESOURCE_GROUP=$(az group show --query name | tr -d '\r"')
 WORKSPACE=$(az configure -l | jq -r '.[] | select(.name=="workspace") | .value')
+LOCATION=$(az ml workspace show| jq -r '.location')
 #</create_variables>

 # <set_endpoint_name>
@@ -16,7 +16,7 @@ export ENDPOINT_NAME=endpoint-`echo $RANDOM`
 # </set_endpoint_name>

 #<api_version>
-API_VERSION="2021-10-01"
+API_VERSION="2022-05-01"
 #</api_version>

 echo -e "Using:\nSUBSCRIPTION_ID=$SUBSCRIPTION_ID\nLOCATION=$LOCATION\nRESOURCE_GROUP=$RESOURCE_GROUP\nWORKSPACE=$WORKSPACE"
@@ -58,7 +58,7 @@ export AZURE_STORAGE_ACCOUNT=$(echo $response | jq -r '.value[0].properties.acco
 # </get_storage_details>

 # <upload_code>
-az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/score -s endpoints/online/model-1/onlinescoring --account-name $AZURE_STORAGE_ACCOUNT
+az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/score -s cli/endpoints/online/model-1/onlinescoring --account-name $AZURE_STORAGE_ACCOUNT
 # </upload_code>

 # <create_code>
@@ -71,7 +71,7 @@ codeUri="https://$AZURE_STORAGE_ACCOUNT.blob.core.windows.net/$AZUREML_DEFAULT_C
 # </create_code>

 # <upload_model>
-az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/model -s endpoints/online/model-1/model --account-name $AZURE_STORAGE_ACCOUNT
+az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/model -s cli/endpoints/online/model-1/model --account-name $AZURE_STORAGE_ACCOUNT
 # </upload_model>

 # <create_model>
@@ -84,7 +84,7 @@ modelUri="azureml://subscriptions/$SUBSCRIPTION_ID/resourceGroups/$RESOURCE_GROU
 # </create_model>

 # <read_condafile>
-CONDA_FILE=$(cat endpoints/online/model-1/environment/conda.yml)
+CONDA_FILE=$(cat cli/endpoints/online/model-1/environment/conda.yml)
 # <read_condafile>

 # <create_environment>
@@ -131,7 +131,7 @@ az deployment group create -g $RESOURCE_GROUP \
 codeId="$resourceScope/workspaces/$WORKSPACE/codes/score-sklearn/versions/1" \
 scoringScript=score.py \
 environmentId="$resourceScope/workspaces/$WORKSPACE/environments/sklearn-env/versions/$ENV_VERSION" \
-model="$resourceScope/workspaces/$WORKSPACE/models/sklearn/versions/1" \
+model="$resourceScope/workspaces/$WORKSPACE/models/score-sklearn/versions/1" \
 endpointComputeType=Managed \
 skuName=Standard_F2s_v2 \
 skuCapacity=1
@@ -158,7 +158,7 @@ accessToken=$(echo $response | jq -r '.accessToken')
 curl --location --request POST $scoringUri \
 --header "Authorization: Bearer $accessToken" \
 --header "Content-Type: application/json" \
---data-raw @endpoints/online/model-1/sample-request.json
+--data-raw @cli/endpoints/online/model-1/sample-request.json
 # </score_endpoint>

 # <get_deployment_logs>

@@ -11,7 +11,7 @@ schema='$schema'
 echo -e "Using:\nSUBSCRIPTION_ID=$SUBSCRIPTION_ID\nLOCATION=$LOCATION\nRESOURCE_GROUP=$RESOURCE_GROUP\nWORKSPACE=$WORKSPACE"

 # <read_condafile>
-CONDA_FILE=$(< model/environment/conda.yml)
+CONDA_FILE=$(< cli/endpoints/online/model-1/environment/conda.yml)
 # <read_condafile>

 #<get_access_token>
@@ -23,7 +23,7 @@ export ENDPOINT_NAME=endpt-`echo $RANDOM`
 # </set_endpoint_name>

 #<api_version>
-API_VERSION="2021-10-01"
+API_VERSION="2022-05-01"
 #</api_version>

 # define how to wait
@@ -62,7 +62,7 @@ export AZURE_STORAGE_ACCOUNT=$(echo $response | jq -r '.value[0].properties.acco
 # </get_storage_details>

 # <upload_code>
-az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/score -s endpoints/online/model-1/onlinescoring --account-name $AZURE_STORAGE_ACCOUNT
+az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/score -s cli/endpoints/online/model-1/onlinescoring --account-name $AZURE_STORAGE_ACCOUNT
 # </upload_code>

 # <create_code>
@@ -117,7 +117,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
     [
         {
             \"type\": \"Microsoft.MachineLearningServices/workspaces/codes/versions\",
-            \"apiVersion\": \"2021-10-01\",
+            \"apiVersion\": \"$API_VERSION\",
             \"name\": \"[concat(parameters(\'workspaceName\'), \'/\', parameters(\'codeAssetName\'), \'/\', parameters(\'codeAssetVersion\'))]\",
             \"properties\": {
                 \"description\": \"[parameters(\'codeAssetDescription\')]\",
@@ -148,7 +148,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
 # <\create_code>

 # <upload_model>
-az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/model -s endpoints/online/model-1/model --account-name $AZURE_STORAGE_ACCOUNT
+az storage blob upload-batch -d $AZUREML_DEFAULT_CONTAINER/model -s cli/endpoints/online/model-1/model --account-name $AZURE_STORAGE_ACCOUNT
 # <\upload_model>

 # <create_model>
@@ -234,7 +234,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
 # <\create_model>

 # <read_condafile>
-CONDA_FILE=$(cat endpoints/online/model-1/environment/conda.yml)
+CONDA_FILE=$(cat cli/endpoints/online/model-1/environment/conda.yml)
 # <read_condafile>

 # <create_environment>
@@ -296,7 +296,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
     [
         {
             \"type\": \"Microsoft.MachineLearningServices/workspaces/environments/versions\",
-            \"apiVersion\": \"2021-10-01\",
+            \"apiVersion\": \"$API_VERSION\",
             \"name\": \"[concat(parameters(\'workspaceName\'), \'/\', parameters(\'environmentAssetName\'), \'/\', parameters(\'environmentAssetVersion\'))]\",
             \"properties\": {
                 \"description\": \"[parameters(\'environmentAssetDescription\')]\",
@@ -392,7 +392,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
     [
         {
             \"type\": \"Microsoft.MachineLearningServices/workspaces/onlineEndpoints\",
-            \"apiVersion\": \"2021-10-01\",
+            \"apiVersion\": \"$API_VERSION\",
             \"name\": \"[concat(parameters(\'workspaceName\'), \'/\', parameters(\'onlineEndpointName\'))]\",
             \"location\": \"[parameters(\'location\')]\",
             \"tags\": \"[parameters(\'onlineEndpointTags\')]\",
@@ -527,7 +527,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
     \"resources\": [
         {
             \"type\": \"Microsoft.MachineLearningServices/workspaces/onlineEndpoints/deployments\",
-            \"apiVersion\": \"2021-10-01\",
+            \"apiVersion\": \"$API_VERSION\",
             \"name\": \"[concat(parameters(\'workspaceName\'), \'/\', parameters(\'onlineEndpointName\'), \'/\', parameters(\'onlineDeploymentName\'))]\",
             \"location\": \"[parameters(\'location\')]\",
             \"tags\": \"[parameters(\'onlineDeploymentTags\')]\",
@@ -572,7 +572,7 @@ curl --location --request PUT "https://management.azure.com/subscriptions/$SUBSC
         \"value\": \"$resourceScope/workspaces/$WORKSPACE/environments/sklearn-env/versions/$ENV_VERSION\"
     },
     \"model\": {
-        \"value\": \"$resourceScope/workspaces/$WORKSPACE/models/sklearn/versions/1\"
+        \"value\": \"$resourceScope/workspaces/$WORKSPACE/models/score-sklearn/versions/1\"
     },
     \"endpointComputeType\": {
         \"value\": \"Managed\"
@@ -610,7 +610,7 @@ accessToken=$(echo $response | jq -r '.accessToken')
 curl --location --request POST $scoringUri \
 --header "Authorization: Bearer $accessToken" \
 --header "Content-Type: application/json" \
---data @endpoints/online/model-1/sample-request.json
+--data @cli/endpoints/online/model-1/sample-request.json
 # </score_endpoint>

 # <get_deployment_logs>

@@ -27,15 +27,11 @@ input_sample = pd.DataFrame(
                 "slope": 3,
                 "ca": 0,
                 "thal": 2,
             },
         }
     ]
 )

-output_sample = np.ndarray(
-    [
-        1,
-    ]
-)
+output_sample = np.ndarray([1])

 MODEL = None

@@ -100,16 +100,8 @@ def driver():
         test(args, model, device, test_loader)

         if args.save_model:
-            input_schema = Schema(
-                [
-                    TensorSpec(np.dtype(np.uint8), (-1, 28, 28)),
-                ]
-            )
-            output_schema = Schema(
-                [
-                    TensorSpec(np.dtype(np.float32), (-1, 10)),
-                ]
-            )
+            input_schema = Schema([TensorSpec(np.dtype(np.uint8), (-1, 28, 28))])
+            output_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 10))])
             signature = ModelSignature(inputs=input_schema, outputs=output_schema)
             mlflow.pytorch.log_model(model, "model", signature=signature)

@@ -40,10 +40,7 @@ if __name__ == "__main__":
     }

     file_name = os.path.join(
-        os.path.abspath(os.path.dirname(__file__)),
-        "..",
-        "data",
-        args.data_file,
+        os.path.abspath(os.path.dirname(__file__)), "..", "data", args.data_file
     )
     test_sample = open(file_name, "rb").read()
     resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)

@@ -756,11 +756,7 @@ class InferenceServerClient:
         """
         request_uri = "v2/systemsharedmemory/region/{}/register".format(quote(name))

-        register_request = {
-            "key": key,
-            "offset": offset,
-            "byte_size": byte_size,
-        }
+        register_request = {"key": key, "offset": offset, "byte_size": byte_size}
         request_body = json.dumps(register_request)

         response = self._post(
@@ -864,13 +860,7 @@ class InferenceServerClient:
         return json.loads(content)

     def register_cuda_shared_memory(
-        self,
-        name,
-        raw_handle,
-        device_id,
-        byte_size,
-        headers=None,
-        query_params=None,
+        self, name, raw_handle, device_id, byte_size, headers=None, query_params=None
     ):
         """Request the server to register a system shared memory with the
         following specification.

@@ -25,12 +25,9 @@ def get_model_info():
     for model in repo_index:
         model_name = model["name"]
         model_version = model["version"]
-        (
-            input_meta,
-            input_config,
-            output_meta,
-            output_config,
-        ) = parse_model_http(model_name=model_name, model_version=model_version)
+        (input_meta, input_config, output_meta, output_config) = parse_model_http(
+            model_name=model_name, model_version=model_version
+        )
         print(
             f"Found model: {model_name}, version: {model_version}, \
             input meta: {input_meta}, input config: {input_config}, \
@@ -69,11 +66,7 @@ def parse_model_http(model_name, model_version=""):


 def triton_infer(
-    input_mapping,
-    model_name,
-    binary_data=False,
-    binary_output=False,
-    class_count=0,
+    input_mapping, model_name, binary_data=False, binary_output=False, class_count=0
 ):
     """Helper function for setting Triton inputs and executing a request

@@ -65,14 +65,8 @@ v100computes = {
 }

 akscomputes = {
-    "aks-cpu-deploy": {
-        "vm_size": "STANDARD_DS3_V2",
-        "agent_count": 3,
-    },
-    "aks-gpu-deploy": {
-        "vm_size": "STANDARD_NC6S_V3",
-        "agent_count": 3,
-    },
+    "aks-cpu-deploy": {"vm_size": "STANDARD_DS3_V2", "agent_count": 3},
+    "aks-gpu-deploy": {"vm_size": "STANDARD_NC6S_V3", "agent_count": 3},
 }

 # create or get Workspace

@@ -149,12 +149,7 @@ def get_backtest_pipeline(
         inputs=[forecasts.as_mount()],
         outputs=[data_results],
         source_directory=PROJECT_FOLDER,
-        arguments=[
-            "--forecasts",
-            forecasts,
-            "--output-dir",
-            data_results,
-        ],
+        arguments=["--forecasts", forecasts, "--output-dir", data_results],
         runconfig=run_config,
         compute_target=compute_target,
         allow_reuse=False,

@@ -23,11 +23,7 @@ except ImportError:


 def infer_forecasting_dataset_tcn(
-    X_test,
-    y_test,
-    model,
-    output_path,
-    output_dataset_name="results",
+    X_test, y_test, model, output_path, output_dataset_name="results"
 ):

     y_pred, df_all = model.forecast(X_test, y_test)
@@ -71,10 +67,7 @@ def get_model(model_path, model_file_name):
 def get_args():
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        "--model_name",
-        type=str,
-        dest="model_name",
-        help="Model to be loaded",
+        "--model_name", type=str, dest="model_name", help="Model to be loaded"
     )

     parser.add_argument(
@@ -108,12 +101,7 @@ def get_args():
     return args


-def get_data(
-    run,
-    fitted_model,
-    target_column_name,
-    test_dataset_name,
-):
+def get_data(run, fitted_model, target_column_name, test_dataset_name):

     # get input dataset by name
     test_dataset = Dataset.get_by_name(run.experiment.workspace, test_dataset_name)
@@ -159,10 +147,7 @@ if __name__ == "__main__":
     fitted_model = get_model(model_path, model_file_name)

     X_test_df, y_test = get_data(
-        run,
-        fitted_model,
-        target_column_name,
-        test_dataset_name,
+        run, fitted_model, target_column_name, test_dataset_name
     )

     infer_forecasting_dataset_tcn(

@@ -68,10 +68,7 @@ key1, Key2 = webservice.get_keys()
 # access_token, refresh_after = webservice.get_token()

 # If (key) auth is enabled, don't forget to add key to the HTTP header.
-headers = {
-    "Content-Type": "application/json",
-    "Authorization": "Bearer " + key1,
-}
+headers = {"Content-Type": "application/json", "Authorization": "Bearer " + key1}

 # # If token auth is enabled, don't forget to add token to the HTTP header.
 # headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + access_token}

@@ -10,11 +10,7 @@ from azureml.core import Workspace, ScriptRunConfig, Environment, Experiment
 from azureml.core.runconfig import PyTorchConfiguration


-TARGET_GPU_COUNT = {
-    "gpu-V100-1": 1,
-    "gpu-V100-2": 2,
-    "gpu-V100-4": 4,
-}
+TARGET_GPU_COUNT = {"gpu-V100-1": 1, "gpu-V100-2": 2, "gpu-V100-4": 4}


 @dataclass
@@ -92,7 +88,7 @@ if __name__ == "__main__":
     target_names = [
         # "gpu-V100-1", # single GPU
         # "gpu-V100-2", # two GPUs
-        "gpu-V100-4", # four GPUs
+        "gpu-V100-4" # four GPUs
     ]

     # https://huggingface.co/transformers/pretrained_models.html

@@ -13,10 +13,7 @@ from typing import Any, Union, Dict, Callable
 from datasets import load_dataset, load_metric
 from datasets import DatasetDict, Dataset, Metric # used for typing
 from torch.utils.data.dataset import Dataset
-from transformers import (
-    PreTrainedTokenizerBase,
-    AutoTokenizer,
-)
+from transformers import PreTrainedTokenizerBase, AutoTokenizer


 logger = logging.getLogger(__name__)
@@ -113,8 +110,7 @@ def load_raw_glue_dataset(task: str) -> Union[DatasetDict, Dataset]:


 def load_encoded_glue_dataset(
-    task: str,
-    tokenizer: PreTrainedTokenizerBase,
+    task: str, tokenizer: PreTrainedTokenizerBase
 ) -> Union[DatasetDict, Dataset]:
     """Load GLUE data, apply tokenizer and split into train/validation."""
     tokenizer_func = construct_tokenizer_function(tokenizer=tokenizer, task=task)

@@ -154,9 +154,7 @@ def main():

     kwargs = {"batch_size": args.batch_size}
     if use_cuda:
-        kwargs.update(
-            {"num_workers": 1, "pin_memory": True, "shuffle": True},
-        )
+        kwargs.update({"num_workers": 1, "pin_memory": True, "shuffle": True})

     transform = transforms.Compose(
         [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]

@@ -61,8 +61,7 @@ def transformers_environment(use_gpu=True):

         env_name = "transformers-gpu"
         env = Environment.from_pip_requirements(
-            name=env_name,
-            file_path=pip_requirements_path,
+            name=env_name, file_path=pip_requirements_path
         )
         env.docker.base_image = (
             "mcr.microsoft.com/azureml/intelmpi2018.3-cuda10.0-cudnn7-ubuntu16.04"
@@ -72,8 +71,7 @@ def transformers_environment(use_gpu=True):

         env_name = "transformers-cpu"
         env = Environment.from_pip_requirements(
-            name=env_name,
-            file_path=pip_requirements_path,
+            name=env_name, file_path=pip_requirements_path
         )

     return env

@@ -53,8 +53,7 @@ def transformers_environment(use_gpu=True):

         env_name = "transformers-gpu"
         env = Environment.from_pip_requirements(
-            name=env_name,
-            file_path=pip_requirements_path,
+            name=env_name, file_path=pip_requirements_path
         )
         env.docker.base_image = (
             "mcr.microsoft.com/azureml/intelmpi2018.3-cuda10.0-cudnn7-ubuntu16.04"
@@ -64,8 +63,7 @@ def transformers_environment(use_gpu=True):

         env_name = "transformers-cpu"
         env = Environment.from_pip_requirements(
-            name=env_name,
-            file_path=pip_requirements_path,
+            name=env_name, file_path=pip_requirements_path
         )

     return env

@@ -50,8 +50,7 @@ def transformers_environment(use_gpu=True):

         env_name = "transformers-gpu"
         env = Environment.from_pip_requirements(
-            name=env_name,
-            file_path=pip_requirements_path,
+            name=env_name, file_path=pip_requirements_path
        )
         env.docker.base_image = (
             "mcr.microsoft.com/azureml/intelmpi2018.3-cuda10.0-cudnn7-ubuntu16.04"
@@ -61,8 +60,7 @@ def transformers_environment(use_gpu=True):

         env_name = "transformers-cpu"
         env = Environment.from_pip_requirements(
-            name=env_name,
-            file_path=pip_requirements_path,
+            name=env_name, file_path=pip_requirements_path
         )

     return env

@@ -13,10 +13,7 @@ from typing import Any, Union, Dict, Callable
 from datasets import load_dataset, load_metric
 from datasets import DatasetDict, Dataset, Metric # used for typing
 from torch.utils.data.dataset import Dataset
-from transformers import (
-    PreTrainedTokenizerBase,
-    AutoTokenizer,
-)
+from transformers import PreTrainedTokenizerBase, AutoTokenizer


 logger = logging.getLogger(__name__)
@@ -113,8 +110,7 @@ def load_raw_glue_dataset(task: str) -> Union[DatasetDict, Dataset]:


 def load_encoded_glue_dataset(
-    task: str,
-    tokenizer: PreTrainedTokenizerBase,
+    task: str, tokenizer: PreTrainedTokenizerBase
 ) -> Union[DatasetDict, Dataset]:
     """Load GLUE data, apply tokenizer and split into train/validation."""
     tokenizer_func = construct_tokenizer_function(tokenizer=tokenizer, task=task)

@@ -15,10 +15,7 @@ prefix = Path(__file__).parent
 script_dir = str(prefix.joinpath("src"))
 script_name = "train.py"

-arguments = [
-    "--compute",
-    "CPU", # set to GPU for accelerated training
-]
+arguments = ["--compute", "CPU"] # set to GPU for accelerated training

 # environment file
 environment_file = str(prefix.joinpath("requirements.txt"))

@@ -174,15 +174,11 @@ def checkVisionTrainRun(child_runs, expected_min_score, expected_max_score):
         )
         print("Primary metric value of {}: {}".format(hd_run.id, best_metric))

-        lower_err_msg = (
-            "Primary metric value was lower than the expected min value of {}".format(
-                expected_min_score
-            )
+        lower_err_msg = "Primary metric value was lower than the expected min value of {}".format(
+            expected_min_score
         )
-        higher_err_msg = (
-            "Primary metric value was higher than the expected max value of {}".format(
-                expected_max_score
-            )
+        higher_err_msg = "Primary metric value was higher than the expected max value of {}".format(
+            expected_max_score
         )
         assert best_metric >= expected_min_score, lower_err_msg
         assert best_metric <= expected_max_score, higher_err_msg

@@ -79,13 +79,9 @@ def score_data(
     (Path(score_output) / "score").write_text("scored with {}".format(model))


-@command_component(
-    display_name="Evaluate",
-    environment=conda_env,
-)
+@command_component(display_name="Evaluate", environment=conda_env)
 def eval_model(
-    scoring_result: Input(type="uri_folder"),
-    eval_output: Output(type="uri_folder"),
+    scoring_result: Input(type="uri_folder"), eval_output: Output(type="uri_folder")
 ):
     """A dummy evaluate component."""

@@ -1,13 +1,9 @@
 from mldesigner import command_component, Input, Output


-@command_component(
-    display_name="Eval Model",
-    version="0.0.9",
-)
+@command_component(display_name="Eval Model", version="0.0.9")
 def eval_model(
-    scoring_result: Input(type="uri_folder"),
-    eval_output: Output(type="uri_folder"),
+    scoring_result: Input(type="uri_folder"), eval_output: Output(type="uri_folder")
 ):
     """A dummy eval component defined by dsl component."""

@@ -17,10 +17,7 @@ args = parser.parse_args()

 print("hello training world...")

-lines = [
-    f"Raw data path: {args.raw_data}",
-    f"Data output path: {args.prep_data}",
-]
+lines = [f"Raw data path: {args.raw_data}", f"Data output path: {args.prep_data}"]

 for line in lines:
     print(line)

@@ -235,8 +235,9 @@ def get_dali_train_loader(dali_cpu=False):
             pipe, size=int(pipe.epoch_size("Reader") / world_size)
         )

-        return DALIWrapper(train_loader, num_classes, one_hot), int(
-            pipe.epoch_size("Reader") / (world_size * batch_size)
+        return (
+            DALIWrapper(train_loader, num_classes, one_hot),
+            int(pipe.epoch_size("Reader") / (world_size * batch_size)),
         )

     return gdtl
@@ -275,8 +276,9 @@ def get_dali_val_loader():
             pipe, size=int(pipe.epoch_size("Reader") / world_size)
         )

-        return DALIWrapper(val_loader, num_classes, one_hot), int(
-            pipe.epoch_size("Reader") / (world_size * batch_size)
+        return (
+            DALIWrapper(val_loader, num_classes, one_hot),
+            int(pipe.epoch_size("Reader") / (world_size * batch_size)),
         )

     return gdvl
@@ -385,10 +387,7 @@ def get_pytorch_train_loader(
     train_dataset = datasets.ImageFolder(
         traindir,
         transforms.Compose(
-            [
-                transforms.RandomResizedCrop(224),
-                transforms.RandomHorizontalFlip(),
-            ]
+            [transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip()]
         ),
     )

@@ -409,8 +408,9 @@ def get_pytorch_train_loader(
         drop_last=True,
     )

-    return PrefetchedWrapper(train_loader, num_classes, fp16, one_hot), len(
-        train_loader
+    return (
+        PrefetchedWrapper(train_loader, num_classes, fp16, one_hot),
+        len(train_loader),
     )

@@ -425,13 +425,7 @@ def get_pytorch_val_loader(
 ):
     valdir = os.path.join(data_path, "val")
     val_dataset = datasets.ImageFolder(
-        valdir,
-        transforms.Compose(
-            [
-                transforms.Resize(256),
-                transforms.CenterCrop(224),
-            ]
-        ),
+        valdir, transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224)])
     )

     if torch.distributed.is_initialized():

@@ -257,7 +257,7 @@ def lr_exponential_policy(
             lr = base_lr * (epoch + 1) / warmup_length
         else:
             e = epoch - warmup_length
-            lr = base_lr * (epoch_decay**e)
+            lr = base_lr * (epoch_decay ** e)
         return lr

     return lr_policy(_lr_fn, logger=logger)

@@ -44,9 +44,7 @@ def find_image_subfolder(current_root):


 def build_image_datasets(
-    train_images_dir: str,
-    valid_images_dir: str,
-    input_size: int = 224,
+    train_images_dir: str, valid_images_dir: str, input_size: int = 224
 ):
     """
     Args:

@@ -10,9 +10,7 @@ import torch.nn as nn
 import torch.optim as optim
 import torchvision.models as models

-MODEL_ARCH_LIST = [
-    "resnet18",
-]
+MODEL_ARCH_LIST = ["resnet18"]


 def load_model(model_arch: str, output_dimension: int = 1, pretrained: bool = True):

@@ -613,8 +613,7 @@ def run(args):
     # PROFILER: here we use a helper class to enable profiling
     # see profiling.py for the implementation details
     training_profiler = PyTorchProfilerHandler(
-        enabled=bool(args.enable_profiling),
-        rank=training_handler.world_rank,
+        enabled=bool(args.enable_profiling), rank=training_handler.world_rank
     )
     # PROFILER: set profiler in trainer to call profiler.step() during training
     training_handler.profiler = training_profiler.start_profiler()