From 0232b499f2035a33bce31984bef733a0bdd9d00b Mon Sep 17 00:00:00 2001 From: Marvin Buss Date: Fri, 24 Apr 2020 14:14:13 +0200 Subject: [PATCH] added input validation --- .cloud/.azure/workspace.json | 3 +- Dockerfile | 2 +- code/main.py | 27 ++++++--- code/schemas/azure_credential_schema.json | 26 ++++++++ code/schemas/workspace_schema.json | 74 +++++++++++++++++++++++ code/utils.py | 32 ++++++---- 6 files changed, 141 insertions(+), 23 deletions(-) create mode 100644 code/schemas/azure_credential_schema.json create mode 100644 code/schemas/workspace_schema.json diff --git a/.cloud/.azure/workspace.json b/.cloud/.azure/workspace.json index 0a35d50..432de4b 100644 --- a/.cloud/.azure/workspace.json +++ b/.cloud/.azure/workspace.json @@ -11,6 +11,5 @@ "app_insights": "Microsoft.Insights/components/", "container_registry": "Microsoft.ContainerRegistry/registries/", "cmk_key_vault": "Microsoft.KeyVault/vaults/", - "resource_cmk_uri": "", - "hbi_workspace": false + "resource_cmk_uri": "https://" } diff --git a/Dockerfile b/Dockerfile index f6f42db..00e8623 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM marvinbuss/aml-docker:1.1.5 +FROM marvinbuss/aml-docker:1.1.5.1 LABEL maintainer="azure/gh-aml" diff --git a/code/main.py b/code/main.py index 5a51317..4483f67 100644 --- a/code/main.py +++ b/code/main.py @@ -7,13 +7,12 @@ from azureml.core.authentication import ServicePrincipalAuthentication from adal.adal_error import AdalError from msrest.exceptions import AuthenticationError from json import JSONDecodeError -from utils import AMLConfigurationException, required_parameters_provided, mask_parameter +from utils import AMLConfigurationException, mask_parameter, load_json, validate_json def main(): # Loading input values - print("::debug::Loading input values") - parameters_file = os.environ.get("INPUT_PARAMETERS_FILE", default="workspace.json") + print("::debug::Loading azure credentials") azure_credentials = os.environ.get("INPUT_AZURE_CREDENTIALS", 
default="{}") try: azure_credentials = json.loads(azure_credentials) @@ -23,10 +22,11 @@ def main(): # Checking provided parameters print("::debug::Checking provided parameters") - required_parameters_provided( - parameters=azure_credentials, - keys=["tenantId", "clientId", "clientSecret", "subscriptionId"], - message="Required parameter(s) not found in your azure credentials saved in AZURE_CREDENTIALS secret for logging in to the workspace. Please provide a value for the following key(s): " + azure_credentials_schema = load_json(path=os.path.join("code", "schemas", "azure_credential_schema.json")) + validate_json( + data=azure_credentials, + schema=azure_credentials_schema, + input="AZURE_CREDENTIALS" ) # Mask values @@ -38,13 +38,22 @@ def main(): # Loading parameters file print("::debug::Loading parameters file") + parameters_file = os.environ.get("INPUT_PARAMETERS_FILE", default="workspace.json") parameters_file_path = os.path.join(".cloud", ".azure", parameters_file) try: - with open(parameters_file_path) as f: - parameters = json.load(f) + parameters = load_json(path=parameters_file_path) except FileNotFoundError: print(f"::debug::Could not find parameter file in {parameters_file_path}. Please provide a parameter file in your repository if you do not want to use default settings (e.g. 
.cloud/.azure/workspace.json).") parameters = {} + + # Checking provided parameters + print("::debug::Checking provided parameters") + parameters_schema = load_json(path=os.path.join("code", "schemas", "workspace_schema.json")) + validate_json( + data=parameters, + schema=parameters_schema, + input="PARAMETERS_FILE" + ) # Loading Workspace sp_auth = ServicePrincipalAuthentication( diff --git a/code/schemas/azure_credential_schema.json b/code/schemas/azure_credential_schema.json new file mode 100644 index 0000000..5f7af9f --- /dev/null +++ b/code/schemas/azure_credential_schema.json @@ -0,0 +1,26 @@ +{ + "$id": "http://azure-ml.com/schemas/azure_credentials.json", + "$schema": "http://json-schema.org/schema", + "title": "azure_credentials", + "description": "JSON specification for your azure credentials", + "type": "object", + "required": ["clientId", "clientSecret", "subscriptionId", "tenantId"], + "properties": { + "clientId": { + "type": "string", + "description": "The client ID of the service principal." + }, + "clientSecret": { + "type": "string", + "description": "The client secret of the service principal." + }, + "subscriptionId": { + "type": "string", + "description": "The subscription ID that should be used." + }, + "tenantId": { + "type": "string", + "description": "The tenant ID of the service principal." 
+ } + } +} \ No newline at end of file diff --git a/code/schemas/workspace_schema.json b/code/schemas/workspace_schema.json new file mode 100644 index 0000000..3a0ed79 --- /dev/null +++ b/code/schemas/workspace_schema.json @@ -0,0 +1,74 @@ +{ + "$id": "http://azure-ml.com/schemas/workspace.json", + "$schema": "http://json-schema.org/schema", + "title": "aml-workspace", + "description": "JSON specification for your workspace details", + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The workspace name.", + "minLength": 2, + "maxLength": 32 + }, + "resource_group": { + "type": "string", + "description": "The Azure resource group that contains the workspace." + }, + "create_workspace": { + "type": "boolean", + "description": "Indicates whether to create the workspace if it doesn't exist." + }, + "friendly_name": { + "type": "string", + "description": "A friendly name for the workspace that can be displayed in the UI." + }, + "create_resource_group": { + "type": "boolean", + "description": "Indicates whether to create the resource group if it doesn't exist." + }, + "location": { + "type": "string", + "description": "The location of the workspace." 
+ }, + "sku": { + "type": "string", + "description": "The SKU name (also referred as edition).", + "pattern": "basic|enterprise" + }, + "storage_account": { + "type": "string", + "description": "An existing storage account in the Azure resource ID format.", + "pattern": "Microsoft.Storage\/storageAccounts\/.+" + }, + "key_vault": { + "type": "string", + "description": "An existing key vault in the Azure resource ID format.", + "pattern": "Microsoft.KeyVault\/vaults\/.+" + }, + "app_insights": { + "type": "string", + "description": "An existing Application Insights in the Azure resource ID format.", + "pattern": "Microsoft.Insights\/components\/.+" + }, + "container_registry": { + "type": "string", + "description": "An existing container registry in the Azure resource ID format.", + "pattern": "Microsoft.ContainerRegistry\/registries\/.+" + }, + "cmk_key_vault": { + "type": "string", + "description": "The key vault containing the customer managed key in the Azure resource ID format.", + "pattern": "Microsoft.KeyVault\/vaults\/.+" + }, + "resource_cmk_uri": { + "type": "string", + "description": "The key URI of the customer managed key to encrypt the data at rest.", + "pattern": "https:\/\/.+" + }, + "hbi_workspace": { + "type": "boolean", + "description": "Specifies whether the customer data is of High Business Impact(HBI), i.e., contains sensitive business information." + } + } +} \ No newline at end of file diff --git a/code/utils.py b/code/utils.py index 2a98e91..0c88c21 100644 --- a/code/utils.py +++ b/code/utils.py @@ -1,17 +1,27 @@ +import json +import jsonschema + + class AMLConfigurationException(Exception): pass -def required_parameters_provided(parameters, keys, message="Required parameter not found in your parameters file. 
Please provide a value for the following key(s): "):
-    missing_keys = []
-    for key in keys:
-        if key not in parameters:
-            err_msg = f"{message} {key}"
-            print(f"::error::{err_msg}")
-            missing_keys.append(key)
-    if len(missing_keys) > 0:
-        raise AMLConfigurationException(f"{message} {missing_keys}")
-
-
 def mask_parameter(parameter):
     print(f"::add-mask::{parameter}")
+
+
+def load_json(path):  # Parse the JSON file at `path`; FileNotFoundError / JSONDecodeError propagate to the caller.
+    with open(path, encoding="utf-8") as f:  # explicit encoding: do not depend on the platform default
+        json_object = json.load(f)
+    return json_object
+
+
+def validate_json(data, schema, input_name):  # NOTE(review): callers must pass input_name=...; main.py currently passes input=... (TypeError)
+    validator = jsonschema.Draft7Validator(schema)
+    errors = list(validator.iter_errors(data))  # iter_errors() is lazy and has no len(); materialize it before counting
+    if errors:
+        for error in errors:
+            print(f"::error::JSON validation error: {error}")
+        raise AMLConfigurationException(f"JSON validation error for '{input_name}'. Provided object does not match schema. Please check the output for more details.")
+    else:
+        print(f"::debug::JSON validation passed for '{input_name}'. Provided object does match schema.")