Mirror of https://github.com/Azure/aztk.git
Internal: fix pylint warnings (#651)
* inital, remove unused imports * run yapf * remove unused imports and variables, fix declaration outside init * fix some pylint warnings, add ssh_into_master * remove unused imports * unused variables * string and function normalization * stop using list comprehension for side effects, make method function * stop using protected member * various pylint fixes * formatting * formatting * add retry decorator with tests * start adding retry decorator, retry docker compose download * update pip and tests * logic fix * change no delete if * factor out reused functions * fix wait_for_all_nodes * fix download return type bug * test vsts ci update * temporarily disable integration tests * syntax fix * update vsts build * add back integration tests, remove debug branch * remove parallel unit tests * more verbose clis * update pylint * typo * fix imports * function returns nothing, don't return * make iterator list * change debug value
This commit is contained in:
Parent
0a9ce94104
Commit
828162ef10
15
.vsts-ci.yml
15
.vsts-ci.yml
|
@ -1,7 +1,6 @@
|
|||
trigger:
|
||||
- master
|
||||
|
||||
|
||||
phases:
|
||||
- phase: Test
|
||||
queue: Hosted Linux Preview
|
||||
|
@ -24,16 +23,22 @@ phases:
|
|||
displayName: yapf
|
||||
|
||||
- script: |
|
||||
pylint -j 2 -E aztk aztk_cli
|
||||
pylint -jobs 2 --errors-only aztk aztk_cli
|
||||
condition: succeeded()
|
||||
displayName: pylint
|
||||
displayName: pylint error check
|
||||
|
||||
- script: |
|
||||
pytest -n 20 --ignore=tests/integration_tests
|
||||
pytest --ignore=tests/integration_tests
|
||||
condition: succeeded()
|
||||
displayName: unit tests
|
||||
|
||||
- script: |
|
||||
pytest -n 75
|
||||
pytest --numprocesses=75
|
||||
condition: succeeded()
|
||||
displayName: integration tests
|
||||
|
||||
- script: |
|
||||
pylint -jobs 2 --disable=fixme aztk aztk_cli
|
||||
continueOnError: true
|
||||
condition: succeeded()
|
||||
displayName: pylint report
|
||||
|
|
|
@ -1,10 +1,19 @@
|
|||
from aztk import models
|
||||
from aztk.internal import cluster_data
|
||||
from aztk.utils import ssh as ssh_lib
|
||||
|
||||
from .helpers import (create_user_on_cluster, create_user_on_node, delete_user_on_cluster, delete_user_on_node,
|
||||
generate_user_on_cluster, generate_user_on_node, get_application_log, get_remote_login_settings,
|
||||
node_run, run, ssh_into_node)
|
||||
from .helpers import (
|
||||
create_user_on_cluster,
|
||||
create_user_on_node,
|
||||
delete_user_on_cluster,
|
||||
delete_user_on_node,
|
||||
generate_user_on_cluster,
|
||||
generate_user_on_node,
|
||||
get_application_log,
|
||||
get_remote_login_settings,
|
||||
node_run,
|
||||
run,
|
||||
ssh_into_node,
|
||||
)
|
||||
|
||||
|
||||
class BaseOperations:
|
||||
|
@ -15,14 +24,14 @@ class BaseOperations:
|
|||
Azure Batch service.
|
||||
blob_client (:obj:`azure.storage.blob.BlockBlobService`): Client used to interact with the Azure Storage
|
||||
Blob service.
|
||||
secrets_configuration (:obj:`aztk.models.SecretsConfiguration`): Model that holds AZTK secrets used to authenticate
|
||||
with Azure and the clusters.
|
||||
secrets_configuration (:obj:`aztk.models.SecretsConfiguration`):
|
||||
Model that holds AZTK secrets used to authenticate with Azure and the clusters.
|
||||
"""
|
||||
|
||||
def __init__(self, context):
|
||||
self.batch_client = context['batch_client']
|
||||
self.blob_client = context['blob_client']
|
||||
self.secrets_configuration = context['secrets_configuration']
|
||||
self.batch_client = context["batch_client"]
|
||||
self.blob_client = context["blob_client"]
|
||||
self.secrets_configuration = context["secrets_configuration"]
|
||||
|
||||
def get_cluster_configuration(self, id: str) -> models.ClusterConfiguration:
|
||||
"""Open an ssh tunnel to a node
|
||||
|
@ -62,7 +71,8 @@ class BaseOperations:
|
|||
id (:obj:`str`): the id of the cluster the node is in
|
||||
node_id (:obj:`str`): the id of the node to open the ssh tunnel to
|
||||
username (:obj:`str`): the username to authenticate the ssh session
|
||||
ssh_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_key or password. Defaults to None.
|
||||
ssh_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_key or password.
|
||||
Defaults to None.
|
||||
password (:obj:`str`, optional): password for the user, must use ssh_key or password. Defaults to None.
|
||||
port_forward_list (:obj:`List[PortForwardingSpecification`, optional): list of PortForwardingSpecifications.
|
||||
The defined ports will be forwarded to the client.
|
||||
|
@ -89,7 +99,7 @@ class BaseOperations:
|
|||
"""
|
||||
return create_user_on_node.create_user_on_node(self, id, node_id, username, ssh_key, password)
|
||||
|
||||
#TODO: remove nodes as param
|
||||
# TODO: remove nodes as param
|
||||
def create_user_on_cluster(self, id, nodes, username, ssh_pub_key=None, password=None):
|
||||
"""Create a user on every node in the cluster
|
||||
|
||||
|
@ -97,7 +107,8 @@ class BaseOperations:
|
|||
username (:obj:`str`): name of the user to create.
|
||||
id (:obj:`str`): id of the cluster to create the user on.
|
||||
nodes (:obj:`List[ComputeNode]`): list of nodes to create the user on
|
||||
ssh_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_key or password. Defaults to None.
|
||||
ssh_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_key or password.
|
||||
Defaults to None.
|
||||
password (:obj:`str`, optional): password for the user, must use ssh_key or password. Defaults to None.
|
||||
|
||||
Returns:
|
||||
|
@ -117,7 +128,7 @@ class BaseOperations:
|
|||
"""
|
||||
return generate_user_on_node.generate_user_on_node(self, id, node_id)
|
||||
|
||||
#TODO: remove nodes as param
|
||||
# TODO: remove nodes as param
|
||||
def generate_user_on_cluster(self, id, nodes):
|
||||
"""Create a user with an autogenerated username and ssh_key on the cluster
|
||||
|
||||
|
@ -143,7 +154,7 @@ class BaseOperations:
|
|||
"""
|
||||
return delete_user_on_node.delete_user(self, id, node_id, username)
|
||||
|
||||
#TODO: remove nodes as param
|
||||
# TODO: remove nodes as param
|
||||
def delete_user_on_cluster(self, username, id, nodes):
|
||||
"""Delete a user on every node in the cluster
|
||||
|
||||
|
@ -212,10 +223,11 @@ class BaseOperations:
|
|||
Args:
|
||||
id (:obj:`str`): the id of the cluster to run the command on.
|
||||
application_name (:obj:`str`): str
|
||||
tail (:obj:`bool`, optional): If True, get the remaining bytes after current_bytes. Otherwise, the whole log will be retrieved.
|
||||
Only use this if streaming the log as it is being written. Defaults to False.
|
||||
current_bytes (:obj:`int`): Specifies the last seen byte, so only the bytes after current_bytes are retrieved.
|
||||
Only useful if streaming the log as it is being written. Only used if tail is True.
|
||||
tail (:obj:`bool`, optional): If True, get the remaining bytes after current_bytes.
|
||||
Otherwise, the whole log will be retrieved. Only use this if streaming the log as it is being written.
|
||||
Defaults to False.
|
||||
current_bytes (:obj:`int`): Specifies the last seen byte, so only the bytes after current_bytes
|
||||
are retrieved. Only useful if streaming the log as it is being written. Only used if tail is True.
|
||||
|
||||
Returns:
|
||||
:obj:`aztk.models.ApplicationLog`: a model representing the output of the application.
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import concurrent.futures
|
||||
|
||||
|
||||
#TODO: remove nodes param
|
||||
# TODO: remove nodes param
|
||||
def create_user_on_cluster(base_operations, id, nodes, username, ssh_pub_key=None, password=None):
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
futures = {
|
||||
|
|
|
@ -3,7 +3,6 @@ from datetime import datetime, timedelta, timezone
|
|||
import azure.batch.models as batch_models
|
||||
import azure.batch.models.batch_error as batch_error
|
||||
|
||||
from aztk import models
|
||||
from aztk.utils import get_ssh_key
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import concurrent.futures
|
||||
|
||||
|
||||
#TODO: remove nodes param
|
||||
# TODO: remove nodes param
|
||||
def delete_user_on_cluster(base_client, id, nodes, username):
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
futures = [executor.submit(base_client.delete_user_on_node, id, node.id, username) for node in nodes]
|
||||
|
|
|
@ -5,11 +5,11 @@ from Cryptodome.PublicKey import RSA
|
|||
from aztk.utils import secure_utils
|
||||
|
||||
|
||||
#TODO: remove nodes param
|
||||
# TODO: remove nodes param
|
||||
def generate_user_on_cluster(base_operations, id, nodes):
|
||||
generated_username = secure_utils.generate_random_string()
|
||||
ssh_key = RSA.generate(2048)
|
||||
ssh_pub_key = ssh_key.publickey().exportKey('OpenSSH').decode('utf-8')
|
||||
ssh_pub_key = ssh_key.publickey().exportKey("OpenSSH").decode("utf-8")
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
futures = {
|
||||
executor.submit(base_operations.create_user_on_node, id, node.id, generated_username, ssh_pub_key): node
|
||||
|
|
|
@ -6,6 +6,6 @@ from aztk.utils import secure_utils
|
|||
def generate_user_on_node(base_client, pool_id, node_id):
|
||||
generated_username = secure_utils.generate_random_string()
|
||||
ssh_key = RSA.generate(2048)
|
||||
ssh_pub_key = ssh_key.publickey().exportKey('OpenSSH').decode('utf-8')
|
||||
ssh_pub_key = ssh_key.publickey().exportKey("OpenSSH").decode("utf-8")
|
||||
base_client.create_user_on_node(pool_id, node_id, generated_username, ssh_pub_key)
|
||||
return generated_username, ssh_key
|
||||
|
|
|
@ -4,12 +4,10 @@ import azure
|
|||
import azure.batch.models as batch_models
|
||||
import azure.batch.models.batch_error as batch_error
|
||||
|
||||
from aztk import error
|
||||
from aztk import models
|
||||
from aztk import error, models
|
||||
from aztk.utils import constants, helpers
|
||||
|
||||
output_file = constants.TASK_WORKING_DIR + \
|
||||
"/" + constants.SPARK_SUBMIT_LOGS_FILE
|
||||
output_file = constants.TASK_WORKING_DIR + "/" + constants.SPARK_SUBMIT_LOGS_FILE
|
||||
|
||||
|
||||
def __check_task_node_exist(batch_client, cluster_id: str, task: batch_models.CloudTask) -> bool:
|
||||
|
@ -50,17 +48,18 @@ def __get_output_file_properties(batch_client, cluster_id: str, application_name
|
|||
|
||||
def get_log_from_storage(blob_client, container_name, application_name, task):
|
||||
try:
|
||||
blob = blob_client.get_blob_to_text(container_name, application_name + '/' + constants.SPARK_SUBMIT_LOGS_FILE)
|
||||
blob = blob_client.get_blob_to_text(container_name, application_name + "/" + constants.SPARK_SUBMIT_LOGS_FILE)
|
||||
except azure.common.AzureMissingResourceHttpError:
|
||||
raise error.AztkError("Logs not found in your storage account. They were either deleted or never existed.")
|
||||
|
||||
return models.ApplicationLog(
|
||||
name=application_name,
|
||||
cluster_id=container_name,
|
||||
application_state=task.state._value_,
|
||||
application_state=task.state.name,
|
||||
log=blob.content,
|
||||
total_bytes=blob.properties.content_length,
|
||||
exit_code=task.execution_info.exit_code)
|
||||
exit_code=task.execution_info.exit_code,
|
||||
)
|
||||
|
||||
|
||||
def get_log(batch_client, blob_client, cluster_id: str, application_name: str, tail=False, current_bytes: int = 0):
|
||||
|
@ -88,18 +87,20 @@ def get_log(batch_client, blob_client, cluster_id: str, application_name: str, t
|
|||
return models.ApplicationLog(
|
||||
name=application_name,
|
||||
cluster_id=cluster_id,
|
||||
application_state=task.state._value_,
|
||||
application_state=task.state.name,
|
||||
log=content,
|
||||
total_bytes=target_bytes,
|
||||
exit_code=task.execution_info.exit_code)
|
||||
exit_code=task.execution_info.exit_code,
|
||||
)
|
||||
else:
|
||||
return models.ApplicationLog(
|
||||
name=application_name,
|
||||
cluster_id=cluster_id,
|
||||
application_state=task.state._value_,
|
||||
log='',
|
||||
application_state=task.state.name,
|
||||
log="",
|
||||
total_bytes=target_bytes,
|
||||
exit_code=task.execution_info.exit_code)
|
||||
exit_code=task.execution_info.exit_code,
|
||||
)
|
||||
|
||||
|
||||
def get_application_log(base_operations, cluster_id: str, application_name: str, tail=False, current_bytes: int = 0):
|
||||
|
|
|
@ -22,9 +22,10 @@ def node_run(base_client, cluster_id, node_id, command, internal, container_name
|
|||
generated_username,
|
||||
node_rls.ip_address,
|
||||
node_rls.port,
|
||||
ssh_key=ssh_key.exportKey().decode('utf-8'),
|
||||
ssh_key=ssh_key.exportKey().decode("utf-8"),
|
||||
container_name=container_name,
|
||||
timeout=timeout)
|
||||
timeout=timeout,
|
||||
)
|
||||
return output
|
||||
finally:
|
||||
base_client.delete_user_on_node(cluster_id, node.id, generated_username)
|
||||
|
|
|
@ -26,9 +26,10 @@ def cluster_run(base_operations, cluster_id, command, internal, container_name=N
|
|||
command,
|
||||
generated_username,
|
||||
cluster_nodes,
|
||||
ssh_key=ssh_key.exportKey().decode('utf-8'),
|
||||
ssh_key=ssh_key.exportKey().decode("utf-8"),
|
||||
container_name=container_name,
|
||||
timeout=timeout))
|
||||
timeout=timeout,
|
||||
))
|
||||
return output
|
||||
except OSError as exc:
|
||||
raise exc
|
||||
|
|
|
@ -13,8 +13,6 @@ import aztk.utils.constants as constants
|
|||
import aztk.utils.get_ssh_key as get_ssh_key
|
||||
import aztk.utils.helpers as helpers
|
||||
import aztk.utils.ssh as ssh_lib
|
||||
from aztk.client.cluster import CoreClusterOperations
|
||||
from aztk.client.job import CoreJobOperations
|
||||
from aztk.internal import cluster_data
|
||||
from aztk.utils import deprecated, secure_utils
|
||||
|
||||
|
@ -27,6 +25,11 @@ class CoreClient:
|
|||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.secrets_configuration = None
|
||||
self.batch_client = None
|
||||
self.blob_client = None
|
||||
|
||||
def _get_context(self, secrets_configuration: models.SecretsConfiguration):
|
||||
self.secrets_configuration = secrets_configuration
|
||||
|
||||
|
@ -34,9 +37,9 @@ class CoreClient:
|
|||
self.batch_client = azure_api.make_batch_client(secrets_configuration)
|
||||
self.blob_client = azure_api.make_blob_client(secrets_configuration)
|
||||
context = {
|
||||
'batch_client': self.batch_client,
|
||||
'blob_client': self.blob_client,
|
||||
'secrets_configuration': self.secrets_configuration,
|
||||
"batch_client": self.batch_client,
|
||||
"blob_client": self.blob_client,
|
||||
"secrets_configuration": self.secrets_configuration,
|
||||
}
|
||||
return context
|
||||
|
||||
|
@ -52,9 +55,9 @@ class CoreClient:
|
|||
"""
|
||||
return cluster_data.ClusterData(self.blob_client, cluster_id)
|
||||
|
||||
'''
|
||||
"""
|
||||
General Batch Operations
|
||||
'''
|
||||
"""
|
||||
|
||||
@deprecated("0.10.0")
|
||||
def __delete_pool_and_job(self, pool_id: str, keep_logs: bool = False):
|
||||
|
@ -104,9 +107,8 @@ class CoreClient:
|
|||
job_id = cluster_conf.cluster_id
|
||||
|
||||
# Get a verified node agent sku
|
||||
sku_to_use, image_ref_to_use = \
|
||||
helpers.select_latest_verified_vm_image_with_node_agent_sku(
|
||||
VmImageModel.publisher, VmImageModel.offer, VmImageModel.sku, self.batch_client)
|
||||
sku_to_use, image_ref_to_use = helpers.select_latest_verified_vm_image_with_node_agent_sku(
|
||||
VmImageModel.publisher, VmImageModel.offer, VmImageModel.sku, self.batch_client)
|
||||
|
||||
network_conf = None
|
||||
if cluster_conf.subnet_id is not None:
|
||||
|
@ -130,8 +132,9 @@ class CoreClient:
|
|||
metadata=[
|
||||
batch_models.MetadataItem(name=constants.AZTK_SOFTWARE_METADATA_KEY, value=software_metadata_key),
|
||||
batch_models.MetadataItem(
|
||||
name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_CLUSTER_MODE_METADATA)
|
||||
])
|
||||
name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_CLUSTER_MODE_METADATA),
|
||||
],
|
||||
)
|
||||
|
||||
# Create the pool + create user for the pool
|
||||
helpers.create_pool_if_not_exist(pool, self.batch_client)
|
||||
|
@ -184,13 +187,16 @@ class CoreClient:
|
|||
"""
|
||||
# Create new ssh user for the given node
|
||||
self.batch_client.compute_node.add_user(
|
||||
pool_id, node_id,
|
||||
pool_id,
|
||||
node_id,
|
||||
batch_models.ComputeNodeUser(
|
||||
name=username,
|
||||
is_admin=True,
|
||||
password=password,
|
||||
ssh_public_key=get_ssh_key.get_user_public_key(ssh_key, self.secrets_configuration),
|
||||
expiry_time=datetime.now(timezone.utc) + timedelta(days=365)))
|
||||
expiry_time=datetime.now(timezone.utc) + timedelta(days=365),
|
||||
),
|
||||
)
|
||||
|
||||
@deprecated("0.10.0")
|
||||
def __delete_user(self, pool_id: str, node_id: str, username: str) -> str:
|
||||
|
@ -229,7 +235,7 @@ class CoreClient:
|
|||
def __generate_user_on_node(self, pool_id, node_id):
|
||||
generated_username = secure_utils.generate_random_string()
|
||||
ssh_key = RSA.generate(2048)
|
||||
ssh_pub_key = ssh_key.publickey().exportKey('OpenSSH').decode('utf-8')
|
||||
ssh_pub_key = ssh_key.publickey().exportKey("OpenSSH").decode("utf-8")
|
||||
self.__create_user_on_node(generated_username, pool_id, node_id, ssh_pub_key)
|
||||
return generated_username, ssh_key
|
||||
|
||||
|
@ -237,7 +243,7 @@ class CoreClient:
|
|||
def __generate_user_on_pool(self, pool_id, nodes):
|
||||
generated_username = secure_utils.generate_random_string()
|
||||
ssh_key = RSA.generate(2048)
|
||||
ssh_pub_key = ssh_key.publickey().exportKey('OpenSSH').decode('utf-8')
|
||||
ssh_pub_key = ssh_key.publickey().exportKey("OpenSSH").decode("utf-8")
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
futures = {
|
||||
executor.submit(self.__create_user_on_node, generated_username, pool_id, node.id, ssh_pub_key): node
|
||||
|
@ -283,9 +289,10 @@ class CoreClient:
|
|||
generated_username,
|
||||
node_rls.ip_address,
|
||||
node_rls.port,
|
||||
ssh_key=ssh_key.exportKey().decode('utf-8'),
|
||||
ssh_key=ssh_key.exportKey().decode("utf-8"),
|
||||
container_name=container_name,
|
||||
timeout=timeout)
|
||||
timeout=timeout,
|
||||
)
|
||||
return output
|
||||
finally:
|
||||
self.__delete_user(cluster_id, node.id, generated_username)
|
||||
|
@ -306,9 +313,10 @@ class CoreClient:
|
|||
command,
|
||||
generated_username,
|
||||
cluster_nodes,
|
||||
ssh_key=ssh_key.exportKey().decode('utf-8'),
|
||||
ssh_key=ssh_key.exportKey().decode("utf-8"),
|
||||
container_name=container_name,
|
||||
timeout=timeout))
|
||||
timeout=timeout,
|
||||
))
|
||||
return output
|
||||
except OSError as exc:
|
||||
raise exc
|
||||
|
@ -316,14 +324,16 @@ class CoreClient:
|
|||
self.__delete_user_on_pool(generated_username, pool.id, nodes)
|
||||
|
||||
@deprecated("0.10.0")
|
||||
def __cluster_copy(self,
|
||||
cluster_id,
|
||||
source_path,
|
||||
destination_path=None,
|
||||
container_name=None,
|
||||
internal=False,
|
||||
get=False,
|
||||
timeout=None):
|
||||
def __cluster_copy(
|
||||
self,
|
||||
cluster_id,
|
||||
source_path,
|
||||
destination_path=None,
|
||||
container_name=None,
|
||||
internal=False,
|
||||
get=False,
|
||||
timeout=None,
|
||||
):
|
||||
pool, nodes = self.__get_pool_details(cluster_id)
|
||||
nodes = list(nodes)
|
||||
if internal:
|
||||
|
@ -340,9 +350,10 @@ class CoreClient:
|
|||
nodes=cluster_nodes,
|
||||
source_path=source_path,
|
||||
destination_path=destination_path,
|
||||
ssh_key=ssh_key.exportKey().decode('utf-8'),
|
||||
ssh_key=ssh_key.exportKey().decode("utf-8"),
|
||||
get=get,
|
||||
timeout=timeout))
|
||||
timeout=timeout,
|
||||
))
|
||||
return output
|
||||
except (OSError, batch_error.BatchErrorException) as exc:
|
||||
raise exc
|
||||
|
@ -375,8 +386,16 @@ class CoreClient:
|
|||
)
|
||||
|
||||
@deprecated("0.10.0")
|
||||
def __submit_job(self, job_configuration, start_task, job_manager_task, autoscale_formula,
|
||||
software_metadata_key: str, vm_image_model, application_metadata):
|
||||
def __submit_job(
|
||||
self,
|
||||
job_configuration,
|
||||
start_task,
|
||||
job_manager_task,
|
||||
autoscale_formula,
|
||||
software_metadata_key: str,
|
||||
vm_image_model,
|
||||
application_metadata,
|
||||
):
|
||||
"""
|
||||
Job Submission
|
||||
:param job_configuration -> aztk_sdk.spark.models.JobConfiguration
|
||||
|
@ -390,9 +409,8 @@ class CoreClient:
|
|||
self._get_cluster_data(job_configuration.id).save_cluster_config(job_configuration.to_cluster_config())
|
||||
|
||||
# get a verified node agent sku
|
||||
sku_to_use, image_ref_to_use = \
|
||||
helpers.select_latest_verified_vm_image_with_node_agent_sku(
|
||||
vm_image_model.publisher, vm_image_model.offer, vm_image_model.sku, self.batch_client)
|
||||
sku_to_use, image_ref_to_use = helpers.select_latest_verified_vm_image_with_node_agent_sku(
|
||||
vm_image_model.publisher, vm_image_model.offer, vm_image_model.sku, self.batch_client)
|
||||
|
||||
# set up subnet if necessary
|
||||
network_conf = None
|
||||
|
@ -419,8 +437,10 @@ class CoreClient:
|
|||
metadata=[
|
||||
batch_models.MetadataItem(name=constants.AZTK_SOFTWARE_METADATA_KEY, value=software_metadata_key),
|
||||
batch_models.MetadataItem(
|
||||
name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_JOB_MODE_METADATA)
|
||||
]))
|
||||
name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_JOB_MODE_METADATA),
|
||||
],
|
||||
),
|
||||
)
|
||||
|
||||
# define job specification
|
||||
job_spec = batch_models.JobSpecification(
|
||||
|
@ -428,7 +448,8 @@ class CoreClient:
|
|||
display_name=job_configuration.id,
|
||||
on_all_tasks_complete=batch_models.OnAllTasksComplete.terminate_job,
|
||||
job_manager_task=job_manager_task,
|
||||
metadata=[batch_models.MetadataItem(name='applications', value=application_metadata)])
|
||||
metadata=[batch_models.MetadataItem(name="applications", value=application_metadata)],
|
||||
)
|
||||
|
||||
# define schedule
|
||||
schedule = batch_models.Schedule(
|
||||
|
|
|
@ -8,14 +8,16 @@ from aztk.utils import ssh as ssh_lib
|
|||
from aztk.utils import helpers
|
||||
|
||||
|
||||
def cluster_copy(cluster_operations,
|
||||
cluster_id,
|
||||
source_path,
|
||||
destination_path=None,
|
||||
container_name=None,
|
||||
internal=False,
|
||||
get=False,
|
||||
timeout=None):
|
||||
def cluster_copy(
|
||||
cluster_operations,
|
||||
cluster_id,
|
||||
source_path,
|
||||
destination_path=None,
|
||||
container_name=None,
|
||||
internal=False,
|
||||
get=False,
|
||||
timeout=None,
|
||||
):
|
||||
cluster = cluster_operations.get(cluster_id)
|
||||
pool, nodes = cluster.pool, list(cluster.nodes)
|
||||
if internal:
|
||||
|
@ -36,9 +38,10 @@ def cluster_copy(cluster_operations,
|
|||
nodes=cluster_nodes,
|
||||
source_path=source_path,
|
||||
destination_path=destination_path,
|
||||
ssh_key=ssh_key.exportKey().decode('utf-8'),
|
||||
ssh_key=ssh_key.exportKey().decode("utf-8"),
|
||||
get=get,
|
||||
timeout=timeout))
|
||||
timeout=timeout,
|
||||
))
|
||||
return output
|
||||
except (OSError, batch_error.BatchErrorException) as exc:
|
||||
raise exc
|
||||
|
|
|
@ -5,8 +5,13 @@ from aztk import models
|
|||
from aztk.utils import helpers, constants
|
||||
|
||||
|
||||
def create_pool_and_job(core_cluster_operations, cluster_conf: models.ClusterConfiguration, software_metadata_key: str,
|
||||
start_task, VmImageModel):
|
||||
def create_pool_and_job(
|
||||
core_cluster_operations,
|
||||
cluster_conf: models.ClusterConfiguration,
|
||||
software_metadata_key: str,
|
||||
start_task,
|
||||
VmImageModel,
|
||||
):
|
||||
"""
|
||||
Create a pool and job
|
||||
:param cluster_conf: the configuration object used to create the cluster
|
||||
|
@ -22,9 +27,8 @@ def create_pool_and_job(core_cluster_operations, cluster_conf: models.ClusterCon
|
|||
job_id = cluster_conf.cluster_id
|
||||
|
||||
# Get a verified node agent sku
|
||||
sku_to_use, image_ref_to_use = \
|
||||
helpers.select_latest_verified_vm_image_with_node_agent_sku(
|
||||
VmImageModel.publisher, VmImageModel.offer, VmImageModel.sku, core_cluster_operations.batch_client)
|
||||
sku_to_use, image_ref_to_use = helpers.select_latest_verified_vm_image_with_node_agent_sku(
|
||||
VmImageModel.publisher, VmImageModel.offer, VmImageModel.sku, core_cluster_operations.batch_client)
|
||||
|
||||
network_conf = None
|
||||
if cluster_conf.subnet_id is not None:
|
||||
|
@ -48,8 +52,9 @@ def create_pool_and_job(core_cluster_operations, cluster_conf: models.ClusterCon
|
|||
metadata=[
|
||||
batch_models.MetadataItem(name=constants.AZTK_SOFTWARE_METADATA_KEY, value=software_metadata_key),
|
||||
batch_models.MetadataItem(
|
||||
name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_CLUSTER_MODE_METADATA)
|
||||
])
|
||||
name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_CLUSTER_MODE_METADATA),
|
||||
],
|
||||
)
|
||||
|
||||
# Create the pool + create user for the pool
|
||||
helpers.create_pool_if_not_exist(pool, core_cluster_operations.batch_client)
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
import azure.batch.models as batch_models
|
||||
from msrest.exceptions import ClientRequestError
|
||||
|
||||
from aztk.utils import BackOffPolicy, retry
|
||||
|
||||
|
||||
def delete_pool_and_job(core_cluster_operations, pool_id: str, keep_logs: bool = False):
|
||||
|
@ -19,13 +22,18 @@ def delete_pool_and_job(core_cluster_operations, pool_id: str, keep_logs: bool =
|
|||
pool_exists = core_cluster_operations.batch_client.pool.exists(pool_id)
|
||||
|
||||
if job_exists:
|
||||
core_cluster_operations.batch_client.job.delete(job_id)
|
||||
delete_batch_object(core_cluster_operations.batch_client.job.delete, job_id)
|
||||
|
||||
if pool_exists:
|
||||
core_cluster_operations.batch_client.pool.delete(pool_id)
|
||||
delete_batch_object(core_cluster_operations.batch_client.pool.delete, pool_id)
|
||||
|
||||
if not keep_logs:
|
||||
cluster_data = core_cluster_operations.get_cluster_data(pool_id)
|
||||
cluster_data.delete_container(pool_id)
|
||||
|
||||
return job_exists or pool_exists
|
||||
|
||||
|
||||
@retry(retry_count=4, retry_interval=1, backoff_policy=BackOffPolicy.exponential, exceptions=(ClientRequestError))
|
||||
def delete_batch_object(function, *args, **kwargs):
|
||||
return function(*args, **kwargs)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#TODO: return Cluster instead of (pool, nodes)
|
||||
# TODO: return Cluster instead of (pool, nodes)
|
||||
from aztk import models
|
||||
|
||||
|
||||
|
|
|
@ -13,7 +13,8 @@ class CoreClusterOperations(BaseOperations):
|
|||
cluster_configuration (:obj:`aztk.models.ClusterConfiguration`): Configuration for the cluster to be created
|
||||
software_metadata_key (:obj:`str`): the key for the primary software that will be run on the cluster
|
||||
start_task (:obj:`azure.batch.models.StartTask`): Batch StartTask definition to configure the Batch Pool
|
||||
vm_image_model (:obj:`azure.batch.models.VirtualMachineConfiguration`): Configuration of the virtual machine image and settings
|
||||
vm_image_model (:obj:`azure.batch.models.VirtualMachineConfiguration`):
|
||||
Configuration of the virtual machine image and settings
|
||||
|
||||
Returns:
|
||||
:obj:`aztk.models.Cluster`: A Cluster object representing the state and configuration of the cluster.
|
||||
|
@ -52,7 +53,8 @@ class CoreClusterOperations(BaseOperations):
|
|||
Defaults to None.
|
||||
|
||||
Returns:
|
||||
:obj:`List[aztk.models.NodeOutput]`: A list of NodeOutput objects representing the output of the copy command.
|
||||
:obj:`List[aztk.models.NodeOutput]`:
|
||||
A list of NodeOutput objects representing the output of the copy command.
|
||||
"""
|
||||
return copy.cluster_copy(self, id, source_path, destination_path, container_name, internal, get, timeout)
|
||||
|
||||
|
@ -65,7 +67,8 @@ class CoreClusterOperations(BaseOperations):
|
|||
Defaults to False.
|
||||
|
||||
Returns:
|
||||
:obj:`List[aztk.models.NodeOutput]`: A list of NodeOutput objects representing the output of the copy command.
|
||||
:obj:`List[aztk.models.NodeOutput]`:
|
||||
A list of NodeOutput objects representing the output of the copy command.
|
||||
"""
|
||||
return delete.delete_pool_and_job(self, id, keep_logs)
|
||||
|
||||
|
|
|
@ -1,11 +1,20 @@
|
|||
from datetime import timedelta
|
||||
|
||||
import azure.batch.models as batch_models
|
||||
from aztk.utils import helpers, constants
|
||||
|
||||
from aztk.utils import constants, helpers
|
||||
|
||||
|
||||
def submit_job(job_client, job_configuration, start_task, job_manager_task, autoscale_formula,
|
||||
software_metadata_key: str, vm_image_model, application_metadata):
|
||||
def submit_job(
|
||||
job_client,
|
||||
job_configuration,
|
||||
start_task,
|
||||
job_manager_task,
|
||||
autoscale_formula,
|
||||
software_metadata_key: str,
|
||||
vm_image_model,
|
||||
application_metadata,
|
||||
):
|
||||
"""
|
||||
Job Submission
|
||||
:param job_configuration -> aztk_sdk.spark.models.JobConfiguration
|
||||
|
@ -19,9 +28,8 @@ def submit_job(job_client, job_configuration, start_task, job_manager_task, auto
|
|||
job_client.get_cluster_data(job_configuration.id).save_cluster_config(job_configuration.to_cluster_config())
|
||||
|
||||
# get a verified node agent sku
|
||||
sku_to_use, image_ref_to_use = \
|
||||
helpers.select_latest_verified_vm_image_with_node_agent_sku(
|
||||
vm_image_model.publisher, vm_image_model.offer, vm_image_model.sku, job_client.batch_client)
|
||||
sku_to_use, image_ref_to_use = helpers.select_latest_verified_vm_image_with_node_agent_sku(
|
||||
vm_image_model.publisher, vm_image_model.offer, vm_image_model.sku, job_client.batch_client)
|
||||
|
||||
# set up subnet if necessary
|
||||
network_conf = None
|
||||
|
@ -48,8 +56,10 @@ def submit_job(job_client, job_configuration, start_task, job_manager_task, auto
|
|||
metadata=[
|
||||
batch_models.MetadataItem(name=constants.AZTK_SOFTWARE_METADATA_KEY, value=software_metadata_key),
|
||||
batch_models.MetadataItem(
|
||||
name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_JOB_MODE_METADATA)
|
||||
]))
|
||||
name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_JOB_MODE_METADATA),
|
||||
],
|
||||
),
|
||||
)
|
||||
|
||||
# define job specification
|
||||
job_spec = batch_models.JobSpecification(
|
||||
|
@ -57,7 +67,8 @@ def submit_job(job_client, job_configuration, start_task, job_manager_task, auto
|
|||
display_name=job_configuration.id,
|
||||
on_all_tasks_complete=batch_models.OnAllTasksComplete.terminate_job,
|
||||
job_manager_task=job_manager_task,
|
||||
metadata=[batch_models.MetadataItem(name='applications', value=application_metadata)])
|
||||
metadata=[batch_models.MetadataItem(name="applications", value=application_metadata)],
|
||||
)
|
||||
|
||||
# define schedule
|
||||
schedule = batch_models.Schedule(
|
||||
|
|
|
@ -4,8 +4,16 @@ from .helpers import submit
|
|||
|
||||
|
||||
class CoreJobOperations(BaseOperations):
|
||||
def submit(self, job_configuration, start_task, job_manager_task, autoscale_formula, software_metadata_key: str,
|
||||
vm_image_model, application_metadata):
|
||||
def submit(
|
||||
self,
|
||||
job_configuration,
|
||||
start_task,
|
||||
job_manager_task,
|
||||
autoscale_formula,
|
||||
software_metadata_key: str,
|
||||
vm_image_model,
|
||||
application_metadata,
|
||||
):
|
||||
"""Submit a job
|
||||
|
||||
Jobs are a cluster definition and one or many application definitions which run on the cluster. The job's
|
||||
|
@ -26,5 +34,13 @@ class CoreJobOperations(BaseOperations):
|
|||
Returns:
|
||||
:obj:`azure.batch.models.CloudJobSchedule`: Model representing the Azure Batch JobSchedule state.
|
||||
"""
|
||||
return submit.submit_job(self, job_configuration, start_task, job_manager_task, autoscale_formula,
|
||||
software_metadata_key, vm_image_model, application_metadata)
|
||||
return submit.submit_job(
|
||||
self,
|
||||
job_configuration,
|
||||
start_task,
|
||||
job_manager_task,
|
||||
autoscale_formula,
|
||||
software_metadata_key,
|
||||
vm_image_model,
|
||||
application_metadata,
|
||||
)
|
||||
|
|
|
@ -35,8 +35,8 @@ class Field:
|
|||
"""
|
||||
|
||||
def __init__(self, *validators, **kwargs):
|
||||
self.default = kwargs.get('default')
|
||||
self.required = 'default' not in kwargs
|
||||
self.default = kwargs.get("default")
|
||||
self.required = "default" not in kwargs
|
||||
self.validators = []
|
||||
|
||||
if self.required:
|
||||
|
@ -44,7 +44,7 @@ class Field:
|
|||
|
||||
self.validators.extend(validators)
|
||||
|
||||
choices = kwargs.get('choices')
|
||||
choices = kwargs.get("choices")
|
||||
if choices:
|
||||
self.validators.append(aztk_validators.In(choices))
|
||||
|
||||
|
@ -134,11 +134,11 @@ class List(Field):
|
|||
|
||||
def __init__(self, model=None, **kwargs):
|
||||
self.model = model
|
||||
kwargs.setdefault('default', list)
|
||||
self.merge_strategy = kwargs.get('merge_strategy', ListMergeStrategy.Append)
|
||||
self.skip_none = kwargs.get('skip_none', True)
|
||||
kwargs.setdefault("default", list)
|
||||
self.merge_strategy = kwargs.get("merge_strategy", ListMergeStrategy.Append)
|
||||
self.skip_none = kwargs.get("skip_none", True)
|
||||
|
||||
super().__init__(aztk_validators.List(*kwargs.get('inner_validators', [])), **kwargs)
|
||||
super().__init__(aztk_validators.List(*kwargs.get("inner_validators", [])), **kwargs)
|
||||
|
||||
def __set__(self, instance, value):
|
||||
if isinstance(value, collections.MutableSequence):
|
||||
|
@ -175,7 +175,7 @@ class List(Field):
|
|||
output = []
|
||||
if items is not None:
|
||||
for item in items:
|
||||
if hasattr(item, 'to_dict'):
|
||||
if hasattr(item, "to_dict"):
|
||||
output.append(item.to_dict())
|
||||
else:
|
||||
output.append(item)
|
||||
|
@ -196,7 +196,7 @@ class Model(Field):
|
|||
super().__init__(aztk_validators.Model(model), *args, **kwargs)
|
||||
|
||||
self.model = model
|
||||
self.merge_strategy = kwargs.get('merge_strategy', ModelMergeStrategy.Merge)
|
||||
self.merge_strategy = kwargs.get("merge_strategy", ModelMergeStrategy.Merge)
|
||||
|
||||
def __set__(self, instance, value):
|
||||
if isinstance(value, collections.MutableMapping):
|
||||
|
|
|
@ -11,19 +11,19 @@ class ModelMeta(type):
|
|||
"""
|
||||
|
||||
def __new__(mcs, name, bases, attrs):
|
||||
attrs['_fields'] = {}
|
||||
attrs["_fields"] = {}
|
||||
|
||||
for base in bases:
|
||||
if hasattr(base, '_fields'):
|
||||
if hasattr(base, "_fields"):
|
||||
for k, v in base._fields.items():
|
||||
attrs['_fields'][k] = v
|
||||
attrs["_fields"][k] = v
|
||||
for k, v in base.__dict__.items():
|
||||
if isinstance(v, fields.Field):
|
||||
attrs['_fields'][k] = v
|
||||
attrs["_fields"][k] = v
|
||||
|
||||
for k, v in attrs.items():
|
||||
if isinstance(v, fields.Field):
|
||||
attrs['_fields'][k] = v
|
||||
attrs["_fields"][k] = v
|
||||
|
||||
return super().__new__(mcs, name, bases, attrs)
|
||||
|
||||
|
@ -84,7 +84,7 @@ class Model(metaclass=ModelMeta):
|
|||
e.model = self
|
||||
raise e
|
||||
|
||||
if hasattr(self, '__validate__'):
|
||||
if hasattr(self, "__validate__"):
|
||||
self.__validate__()
|
||||
|
||||
def merge(self, other):
|
||||
|
|
|
@ -24,7 +24,7 @@ class Required(Validator):
|
|||
|
||||
def validate(self, value):
|
||||
if value is None:
|
||||
raise InvalidModelFieldError('is required')
|
||||
raise InvalidModelFieldError("is required")
|
||||
|
||||
|
||||
class String(Validator):
|
||||
|
@ -37,7 +37,7 @@ class String(Validator):
|
|||
return
|
||||
|
||||
if not isinstance(value, str):
|
||||
raise InvalidModelFieldError('{0} should be a string'.format(value))
|
||||
raise InvalidModelFieldError("{0} should be a string".format(value))
|
||||
|
||||
|
||||
class Integer(Validator):
|
||||
|
@ -50,7 +50,7 @@ class Integer(Validator):
|
|||
return
|
||||
|
||||
if not isinstance(value, int):
|
||||
raise InvalidModelFieldError('{0} should be an integer'.format(value))
|
||||
raise InvalidModelFieldError("{0} should be an integer".format(value))
|
||||
|
||||
|
||||
class Float(Validator):
|
||||
|
@ -63,7 +63,7 @@ class Float(Validator):
|
|||
return
|
||||
|
||||
if not isinstance(value, float):
|
||||
raise InvalidModelFieldError('{0} should be a float'.format(value))
|
||||
raise InvalidModelFieldError("{0} should be a float".format(value))
|
||||
|
||||
|
||||
class Boolean(Validator):
|
||||
|
@ -74,7 +74,7 @@ class Boolean(Validator):
|
|||
return
|
||||
|
||||
if not isinstance(value, bool):
|
||||
raise InvalidModelFieldError('{0} should be a boolean'.format(value))
|
||||
raise InvalidModelFieldError("{0} should be a boolean".format(value))
|
||||
|
||||
|
||||
class In(Validator):
|
||||
|
@ -90,7 +90,7 @@ class In(Validator):
|
|||
return
|
||||
|
||||
if value not in self.choices:
|
||||
raise InvalidModelFieldError('{0} should be in {1}'.format(value, self.choices))
|
||||
raise InvalidModelFieldError("{0} should be in {1}".format(value, self.choices))
|
||||
|
||||
|
||||
class InstanceOf(Validator):
|
||||
|
@ -140,7 +140,7 @@ class List(Validator):
|
|||
return
|
||||
|
||||
if not isinstance(value, collections.MutableSequence):
|
||||
raise InvalidModelFieldError('should be a list')
|
||||
raise InvalidModelFieldError("should be a list")
|
||||
|
||||
for i in value:
|
||||
for validator in self.validators:
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import azure.batch.models as batch_models
|
||||
import datetime
|
||||
from azure.storage.blob import BlockBlobService, BlobPermissions
|
||||
|
||||
import azure.batch.models as batch_models
|
||||
from azure.storage.blob import BlobPermissions, BlockBlobService
|
||||
|
||||
|
||||
class BlobData:
|
||||
|
@ -19,7 +20,8 @@ class BlobData:
|
|||
self.container,
|
||||
self.blob,
|
||||
permission=BlobPermissions.READ,
|
||||
expiry=datetime.datetime.utcnow() + datetime.timedelta(days=365))
|
||||
expiry=datetime.datetime.utcnow() + datetime.timedelta(days=365),
|
||||
)
|
||||
|
||||
sas_url = self.blob_client.make_blob_url(self.container, self.blob, sas_token=sas_token)
|
||||
|
||||
|
|
|
@ -3,8 +3,10 @@ import logging
|
|||
|
||||
import azure.common
|
||||
import yaml
|
||||
from msrest.exceptions import ClientRequestError
|
||||
|
||||
from aztk.models import ClusterConfiguration
|
||||
from aztk.utils import BackOffPolicy, retry
|
||||
|
||||
from .blob_data import BlobData
|
||||
from .node_data import NodeData
|
||||
|
@ -14,6 +16,7 @@ class ClusterData:
|
|||
"""
|
||||
Class handling the management of data for a cluster
|
||||
"""
|
||||
|
||||
# ALl data related to cluster(config, metadata, etc.) should be under this folder
|
||||
CLUSTER_DIR = "cluster"
|
||||
APPLICATIONS_DIR = "applications"
|
||||
|
@ -24,26 +27,30 @@ class ClusterData:
|
|||
self.cluster_id = cluster_id
|
||||
self._ensure_container()
|
||||
|
||||
@retry(retry_count=4, retry_interval=1, backoff_policy=BackOffPolicy.exponential, exceptions=(ClientRequestError))
|
||||
def save_cluster_config(self, cluster_config):
|
||||
blob_path = self.CLUSTER_DIR + "/" + self.CLUSTER_CONFIG_FILE
|
||||
content = yaml.dump(cluster_config)
|
||||
container_name = cluster_config.cluster_id
|
||||
self.blob_client.create_blob_from_text(container_name, blob_path, content)
|
||||
|
||||
@retry(retry_count=4, retry_interval=1, backoff_policy=BackOffPolicy.exponential, exceptions=(ClientRequestError))
|
||||
def read_cluster_config(self):
|
||||
blob_path = self.CLUSTER_DIR + "/" + self.CLUSTER_CONFIG_FILE
|
||||
try:
|
||||
result = self.blob_client.get_blob_to_text(self.cluster_id, blob_path)
|
||||
return yaml.load(result.content)
|
||||
except azure.common.AzureMissingResourceHttpError:
|
||||
logging.warn("Cluster %s doesn't have cluster configuration in storage", self.cluster_id)
|
||||
logging.warning("Cluster %s doesn't have cluster configuration in storage", self.cluster_id)
|
||||
except yaml.YAMLError:
|
||||
logging.warn("Cluster %s contains invalid cluster configuration in blob", self.cluster_id)
|
||||
logging.warning("Cluster %s contains invalid cluster configuration in blob", self.cluster_id)
|
||||
|
||||
@retry(retry_count=4, retry_interval=1, backoff_policy=BackOffPolicy.exponential, exceptions=(ClientRequestError))
|
||||
def upload_file(self, blob_path: str, local_path: str) -> BlobData:
|
||||
self.blob_client.create_blob_from_path(self.cluster_id, blob_path, local_path)
|
||||
return BlobData(self.blob_client, self.cluster_id, blob_path)
|
||||
|
||||
@retry(retry_count=4, retry_interval=1, backoff_policy=BackOffPolicy.exponential, exceptions=(ClientRequestError))
|
||||
def upload_bytes(self, blob_path: str, bytes_io: io.BytesIO) -> BlobData:
|
||||
self.blob_client.create_blob_from_bytes(self.cluster_id, blob_path, bytes_io.getvalue())
|
||||
return BlobData(self.blob_client, self.cluster_id, blob_path)
|
||||
|
@ -61,8 +68,10 @@ class ClusterData:
|
|||
def upload_node_data(self, node_data: NodeData) -> BlobData:
|
||||
return self.upload_cluster_file("node-scripts.zip", node_data.zip_path)
|
||||
|
||||
@retry(retry_count=4, retry_interval=1, backoff_policy=BackOffPolicy.exponential, exceptions=(ClientRequestError))
|
||||
def _ensure_container(self):
|
||||
self.blob_client.create_container(self.cluster_id, fail_on_exist=False)
|
||||
|
||||
@retry(retry_count=4, retry_interval=1, backoff_policy=BackOffPolicy.exponential, exceptions=(ClientRequestError))
|
||||
def delete_container(self, container_name: str):
|
||||
self.blob_client.delete_container(container_name)
|
||||
|
|
|
@ -44,11 +44,11 @@ class NodeData:
|
|||
return
|
||||
if isinstance(file, (str, bytes)):
|
||||
full_file_path = Path(file)
|
||||
with io.open(file, 'r', encoding='UTF-8') as f:
|
||||
with io.open(file, "r", encoding="UTF-8") as f:
|
||||
if binary:
|
||||
self.zipf.write(file, os.path.join(zip_dir, full_file_path.name))
|
||||
else:
|
||||
self.zipf.writestr(os.path.join(zip_dir, full_file_path.name), f.read().replace('\r\n', '\n'))
|
||||
self.zipf.writestr(os.path.join(zip_dir, full_file_path.name), f.read().replace("\r\n", "\n"))
|
||||
elif isinstance(file, models.File):
|
||||
self.zipf.writestr(os.path.join(zip_dir, file.name), file.payload.getvalue())
|
||||
|
||||
|
@ -77,36 +77,38 @@ class NodeData:
|
|||
return
|
||||
self.add_files(
|
||||
[
|
||||
spark_configuration.spark_defaults_conf, spark_configuration.spark_env_sh,
|
||||
spark_configuration.core_site_xml
|
||||
spark_configuration.spark_defaults_conf,
|
||||
spark_configuration.spark_env_sh,
|
||||
spark_configuration.core_site_xml,
|
||||
],
|
||||
'conf',
|
||||
binary=False)
|
||||
"conf",
|
||||
binary=False,
|
||||
)
|
||||
|
||||
# add ssh keys for passwordless ssh
|
||||
self.zipf.writestr('id_rsa.pub', spark_configuration.ssh_key_pair['pub_key'])
|
||||
self.zipf.writestr('id_rsa', spark_configuration.ssh_key_pair['priv_key'])
|
||||
self.zipf.writestr("id_rsa.pub", spark_configuration.ssh_key_pair["pub_key"])
|
||||
self.zipf.writestr("id_rsa", spark_configuration.ssh_key_pair["priv_key"])
|
||||
|
||||
if spark_configuration.jars:
|
||||
for jar in spark_configuration.jars:
|
||||
self.add_file(jar, 'jars', binary=True)
|
||||
self.add_file(jar, "jars", binary=True)
|
||||
|
||||
def _add_user_conf(self):
|
||||
user_conf = self.cluster_config.user_configuration
|
||||
if not user_conf:
|
||||
return
|
||||
encrypted_aes_session_key, cipher_aes_nonce, tag, ciphertext = secure_utils.encrypt_password(
|
||||
self.cluster_config.spark_configuration.ssh_key_pair['pub_key'], user_conf.password)
|
||||
self.cluster_config.spark_configuration.ssh_key_pair["pub_key"], user_conf.password)
|
||||
user_conf = yaml.dump({
|
||||
'username': user_conf.username,
|
||||
'password': ciphertext,
|
||||
'ssh-key': user_conf.ssh_key,
|
||||
'aes_session_key': encrypted_aes_session_key,
|
||||
'cipher_aes_nonce': cipher_aes_nonce,
|
||||
'tag': tag,
|
||||
'cluster_id': self.cluster_config.cluster_id
|
||||
"username": user_conf.username,
|
||||
"password": ciphertext,
|
||||
"ssh-key": user_conf.ssh_key,
|
||||
"aes_session_key": encrypted_aes_session_key,
|
||||
"cipher_aes_nonce": cipher_aes_nonce,
|
||||
"tag": tag,
|
||||
"cluster_id": self.cluster_config.cluster_id,
|
||||
})
|
||||
self.zipf.writestr('user.yaml', user_conf)
|
||||
self.zipf.writestr("user.yaml", user_conf)
|
||||
|
||||
def _add_plugins(self):
|
||||
if not self.cluster_config.plugins:
|
||||
|
@ -115,23 +117,22 @@ class NodeData:
|
|||
data = []
|
||||
for plugin in self.cluster_config.plugins:
|
||||
for file in plugin.files:
|
||||
zipf = self.zipf.writestr('plugins/{0}/{1}'.format(plugin.name, file.target), file.content())
|
||||
self.zipf.writestr("plugins/{0}/{1}".format(plugin.name, file.target), file.content())
|
||||
if plugin.execute:
|
||||
data.append(
|
||||
dict(
|
||||
name=plugin.name,
|
||||
execute='{0}/{1}'.format(plugin.name, plugin.execute),
|
||||
execute="{0}/{1}".format(plugin.name, plugin.execute),
|
||||
args=plugin.args,
|
||||
env=plugin.env,
|
||||
target=plugin.target.value,
|
||||
target_role=plugin.target_role.value,
|
||||
))
|
||||
|
||||
self.zipf.writestr(os.path.join('plugins', 'plugins-manifest.yaml'), yaml.dump(data))
|
||||
return zipf
|
||||
self.zipf.writestr(os.path.join("plugins", "plugins-manifest.yaml"), yaml.dump(data))
|
||||
|
||||
def _add_node_scripts(self):
|
||||
self.add_dir(os.path.join(ROOT_PATH, NODE_SCRIPT_FOLDER), NODE_SCRIPT_FOLDER, exclude=['*.pyc*', '*.png'])
|
||||
self.add_dir(os.path.join(ROOT_PATH, NODE_SCRIPT_FOLDER), NODE_SCRIPT_FOLDER, exclude=["*.pyc*", "*.png"])
|
||||
|
||||
def _includeFile(self, filename: str, exclude: List[str]) -> bool:
|
||||
exclude = exclude or []
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import os
|
||||
from aztk.utils.command_builder import CommandBuilder
|
||||
|
||||
|
||||
|
@ -9,30 +8,30 @@ class DockerCmd:
|
|||
|
||||
def __init__(self, name: str, docker_repo: str, docker_run_options: str, cmd: str, gpu_enabled=False):
|
||||
if gpu_enabled:
|
||||
self.cmd = CommandBuilder('nvidia-docker run')
|
||||
self.cmd = CommandBuilder("nvidia-docker run")
|
||||
else:
|
||||
self.cmd = CommandBuilder('docker run')
|
||||
self.cmd.add_option('--net', 'host')
|
||||
self.cmd.add_option('--name', name)
|
||||
self.cmd.add_argument('-d')
|
||||
self.cmd = CommandBuilder("docker run")
|
||||
self.cmd.add_option("--net", "host")
|
||||
self.cmd.add_option("--name", name)
|
||||
self.cmd.add_argument("-d")
|
||||
self.cmd.add_argument(docker_run_options)
|
||||
self.cmd.add_argument(docker_repo)
|
||||
self.cmd.add_argument(cmd)
|
||||
|
||||
def add_env(self, env: str, value: str):
|
||||
self.cmd.add_option('-e', '{0}={1}'.format(env, value))
|
||||
self.cmd.add_option("-e", "{0}={1}".format(env, value))
|
||||
|
||||
def pass_env(self, env: str):
|
||||
"""
|
||||
Give the value of an environment variable in the main process to the docker image
|
||||
"""
|
||||
self.cmd.add_option('-e', '{0}'.format(env))
|
||||
self.cmd.add_option("-e", "{0}".format(env))
|
||||
|
||||
def share_folder(self, folder: str):
|
||||
self.cmd.add_option('-v', '{0}:{0}'.format(folder))
|
||||
self.cmd.add_option("-v", "{0}:{0}".format(folder))
|
||||
|
||||
def open_port(self, port: int):
|
||||
self.cmd.add_option('-p', '{0}:{0}'.format(port)) # Spark Master UI
|
||||
self.cmd.add_option("-p", "{0}:{0}".format(port)) # Spark Master UI
|
||||
|
||||
def to_str(self):
|
||||
return self.cmd.to_str()
|
||||
|
|
|
@ -1,9 +1,16 @@
|
|||
import azure.batch.models as batch_models
|
||||
|
||||
|
||||
class ApplicationLog():
|
||||
def __init__(self, name: str, cluster_id: str, log: str, total_bytes: int,
|
||||
application_state: batch_models.TaskState, exit_code: int):
|
||||
class ApplicationLog:
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
cluster_id: str,
|
||||
log: str,
|
||||
total_bytes: int,
|
||||
application_state: batch_models.TaskState,
|
||||
exit_code: int,
|
||||
):
|
||||
self.name = name
|
||||
self.cluster_id = cluster_id # TODO: change to something cluster/job agnostic
|
||||
self.log = log
|
||||
|
|
|
@ -11,10 +11,8 @@ class Cluster:
|
|||
self.visible_state = pool.allocation_state.value
|
||||
else:
|
||||
self.visible_state = pool.state.value
|
||||
self.total_current_nodes = pool.current_dedicated_nodes + \
|
||||
pool.current_low_priority_nodes
|
||||
self.total_target_nodes = pool.target_dedicated_nodes + \
|
||||
pool.target_low_priority_nodes
|
||||
self.total_current_nodes = pool.current_dedicated_nodes + pool.current_low_priority_nodes
|
||||
self.total_target_nodes = pool.target_dedicated_nodes + pool.target_low_priority_nodes
|
||||
self.current_dedicated_nodes = pool.current_dedicated_nodes
|
||||
self.current_low_pri_nodes = pool.current_low_priority_nodes
|
||||
self.target_dedicated_nodes = pool.target_dedicated_nodes
|
||||
|
|
|
@ -61,8 +61,8 @@ class ClusterConfiguration(Model):
|
|||
def __validate__(self) -> bool:
|
||||
if self.size == 0 and self.size_low_priority == 0:
|
||||
raise error.InvalidModelError(
|
||||
"Please supply a valid (greater than 0) size or size_low_priority value either in the cluster.yaml configuration file or with a parameter (--size or --size-low-priority)"
|
||||
)
|
||||
"Please supply a valid (greater than 0) size or size_low_priority value either "
|
||||
"in the cluster.yaml configuration file or with a parameter (--size or --size-low-priority)")
|
||||
|
||||
if self.vm_size is None:
|
||||
raise error.InvalidModelError(
|
||||
|
@ -70,8 +70,8 @@ class ClusterConfiguration(Model):
|
|||
|
||||
if self.mixed_mode() and not self.subnet_id:
|
||||
raise error.InvalidModelError(
|
||||
"You must configure a VNET to use AZTK in mixed mode (dedicated and low priority nodes). Set the VNET's subnet_id in your cluster.yaml or with a parameter (--subnet-id)."
|
||||
)
|
||||
"You must configure a VNET to use AZTK in mixed mode (dedicated and low priority nodes). "
|
||||
"Set the VNET's subnet_id in your cluster.yaml or with a parameter (--subnet-id).")
|
||||
|
||||
if self.scheduling_target == SchedulingTarget.Dedicated and self.size == 0:
|
||||
raise error.InvalidModelError("Scheduling target cannot be Dedicated if dedicated vm size is 0")
|
||||
|
|
|
@ -1,7 +1,4 @@
|
|||
import os
|
||||
import inspect
|
||||
import importlib.util
|
||||
from aztk.utils import constants
|
||||
from aztk.error import InvalidPluginReferenceError
|
||||
from aztk.spark.models import plugins
|
||||
|
||||
|
@ -28,7 +25,8 @@ class PluginManager:
|
|||
nvblas=plugins.NvBLASPlugin,
|
||||
apt_get=plugins.AptGetPlugin,
|
||||
pip_install=plugins.PipPlugin,
|
||||
conda_install=plugins.CondaPlugin)
|
||||
conda_install=plugins.CondaPlugin,
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
self.loaded = False
|
||||
|
|
|
@ -50,7 +50,5 @@ class PluginReference(Model):
|
|||
execute=script_filename,
|
||||
target=self.target,
|
||||
target_role=self.target_role or PluginConfiguration,
|
||||
files=[
|
||||
PluginFile(script_filename, self.script),
|
||||
],
|
||||
files=[PluginFile(script_filename, self.script)],
|
||||
)
|
||||
|
|
|
@ -9,6 +9,7 @@ class PluginTarget(Enum):
|
|||
"""
|
||||
Where this plugin should run
|
||||
"""
|
||||
|
||||
SparkContainer = "spark-container"
|
||||
Host = "host"
|
||||
|
||||
|
@ -26,6 +27,7 @@ class PluginPort(Model):
|
|||
:param public: [Optional] Port available to the user. If none won't open any port to the user
|
||||
:param name: [Optional] name to differentiate ports if you have multiple
|
||||
"""
|
||||
|
||||
internal = fields.Integer()
|
||||
public = fields.Field(default=None)
|
||||
name = fields.Integer()
|
||||
|
@ -55,6 +57,7 @@ class PluginConfiguration(Model):
|
|||
args: List of arguments to pass to the executing script
|
||||
env: Dict of environment variables to pass to the script
|
||||
"""
|
||||
|
||||
name = fields.String()
|
||||
files = fields.List(PluginFile)
|
||||
execute = fields.String()
|
||||
|
|
|
@ -15,7 +15,7 @@ class PluginFile(Model):
|
|||
super().__init__(target=target, local_path=local_path)
|
||||
|
||||
def content(self):
|
||||
with open(self.local_path, "r", encoding='UTF-8') as f:
|
||||
with open(self.local_path, "r", encoding="UTF-8") as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ class ServicePrincipalConfiguration(Model):
|
|||
"""
|
||||
Container class for AAD authentication
|
||||
"""
|
||||
|
||||
tenant_id = fields.String()
|
||||
client_id = fields.String()
|
||||
credential = fields.String()
|
||||
|
@ -17,6 +18,7 @@ class SharedKeyConfiguration(Model):
|
|||
"""
|
||||
Container class for shared key authentication
|
||||
"""
|
||||
|
||||
batch_account_name = fields.String()
|
||||
batch_account_key = fields.String()
|
||||
batch_service_url = fields.String()
|
||||
|
@ -34,6 +36,7 @@ class DockerConfiguration(Model):
|
|||
username (str): Docker endpoint username
|
||||
password (str): Docker endpoint password
|
||||
"""
|
||||
|
||||
endpoint = fields.String(default=None)
|
||||
username = fields.String(default=None)
|
||||
password = fields.String(default=None)
|
||||
|
|
|
@ -2,4 +2,5 @@ class Software:
|
|||
"""
|
||||
Enum with list of available softwares
|
||||
"""
|
||||
|
||||
spark = "spark"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
class SSHLog():
|
||||
class SSHLog:
|
||||
def __init__(self, output, node_id):
|
||||
self.output = output
|
||||
self.node_id = node_id
|
||||
|
|
|
@ -25,8 +25,8 @@ TOOLKIT_MAP = dict(
|
|||
r=ToolkitEnvironmentDefinition(),
|
||||
miniconda=ToolkitEnvironmentDefinition(),
|
||||
anaconda=ToolkitEnvironmentDefinition(),
|
||||
)),
|
||||
)
|
||||
),
|
||||
))
|
||||
|
||||
|
||||
class Toolkit(Model):
|
||||
|
@ -74,12 +74,12 @@ class Toolkit(Model):
|
|||
self.environment, self.environment_version, self.software, env_def.versions))
|
||||
|
||||
if self.docker_run_options:
|
||||
invalid_character = re.search('[^A-Za-z0-9 _./:=\-\"]', self.docker_run_options)
|
||||
invalid_character = re.search(r'[^A-Za-z0-9 _./:=\-"]', self.docker_run_options)
|
||||
if invalid_character:
|
||||
raise InvalidModelError(
|
||||
"Docker run options contains invalid character '{0}'. Only A-Z, a-z, 0-9, space, hyphen (-), "
|
||||
"underscore (_), period (.), forward slash (/), colon (:), equals(=), comma (,), and "
|
||||
"double quote (\") are allowed.".format(invalid_character.group(0)))
|
||||
'double quote (") are allowed.'.format(invalid_character.group(0)))
|
||||
|
||||
def get_docker_repo(self, gpu: bool):
|
||||
if self.docker_repo:
|
||||
|
@ -87,10 +87,7 @@ class Toolkit(Model):
|
|||
|
||||
repo = "aztk/{0}".format(self.software)
|
||||
|
||||
return "{repo}:{tag}".format(
|
||||
repo=repo,
|
||||
tag=self._get_docker_tag(gpu),
|
||||
)
|
||||
return "{repo}:{tag}".format(repo=repo, tag=self._get_docker_tag(gpu))
|
||||
|
||||
def get_docker_run_options(self):
|
||||
return self.docker_run_options
|
||||
|
@ -109,7 +106,7 @@ class Toolkit(Model):
|
|||
|
||||
array.append("gpu" if gpu else "base")
|
||||
|
||||
return '-'.join(array)
|
||||
return "-".join(array)
|
||||
|
||||
def _get_environment_definition(self) -> ToolkitEnvironmentDefinition:
|
||||
toolkit = TOOLKIT_MAP.get(self.software)
|
||||
|
|
|
@ -1,19 +1,20 @@
|
|||
import os
|
||||
import re
|
||||
import logging
|
||||
|
||||
import azure.batch.batch_auth as batchauth
|
||||
import azure.batch.batch_service_client as batch
|
||||
import azure.storage.blob as blob
|
||||
import azure.batch.batch_auth as batchauth
|
||||
from core import log
|
||||
from azure.common.credentials import ServicePrincipalCredentials
|
||||
from azure.mgmt.batch import BatchManagementClient
|
||||
from azure.mgmt.storage import StorageManagementClient
|
||||
from azure.storage.common import CloudStorageAccount
|
||||
|
||||
RESOURCE_ID_PATTERN = re.compile('^/subscriptions/(?P<subscription>[^/]+)'
|
||||
'/resourceGroups/(?P<resourcegroup>[^/]+)'
|
||||
'/providers/[^/]+'
|
||||
'/[^/]+Accounts/(?P<account>[^/]+)$')
|
||||
from core import log
|
||||
|
||||
RESOURCE_ID_PATTERN = re.compile("^/subscriptions/(?P<subscription>[^/]+)"
|
||||
"/resourceGroups/(?P<resourcegroup>[^/]+)"
|
||||
"/providers/[^/]+"
|
||||
"/[^/]+Accounts/(?P<account>[^/]+)$")
|
||||
|
||||
batch_account_name = os.environ.get("AZ_BATCH_ACCOUNT_NAME")
|
||||
batch_account_key = os.environ.get("BATCH_ACCOUNT_KEY")
|
||||
|
@ -44,14 +45,14 @@ def get_blob_client() -> blob.BlockBlobService:
|
|||
account_name=storage_account_name, account_key=storage_account_key, endpoint_suffix=storage_account_suffix)
|
||||
else:
|
||||
credentials = ServicePrincipalCredentials(
|
||||
client_id=client_id, secret=credential, tenant=tenant_id, resource='https://management.core.windows.net/')
|
||||
client_id=client_id, secret=credential, tenant=tenant_id, resource="https://management.core.windows.net/")
|
||||
m = RESOURCE_ID_PATTERN.match(storage_resource_id)
|
||||
accountname = m.group('account')
|
||||
subscription = m.group('subscription')
|
||||
resourcegroup = m.group('resourcegroup')
|
||||
accountname = m.group("account")
|
||||
subscription = m.group("subscription")
|
||||
resourcegroup = m.group("resourcegroup")
|
||||
mgmt_client = StorageManagementClient(credentials, subscription)
|
||||
key = mgmt_client.storage_accounts.list_keys(
|
||||
resource_group_name=resourcegroup, account_name=accountname).keys[0].value
|
||||
key = (mgmt_client.storage_accounts.list_keys(resource_group_name=resourcegroup, account_name=accountname)
|
||||
.keys[0].value)
|
||||
storage_client = CloudStorageAccount(accountname, key)
|
||||
return storage_client.create_block_blob_service()
|
||||
|
||||
|
@ -62,13 +63,13 @@ def get_batch_client() -> batch.BatchServiceClient:
|
|||
credentials = batchauth.SharedKeyCredentials(batch_account_name, batch_account_key)
|
||||
else:
|
||||
credentials = ServicePrincipalCredentials(
|
||||
client_id=client_id, secret=credential, tenant=tenant_id, resource='https://management.core.windows.net/')
|
||||
client_id=client_id, secret=credential, tenant=tenant_id, resource="https://management.core.windows.net/")
|
||||
m = RESOURCE_ID_PATTERN.match(batch_resource_id)
|
||||
batch_client = BatchManagementClient(credentials, m.group('subscription'))
|
||||
account = batch_client.batch_account.get(m.group('resourcegroup'), m.group('account'))
|
||||
base_url = 'https://%s/' % account.account_endpoint
|
||||
batch_client = BatchManagementClient(credentials, m.group("subscription"))
|
||||
account = batch_client.batch_account.get(m.group("resourcegroup"), m.group("account"))
|
||||
base_url = "https://%s/" % account.account_endpoint
|
||||
credentials = ServicePrincipalCredentials(
|
||||
client_id=client_id, secret=credential, tenant=tenant_id, resource='https://batch.core.windows.net/')
|
||||
client_id=client_id, secret=credential, tenant=tenant_id, resource="https://batch.core.windows.net/")
|
||||
|
||||
return batch.BatchServiceClient(credentials, base_url=base_url)
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ import logging
|
|||
|
||||
log = logging.getLogger("aztk.node-agent")
|
||||
|
||||
DEFAULT_FORMAT = '%(message)s'
|
||||
DEFAULT_FORMAT = "%(message)s"
|
||||
|
||||
|
||||
def setup_logging():
|
||||
|
|
|
@ -5,45 +5,47 @@ from Cryptodome.PublicKey import RSA
|
|||
from Cryptodome.Cipher import AES, PKCS1_OAEP
|
||||
from datetime import datetime, timezone, timedelta
|
||||
import yaml
|
||||
'''
|
||||
"""
|
||||
Creates a user if the user configuration file at $AZTK_WORKING_DIR/user.yaml exists
|
||||
'''
|
||||
"""
|
||||
|
||||
|
||||
def create_user(batch_client):
|
||||
path = os.path.join(os.environ['AZTK_WORKING_DIR'], "user.yaml")
|
||||
path = os.path.join(os.environ["AZTK_WORKING_DIR"], "user.yaml")
|
||||
|
||||
if not os.path.isfile(path):
|
||||
print("No user to create.")
|
||||
return
|
||||
|
||||
with open(path, 'r', encoding='UTF-8') as file:
|
||||
with open(path, "r", encoding="UTF-8") as file:
|
||||
user_conf = yaml.load(file.read())
|
||||
|
||||
try:
|
||||
password = None if user_conf['ssh-key'] else decrypt_password(user_conf)
|
||||
password = None if user_conf["ssh-key"] else decrypt_password(user_conf)
|
||||
|
||||
batch_client.compute_node.add_user(
|
||||
pool_id=os.environ['AZ_BATCH_POOL_ID'],
|
||||
node_id=os.environ['AZ_BATCH_NODE_ID'],
|
||||
pool_id=os.environ["AZ_BATCH_POOL_ID"],
|
||||
node_id=os.environ["AZ_BATCH_NODE_ID"],
|
||||
user=batch_models.ComputeNodeUser(
|
||||
name=user_conf['username'],
|
||||
name=user_conf["username"],
|
||||
is_admin=True,
|
||||
password=password,
|
||||
ssh_public_key=str(user_conf['ssh-key']),
|
||||
expiry_time=datetime.now(timezone.utc) + timedelta(days=365)))
|
||||
ssh_public_key=str(user_conf["ssh-key"]),
|
||||
expiry_time=datetime.now(timezone.utc) + timedelta(days=365),
|
||||
),
|
||||
)
|
||||
except batch_error.BatchErrorException as e:
|
||||
print(e)
|
||||
|
||||
|
||||
def decrypt_password(user_conf):
|
||||
cipher_text = user_conf['password']
|
||||
encrypted_aes_session_key = user_conf['aes_session_key']
|
||||
cipher_aes_nonce = user_conf['cipher_aes_nonce']
|
||||
tag = user_conf['tag']
|
||||
cipher_text = user_conf["password"]
|
||||
encrypted_aes_session_key = user_conf["aes_session_key"]
|
||||
cipher_aes_nonce = user_conf["cipher_aes_nonce"]
|
||||
tag = user_conf["tag"]
|
||||
|
||||
# Read private key
|
||||
with open(os.path.join(os.environ['AZTK_WORKING_DIR'], 'id_rsa'), encoding='UTF-8') as f:
|
||||
with open(os.path.join(os.environ["AZTK_WORKING_DIR"], "id_rsa"), encoding="UTF-8") as f:
|
||||
private_key = RSA.import_key(f.read())
|
||||
# Decrypt the session key with the public RSA key
|
||||
cipher_rsa = PKCS1_OAEP.new(private_key)
|
||||
|
|
|
@ -25,7 +25,7 @@ def setup_host(docker_repo: str, docker_run_options: str):
|
|||
client = config.batch_client
|
||||
|
||||
create_user.create_user(batch_client=client)
|
||||
if os.environ['AZ_BATCH_NODE_IS_DEDICATED'] == "true" or os.environ['AZTK_MIXED_MODE'] == "false":
|
||||
if os.environ["AZ_BATCH_NODE_IS_DEDICATED"] == "true" or os.environ["AZTK_MIXED_MODE"] == "false":
|
||||
is_master = pick_master.find_master(client)
|
||||
else:
|
||||
is_master = False
|
||||
|
@ -50,7 +50,7 @@ def setup_host(docker_repo: str, docker_run_options: str):
|
|||
|
||||
setup_node_scheduling(client, cluster_conf, is_master)
|
||||
|
||||
#TODO pass azure file shares
|
||||
# TODO pass azure file shares
|
||||
spark_container.start_spark_container(
|
||||
docker_repo=docker_repo,
|
||||
docker_run_options=docker_run_options,
|
||||
|
@ -82,4 +82,4 @@ def setup_spark_container():
|
|||
|
||||
plugins.setup_plugins(target=PluginTarget.SparkContainer, is_master=is_master, is_worker=is_worker)
|
||||
|
||||
open("/tmp/setup_complete", 'a').close()
|
||||
open("/tmp/setup_complete", "a").close()
|
||||
|
|
|
@ -37,8 +37,8 @@ def try_assign_self_as_master(client: batch.BatchServiceClient, pool: batchmodel
|
|||
client.pool.patch(
|
||||
config.pool_id,
|
||||
batchmodels.PoolPatchParameter(metadata=new_metadata),
|
||||
batchmodels.PoolPatchOptions(if_match=pool.e_tag,
|
||||
))
|
||||
batchmodels.PoolPatchOptions(if_match=pool.e_tag),
|
||||
)
|
||||
return True
|
||||
except (batcherror.BatchErrorException, ClientRequestError):
|
||||
print("Couldn't assign itself as master the pool because the pool was modified since last get.")
|
||||
|
|
|
@ -1,18 +1,19 @@
|
|||
import os
|
||||
import json
|
||||
import yaml
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from aztk.models.plugins import PluginTarget, PluginTargetRole
|
||||
|
||||
log_folder = os.path.join(os.environ['AZTK_WORKING_DIR'], 'logs', 'plugins')
|
||||
log_folder = os.path.join(os.environ["AZTK_WORKING_DIR"], "logs", "plugins")
|
||||
|
||||
|
||||
def _read_manifest_file(path=None):
|
||||
if not os.path.isfile(path):
|
||||
print("Plugins manifest file doesn't exist at {0}".format(path))
|
||||
else:
|
||||
with open(path, 'r', encoding='UTF-8') as stream:
|
||||
with open(path, "r", encoding="UTF-8") as stream:
|
||||
try:
|
||||
return yaml.load(stream)
|
||||
except json.JSONDecodeError as err:
|
||||
|
@ -22,7 +23,7 @@ def _read_manifest_file(path=None):
|
|||
def setup_plugins(target: PluginTarget, is_master: bool = False, is_worker: bool = False):
|
||||
|
||||
plugins_dir = _plugins_dir()
|
||||
plugins_manifest = _read_manifest_file(os.path.join(plugins_dir, 'plugins-manifest.yaml'))
|
||||
plugins_manifest = _read_manifest_file(os.path.join(plugins_dir, "plugins-manifest.yaml"))
|
||||
|
||||
if not os.path.exists(log_folder):
|
||||
os.makedirs(log_folder)
|
||||
|
@ -32,28 +33,41 @@ def setup_plugins(target: PluginTarget, is_master: bool = False, is_worker: bool
|
|||
|
||||
|
||||
def _plugins_dir():
|
||||
return os.path.join(os.environ['AZTK_WORKING_DIR'], 'plugins')
|
||||
return os.path.join(os.environ["AZTK_WORKING_DIR"], "plugins")
|
||||
|
||||
|
||||
def _run_on_this_node(plugin_obj, target: PluginTarget, is_master, is_worker):
|
||||
|
||||
print("Loading plugin {} in {} on {}".format(plugin_obj["execute"], plugin_obj['target'],
|
||||
plugin_obj['target_role']))
|
||||
print("Loading plugin {} in {} on {}".format(plugin_obj["execute"], plugin_obj["target"],
|
||||
plugin_obj["target_role"]))
|
||||
|
||||
if plugin_obj['target'] != target.value:
|
||||
print("Ignoring ", plugin_obj["execute"], "as target is for ", plugin_obj['target'],
|
||||
"but is currently running in ", target.value)
|
||||
if plugin_obj["target"] != target.value:
|
||||
print(
|
||||
"Ignoring ",
|
||||
plugin_obj["execute"],
|
||||
"as target is for ",
|
||||
plugin_obj["target"],
|
||||
"but is currently running in ",
|
||||
target.value,
|
||||
)
|
||||
return False
|
||||
|
||||
if plugin_obj['target_role'] == PluginTargetRole.Master.value and is_master is True:
|
||||
if plugin_obj["target_role"] == PluginTargetRole.Master.value and is_master is True:
|
||||
return True
|
||||
if plugin_obj['target_role'] == PluginTargetRole.Worker.value and is_worker is True:
|
||||
if plugin_obj["target_role"] == PluginTargetRole.Worker.value and is_worker is True:
|
||||
return True
|
||||
if plugin_obj['target_role'] == PluginTargetRole.All.value:
|
||||
if plugin_obj["target_role"] == PluginTargetRole.All.value:
|
||||
return True
|
||||
|
||||
print("Ignoring plugin", plugin_obj["execute"], "as target role is ", plugin_obj['target_role'],
|
||||
"and node is master: ", is_master, is_worker)
|
||||
print(
|
||||
"Ignoring plugin",
|
||||
plugin_obj["execute"],
|
||||
"as target role is ",
|
||||
plugin_obj["target_role"],
|
||||
"and node is master: ",
|
||||
is_master,
|
||||
is_worker,
|
||||
)
|
||||
|
||||
return False
|
||||
|
||||
|
@ -63,8 +77,8 @@ def _setup_plugins(plugins_manifest, target: PluginTarget, is_master, is_worker)
|
|||
|
||||
for plugin in plugins_manifest:
|
||||
if _run_on_this_node(plugin, target, is_master, is_worker):
|
||||
path = os.path.join(plugins_dir, plugin['execute'])
|
||||
_run_script(plugin.get("name"), path, plugin.get('args'), plugin.get('env'))
|
||||
path = os.path.join(plugins_dir, plugin["execute"])
|
||||
_run_script(plugin.get("name"), path, plugin.get("args"), plugin.get("env"))
|
||||
|
||||
|
||||
def _run_script(name: str, script_path: str = None, args: dict = None, env: dict = None):
|
||||
|
@ -84,7 +98,7 @@ def _run_script(name: str, script_path: str = None, args: dict = None, env: dict
|
|||
if args is None:
|
||||
args = []
|
||||
|
||||
out_file = open(os.path.join(log_folder, '{0}.txt'.format(name)), 'w', encoding='UTF-8')
|
||||
out_file = open(os.path.join(log_folder, "{0}.txt".format(name)), "w", encoding="UTF-8")
|
||||
try:
|
||||
subprocess.call([script_path] + args, env=my_env, stdout=out_file, stderr=out_file)
|
||||
print("Finished running")
|
||||
|
|
|
@ -2,13 +2,14 @@
|
|||
Code that handle spark configuration
|
||||
"""
|
||||
import datetime
|
||||
import time
|
||||
import os
|
||||
import json
|
||||
import shutil
|
||||
from subprocess import call, Popen, check_output
|
||||
import time
|
||||
from subprocess import call
|
||||
from typing import List
|
||||
|
||||
import azure.batch.models as batchmodels
|
||||
|
||||
from core import config
|
||||
from install import pick_master
|
||||
|
||||
|
@ -55,7 +56,7 @@ def setup_connection():
|
|||
master_node = get_node(master_node_id)
|
||||
|
||||
master_config_file = os.path.join(spark_conf_folder, "master")
|
||||
master_file = open(master_config_file, 'w', encoding='UTF-8')
|
||||
master_file = open(master_config_file, "w", encoding="UTF-8")
|
||||
|
||||
print("Adding master node ip {0} to config file '{1}'".format(master_node.ip_address, master_config_file))
|
||||
master_file.write("{0}\n".format(master_node.ip_address))
|
||||
|
@ -127,9 +128,9 @@ def setup_conf():
|
|||
|
||||
|
||||
def setup_ssh_keys():
|
||||
pub_key_path_src = os.path.join(os.environ['AZTK_WORKING_DIR'], 'id_rsa.pub')
|
||||
priv_key_path_src = os.path.join(os.environ['AZTK_WORKING_DIR'], 'id_rsa')
|
||||
ssh_key_dest = '/root/.ssh'
|
||||
pub_key_path_src = os.path.join(os.environ["AZTK_WORKING_DIR"], "id_rsa.pub")
|
||||
priv_key_path_src = os.path.join(os.environ["AZTK_WORKING_DIR"], "id_rsa")
|
||||
ssh_key_dest = "/root/.ssh"
|
||||
|
||||
if not os.path.exists(ssh_key_dest):
|
||||
os.mkdir(ssh_key_dest)
|
||||
|
@ -139,27 +140,27 @@ def setup_ssh_keys():
|
|||
|
||||
|
||||
def copy_spark_env():
|
||||
spark_env_path_src = os.path.join(os.environ['AZTK_WORKING_DIR'], 'conf/spark-env.sh')
|
||||
spark_env_path_dest = os.path.join(spark_home, 'conf/spark-env.sh')
|
||||
spark_env_path_src = os.path.join(os.environ["AZTK_WORKING_DIR"], "conf/spark-env.sh")
|
||||
spark_env_path_dest = os.path.join(spark_home, "conf/spark-env.sh")
|
||||
copyfile(spark_env_path_src, spark_env_path_dest)
|
||||
|
||||
|
||||
def copy_spark_defaults():
|
||||
spark_default_path_src = os.path.join(os.environ['AZTK_WORKING_DIR'], 'conf/spark-defaults.conf')
|
||||
spark_default_path_dest = os.path.join(spark_home, 'conf/spark-defaults.conf')
|
||||
spark_default_path_src = os.path.join(os.environ["AZTK_WORKING_DIR"], "conf/spark-defaults.conf")
|
||||
spark_default_path_dest = os.path.join(spark_home, "conf/spark-defaults.conf")
|
||||
copyfile(spark_default_path_src, spark_default_path_dest)
|
||||
|
||||
|
||||
def copy_core_site():
|
||||
spark_core_site_src = os.path.join(os.environ['AZTK_WORKING_DIR'], 'conf/core-site.xml')
|
||||
spark_core_site_dest = os.path.join(spark_home, 'conf/core-site.xml')
|
||||
spark_core_site_src = os.path.join(os.environ["AZTK_WORKING_DIR"], "conf/core-site.xml")
|
||||
spark_core_site_dest = os.path.join(spark_home, "conf/core-site.xml")
|
||||
copyfile(spark_core_site_src, spark_core_site_dest)
|
||||
|
||||
|
||||
def copy_jars():
|
||||
# Copy jars to $SPARK_HOME/jars
|
||||
spark_default_path_src = os.path.join(os.environ['AZTK_WORKING_DIR'], 'jars')
|
||||
spark_default_path_dest = os.path.join(spark_home, 'jars')
|
||||
spark_default_path_src = os.path.join(os.environ["AZTK_WORKING_DIR"], "jars")
|
||||
spark_default_path_dest = os.path.join(spark_home, "jars")
|
||||
|
||||
try:
|
||||
jar_files = os.listdir(spark_default_path_src)
|
||||
|
@ -175,10 +176,10 @@ def copy_jars():
|
|||
|
||||
def parse_configuration_file(path_to_file: str):
|
||||
try:
|
||||
file = open(path_to_file, 'r', encoding='UTF-8')
|
||||
file = open(path_to_file, "r", encoding="UTF-8")
|
||||
properties = {}
|
||||
for line in file:
|
||||
if (not line.startswith('#') and len(line) > 1):
|
||||
if not line.startswith("#") and len(line) > 1:
|
||||
split = line.split()
|
||||
properties[split[0]] = split[1]
|
||||
return properties
|
||||
|
@ -189,10 +190,10 @@ def parse_configuration_file(path_to_file: str):
|
|||
|
||||
def start_history_server():
|
||||
# configure the history server
|
||||
spark_event_log_enabled_key = 'spark.eventLog.enabled'
|
||||
spark_event_log_directory_key = 'spark.eventLog.dir'
|
||||
spark_history_fs_log_directory = 'spark.history.fs.logDirectory'
|
||||
path_to_spark_defaults_conf = os.path.join(spark_home, 'conf/spark-defaults.conf')
|
||||
spark_event_log_enabled_key = "spark.eventLog.enabled"
|
||||
spark_event_log_directory_key = "spark.eventLog.dir"
|
||||
spark_history_fs_log_directory = "spark.history.fs.logDirectory"
|
||||
path_to_spark_defaults_conf = os.path.join(spark_home, "conf/spark-defaults.conf")
|
||||
properties = parse_configuration_file(path_to_spark_defaults_conf)
|
||||
required_keys = [spark_event_log_enabled_key, spark_event_log_directory_key, spark_history_fs_log_directory]
|
||||
|
||||
|
@ -208,17 +209,17 @@ def start_history_server():
|
|||
def configure_history_server_log_path(path_to_log_file):
|
||||
# Check if the file path starts with a local file extension
|
||||
# If so, create the path on disk otherwise ignore
|
||||
print('Configuring spark history server log directory {}.'.format(path_to_log_file))
|
||||
if path_to_log_file.startswith('file:/'):
|
||||
print("Configuring spark history server log directory {}.".format(path_to_log_file))
|
||||
if path_to_log_file.startswith("file:/"):
|
||||
# create the local path on disk
|
||||
directory = path_to_log_file.replace('file:', '')
|
||||
directory = path_to_log_file.replace("file:", "")
|
||||
if os.path.exists(directory):
|
||||
print('Skipping. Directory {} already exists.'.format(directory))
|
||||
print("Skipping. Directory {} already exists.".format(directory))
|
||||
else:
|
||||
print('Create directory {}.'.format(directory))
|
||||
print("Create directory {}.".format(directory))
|
||||
os.makedirs(directory)
|
||||
|
||||
# Make sure the directory can be accessed by all users
|
||||
os.chmod(directory, mode=0o777)
|
||||
else:
|
||||
print('Skipping. The eventLog directory is not local.')
|
||||
print("Skipping. The eventLog directory is not local.")
|
||||
|
|
|
@ -15,42 +15,43 @@ def start_spark_container(docker_repo: str = None,
|
|||
docker_repo=docker_repo,
|
||||
docker_run_options=docker_run_options,
|
||||
cmd="/bin/bash /mnt/batch/tasks/startup/wd/aztk/node_scripts/docker_main.sh",
|
||||
gpu_enabled=gpu_enabled)
|
||||
gpu_enabled=gpu_enabled,
|
||||
)
|
||||
|
||||
if file_mounts:
|
||||
for mount in file_mounts:
|
||||
cmd.share_folder(mount.mount_path)
|
||||
cmd.share_folder('/mnt')
|
||||
cmd.share_folder("/mnt")
|
||||
|
||||
cmd.pass_env('AZTK_WORKING_DIR')
|
||||
cmd.pass_env('AZ_BATCH_ACCOUNT_NAME')
|
||||
cmd.pass_env('BATCH_ACCOUNT_KEY')
|
||||
cmd.pass_env('BATCH_SERVICE_URL')
|
||||
cmd.pass_env('STORAGE_ACCOUNT_NAME')
|
||||
cmd.pass_env('STORAGE_ACCOUNT_KEY')
|
||||
cmd.pass_env('STORAGE_ACCOUNT_SUFFIX')
|
||||
cmd.pass_env("AZTK_WORKING_DIR")
|
||||
cmd.pass_env("AZ_BATCH_ACCOUNT_NAME")
|
||||
cmd.pass_env("BATCH_ACCOUNT_KEY")
|
||||
cmd.pass_env("BATCH_SERVICE_URL")
|
||||
cmd.pass_env("STORAGE_ACCOUNT_NAME")
|
||||
cmd.pass_env("STORAGE_ACCOUNT_KEY")
|
||||
cmd.pass_env("STORAGE_ACCOUNT_SUFFIX")
|
||||
|
||||
cmd.pass_env('SP_TENANT_ID')
|
||||
cmd.pass_env('SP_CLIENT_ID')
|
||||
cmd.pass_env('SP_CREDENTIAL')
|
||||
cmd.pass_env('SP_BATCH_RESOURCE_ID')
|
||||
cmd.pass_env('SP_STORAGE_RESOURCE_ID')
|
||||
cmd.pass_env("SP_TENANT_ID")
|
||||
cmd.pass_env("SP_CLIENT_ID")
|
||||
cmd.pass_env("SP_CREDENTIAL")
|
||||
cmd.pass_env("SP_BATCH_RESOURCE_ID")
|
||||
cmd.pass_env("SP_STORAGE_RESOURCE_ID")
|
||||
|
||||
cmd.pass_env('AZ_BATCH_POOL_ID')
|
||||
cmd.pass_env('AZ_BATCH_NODE_ID')
|
||||
cmd.pass_env('AZ_BATCH_NODE_IS_DEDICATED')
|
||||
cmd.pass_env("AZ_BATCH_POOL_ID")
|
||||
cmd.pass_env("AZ_BATCH_NODE_ID")
|
||||
cmd.pass_env("AZ_BATCH_NODE_IS_DEDICATED")
|
||||
|
||||
cmd.pass_env('AZTK_WORKER_ON_MASTER')
|
||||
cmd.pass_env('AZTK_MIXED_MODE')
|
||||
cmd.pass_env('AZTK_IS_MASTER')
|
||||
cmd.pass_env('AZTK_IS_WORKER')
|
||||
cmd.pass_env('AZTK_MASTER_IP')
|
||||
cmd.pass_env("AZTK_WORKER_ON_MASTER")
|
||||
cmd.pass_env("AZTK_MIXED_MODE")
|
||||
cmd.pass_env("AZTK_IS_MASTER")
|
||||
cmd.pass_env("AZTK_IS_WORKER")
|
||||
cmd.pass_env("AZTK_MASTER_IP")
|
||||
|
||||
cmd.pass_env('SPARK_WEB_UI_PORT')
|
||||
cmd.pass_env('SPARK_WORKER_UI_PORT')
|
||||
cmd.pass_env('SPARK_CONTAINER_NAME')
|
||||
cmd.pass_env('SPARK_SUBMIT_LOGS_FILE')
|
||||
cmd.pass_env('SPARK_JOB_UI_PORT')
|
||||
cmd.pass_env("SPARK_WEB_UI_PORT")
|
||||
cmd.pass_env("SPARK_WORKER_UI_PORT")
|
||||
cmd.pass_env("SPARK_CONTAINER_NAME")
|
||||
cmd.pass_env("SPARK_SUBMIT_LOGS_FILE")
|
||||
cmd.pass_env("SPARK_JOB_UI_PORT")
|
||||
|
||||
cmd.open_port(8080) # Spark Master UI
|
||||
cmd.open_port(7077) # Spark Master
|
||||
|
@ -69,5 +70,5 @@ def start_spark_container(docker_repo: str = None,
|
|||
print("-" * 60)
|
||||
print(cmd.to_str())
|
||||
print("=" * 60)
|
||||
subprocess.call(['/bin/bash', '-c', 'echo Is master?: $AZTK_IS_MASTER _ $AZTK_IS_WORKER'])
|
||||
subprocess.call(['/bin/bash', '-c', cmd.to_str()])
|
||||
subprocess.call(["/bin/bash", "-c", "echo Is master?: $AZTK_IS_MASTER _ $AZTK_IS_WORKER"])
|
||||
subprocess.call(["/bin/bash", "-c", cmd.to_str()])
|
||||
|
|
|
@ -1,12 +1,8 @@
|
|||
import datetime
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import List
|
||||
|
||||
import azure.batch.models as batch_models
|
||||
import azure.storage.blob as blob
|
||||
import yaml
|
||||
from aztk.utils.command_builder import CommandBuilder
|
||||
|
||||
from core import config
|
||||
from install.pick_master import get_master_node_id
|
||||
|
||||
|
@ -20,14 +16,13 @@ def affinitize_task_to_master(batch_client, cluster_id, task):
|
|||
|
||||
|
||||
def schedule_tasks(tasks_path):
|
||||
'''
|
||||
"""
|
||||
Handle the request to submit a task
|
||||
'''
|
||||
"""
|
||||
batch_client = config.batch_client
|
||||
blob_client = config.blob_client
|
||||
|
||||
for task_definition in tasks_path:
|
||||
with open(task_definition, 'r', encoding='UTF-8') as stream:
|
||||
with open(task_definition, "r", encoding="UTF-8") as stream:
|
||||
try:
|
||||
task = yaml.load(stream)
|
||||
except yaml.YAMLError as exc:
|
||||
|
@ -36,13 +31,13 @@ def schedule_tasks(tasks_path):
|
|||
# affinitize task to master
|
||||
task = affinitize_task_to_master(batch_client, os.environ["AZ_BATCH_POOL_ID"], task)
|
||||
# schedule the task
|
||||
batch_client.task.add(job_id=os.environ['AZ_BATCH_JOB_ID'], task=task)
|
||||
batch_client.task.add(job_id=os.environ["AZ_BATCH_JOB_ID"], task=task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
tasks_path = []
|
||||
for file in os.listdir(os.environ['AZ_BATCH_TASK_WORKING_DIR']):
|
||||
for file in os.listdir(os.environ["AZ_BATCH_TASK_WORKING_DIR"]):
|
||||
if file.endswith(".yaml"):
|
||||
tasks_path.append(os.path.join(os.environ['AZ_BATCH_TASK_WORKING_DIR'], file))
|
||||
tasks_path.append(os.path.join(os.environ["AZ_BATCH_TASK_WORKING_DIR"], file))
|
||||
|
||||
schedule_tasks(tasks_path)
|
||||
|
|
|
@ -42,7 +42,9 @@ install_prerequisites () {
|
|||
|
||||
install_docker_compose () {
|
||||
echo "Installing Docker-Compose"
|
||||
sudo curl -L https://github.com/docker/compose/releases/download/1.19.0/docker-compose-`uname -s`-`uname -m` -o /usr/local/bin/docker-compose
|
||||
for i in {1..5}; do
|
||||
sudo curl -L https://github.com/docker/compose/releases/download/1.19.0/docker-compose-`uname -s`-`uname -m` -o /usr/local/bin/docker-compose && break || sleep 2;
|
||||
done
|
||||
sudo chmod +x /usr/local/bin/docker-compose
|
||||
echo "Finished installing Docker-Compose"
|
||||
}
|
||||
|
@ -64,9 +66,9 @@ pull_docker_container () {
|
|||
install_python_dependencies () {
|
||||
echo "Installing python dependencies"
|
||||
pipenv install --python /usr/bin/python3.5m
|
||||
pipenv run pip install --upgrade setuptools wheel #TODO: add pip when pipenv is compatible with pip10
|
||||
pipenv run pip install --upgrade pip setuptools wheel
|
||||
pip --version
|
||||
echo "Finished installing python dependencies"
|
||||
|
||||
}
|
||||
|
||||
run_docker_container () {
|
||||
|
|
|
@ -1,20 +1,22 @@
|
|||
import sys
|
||||
import os
|
||||
import logging
|
||||
import yaml
|
||||
import subprocess
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import List
|
||||
import azure.storage.blob as blob
|
||||
|
||||
import azure.batch.models as batch_models
|
||||
import azure.storage.blob as blob
|
||||
import yaml
|
||||
|
||||
from aztk.utils.command_builder import CommandBuilder
|
||||
from core import config
|
||||
|
||||
# limit azure.storage logging
|
||||
logging.getLogger("azure.storage").setLevel(logging.CRITICAL)
|
||||
'''
|
||||
"""
|
||||
Submit helper methods
|
||||
'''
|
||||
"""
|
||||
|
||||
|
||||
def upload_file_to_container(container_name,
|
||||
|
@ -40,7 +42,7 @@ def upload_file_to_container(container_name,
|
|||
blob_name = file_path.strip("/")
|
||||
else:
|
||||
blob_name = os.path.basename(file_path)
|
||||
blob_path = application_name + '/' + blob_name
|
||||
blob_path = application_name + "/" + blob_name
|
||||
|
||||
if not node_path:
|
||||
node_path = blob_name
|
||||
|
@ -53,47 +55,60 @@ def upload_file_to_container(container_name,
|
|||
container_name,
|
||||
blob_path,
|
||||
permission=blob.BlobPermissions.READ,
|
||||
expiry=datetime.datetime.utcnow() + datetime.timedelta(days=7))
|
||||
expiry=datetime.datetime.utcnow() + datetime.timedelta(days=7),
|
||||
)
|
||||
|
||||
sas_url = blob_client.make_blob_url(container_name, blob_path, sas_token=sas_token)
|
||||
|
||||
return batch_models.ResourceFile(file_path=node_path, blob_source=sas_url)
|
||||
|
||||
|
||||
def __app_submit_cmd(name: str, app: str, app_args: List[str], main_class: str, jars: List[str], py_files: List[str],
|
||||
files: List[str], driver_java_options: str, driver_library_path: str, driver_class_path: str,
|
||||
driver_memory: str, executor_memory: str, driver_cores: int, executor_cores: int):
|
||||
cluster_id = os.environ['AZ_BATCH_POOL_ID']
|
||||
spark_home = os.environ['SPARK_HOME']
|
||||
with open(os.path.join(spark_home, 'conf', 'master')) as f:
|
||||
def __app_submit_cmd(
|
||||
name: str,
|
||||
app: str,
|
||||
app_args: List[str],
|
||||
main_class: str,
|
||||
jars: List[str],
|
||||
py_files: List[str],
|
||||
files: List[str],
|
||||
driver_java_options: str,
|
||||
driver_library_path: str,
|
||||
driver_class_path: str,
|
||||
driver_memory: str,
|
||||
executor_memory: str,
|
||||
driver_cores: int,
|
||||
executor_cores: int,
|
||||
):
|
||||
spark_home = os.environ["SPARK_HOME"]
|
||||
with open(os.path.join(spark_home, "conf", "master")) as f:
|
||||
master_ip = f.read().rstrip()
|
||||
|
||||
# set file paths to correct path on container
|
||||
files_path = os.environ['AZ_BATCH_TASK_WORKING_DIR']
|
||||
files_path = os.environ["AZ_BATCH_TASK_WORKING_DIR"]
|
||||
jars = [os.path.join(files_path, os.path.basename(jar)) for jar in jars]
|
||||
py_files = [os.path.join(files_path, os.path.basename(py_file)) for py_file in py_files]
|
||||
files = [os.path.join(files_path, os.path.basename(f)) for f in files]
|
||||
|
||||
# 2>&1 redirect stdout and stderr to be in the same file
|
||||
spark_submit_cmd = CommandBuilder('{0}/bin/spark-submit'.format(spark_home))
|
||||
spark_submit_cmd.add_option('--master', 'spark://{0}:7077'.format(master_ip))
|
||||
spark_submit_cmd.add_option('--name', name)
|
||||
spark_submit_cmd.add_option('--class', main_class)
|
||||
spark_submit_cmd.add_option('--jars', jars and ','.join(jars))
|
||||
spark_submit_cmd.add_option('--py-files', py_files and ','.join(py_files))
|
||||
spark_submit_cmd.add_option('--files', files and ','.join(files))
|
||||
spark_submit_cmd.add_option('--driver-java-options', driver_java_options)
|
||||
spark_submit_cmd.add_option('--driver-library-path', driver_library_path)
|
||||
spark_submit_cmd.add_option('--driver-class-path', driver_class_path)
|
||||
spark_submit_cmd.add_option('--driver-memory', driver_memory)
|
||||
spark_submit_cmd.add_option('--executor-memory', executor_memory)
|
||||
spark_submit_cmd = CommandBuilder("{0}/bin/spark-submit".format(spark_home))
|
||||
spark_submit_cmd.add_option("--master", "spark://{0}:7077".format(master_ip))
|
||||
spark_submit_cmd.add_option("--name", name)
|
||||
spark_submit_cmd.add_option("--class", main_class)
|
||||
spark_submit_cmd.add_option("--jars", jars and ",".join(jars))
|
||||
spark_submit_cmd.add_option("--py-files", py_files and ",".join(py_files))
|
||||
spark_submit_cmd.add_option("--files", files and ",".join(files))
|
||||
spark_submit_cmd.add_option("--driver-java-options", driver_java_options)
|
||||
spark_submit_cmd.add_option("--driver-library-path", driver_library_path)
|
||||
spark_submit_cmd.add_option("--driver-class-path", driver_class_path)
|
||||
spark_submit_cmd.add_option("--driver-memory", driver_memory)
|
||||
spark_submit_cmd.add_option("--executor-memory", executor_memory)
|
||||
if driver_cores:
|
||||
spark_submit_cmd.add_option('--driver-cores', str(driver_cores))
|
||||
spark_submit_cmd.add_option("--driver-cores", str(driver_cores))
|
||||
if executor_cores:
|
||||
spark_submit_cmd.add_option('--executor-cores', str(executor_cores))
|
||||
spark_submit_cmd.add_option("--executor-cores", str(executor_cores))
|
||||
|
||||
spark_submit_cmd.add_argument(
|
||||
os.path.expandvars(app) + ' ' + ' '.join(['\'' + str(app_arg) + '\'' for app_arg in (app_args or [])]))
|
||||
os.path.expandvars(app) + " " + " ".join(["'" + str(app_arg) + "'" for app_arg in (app_args or [])]))
|
||||
|
||||
with open("spark-submit.txt", mode="w", encoding="UTF-8") as stream:
|
||||
stream.write(spark_submit_cmd.to_str())
|
||||
|
@ -102,50 +117,51 @@ def __app_submit_cmd(name: str, app: str, app_args: List[str], main_class: str,
|
|||
|
||||
|
||||
def load_application(application_file_path):
|
||||
'''
|
||||
"""
|
||||
Read and parse the application from file
|
||||
'''
|
||||
with open(application_file_path, encoding='UTF-8') as f:
|
||||
"""
|
||||
with open(application_file_path, encoding="UTF-8") as f:
|
||||
application = yaml.load(f)
|
||||
return application
|
||||
|
||||
|
||||
def upload_log(blob_client, application):
|
||||
'''
|
||||
"""
|
||||
upload output.log to storage account
|
||||
'''
|
||||
log_file = os.path.join(os.environ['AZ_BATCH_TASK_WORKING_DIR'], os.environ['SPARK_SUBMIT_LOGS_FILE'])
|
||||
"""
|
||||
log_file = os.path.join(os.environ["AZ_BATCH_TASK_WORKING_DIR"], os.environ["SPARK_SUBMIT_LOGS_FILE"])
|
||||
upload_file_to_container(
|
||||
container_name=os.environ['STORAGE_LOGS_CONTAINER'],
|
||||
application_name=application['name'],
|
||||
container_name=os.environ["STORAGE_LOGS_CONTAINER"],
|
||||
application_name=application["name"],
|
||||
file_path=log_file,
|
||||
blob_client=blob_client,
|
||||
use_full_path=False)
|
||||
use_full_path=False,
|
||||
)
|
||||
|
||||
|
||||
def receive_submit_request(application_file_path):
|
||||
'''
|
||||
"""
|
||||
Handle the request to submit a task
|
||||
'''
|
||||
batch_client = config.batch_client
|
||||
"""
|
||||
blob_client = config.blob_client
|
||||
application = load_application(application_file_path)
|
||||
|
||||
cmd = __app_submit_cmd(
|
||||
name=application['name'],
|
||||
app=application['application'],
|
||||
app_args=application['application_args'],
|
||||
main_class=application['main_class'],
|
||||
jars=application['jars'],
|
||||
py_files=application['py_files'],
|
||||
files=application['files'],
|
||||
driver_java_options=application['driver_java_options'],
|
||||
driver_library_path=application['driver_library_path'],
|
||||
driver_class_path=application['driver_class_path'],
|
||||
driver_memory=application['driver_memory'],
|
||||
executor_memory=application['executor_memory'],
|
||||
driver_cores=application['driver_cores'],
|
||||
executor_cores=application['executor_cores'])
|
||||
name=application["name"],
|
||||
app=application["application"],
|
||||
app_args=application["application_args"],
|
||||
main_class=application["main_class"],
|
||||
jars=application["jars"],
|
||||
py_files=application["py_files"],
|
||||
files=application["files"],
|
||||
driver_java_options=application["driver_java_options"],
|
||||
driver_library_path=application["driver_library_path"],
|
||||
driver_class_path=application["driver_class_path"],
|
||||
driver_memory=application["driver_memory"],
|
||||
executor_memory=application["executor_memory"],
|
||||
driver_cores=application["driver_cores"],
|
||||
executor_cores=application["executor_cores"],
|
||||
)
|
||||
|
||||
return_code = subprocess.call(cmd.to_str(), shell=True)
|
||||
upload_log(blob_client, application)
|
||||
|
@ -157,24 +173,25 @@ def upload_error_log(error, application_file_path):
|
|||
blob_client = config.blob_client
|
||||
|
||||
error_log_path = os.path.join(os.environ["AZ_BATCH_TASK_WORKING_DIR"], "error.log")
|
||||
with open(error_log_path, "w", encoding='UTF-8') as error_log:
|
||||
with open(error_log_path, "w", encoding="UTF-8") as error_log:
|
||||
error_log.write(error)
|
||||
|
||||
upload_file_to_container(
|
||||
container_name=os.environ['STORAGE_LOGS_CONTAINER'],
|
||||
application_name=application['name'],
|
||||
container_name=os.environ["STORAGE_LOGS_CONTAINER"],
|
||||
application_name=application["name"],
|
||||
file_path=os.path.realpath(error_log.name),
|
||||
blob_client=blob_client,
|
||||
use_full_path=False)
|
||||
use_full_path=False,
|
||||
)
|
||||
upload_log(blob_client, application)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
return_code = 1
|
||||
try:
|
||||
return_code = receive_submit_request(os.path.join(os.environ['AZ_BATCH_TASK_WORKING_DIR'], 'application.yaml'))
|
||||
return_code = receive_submit_request(os.path.join(os.environ["AZ_BATCH_TASK_WORKING_DIR"], "application.yaml"))
|
||||
except Exception as e:
|
||||
upload_error_log(str(e), os.path.join(os.environ['AZ_BATCH_TASK_WORKING_DIR'], 'application.yaml'))
|
||||
upload_error_log(str(e), os.path.join(os.environ["AZ_BATCH_TASK_WORKING_DIR"], "application.yaml"))
|
||||
|
||||
# force batch task exit code to match spark exit code
|
||||
sys.exit(return_code)
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import time
|
||||
import os
|
||||
|
||||
while not os.path.exists('/tmp/setup_complete'):
|
||||
while not os.path.exists("/tmp/setup_complete"):
|
||||
time.sleep(1)
|
||||
|
||||
print("SETUP FINISHED")
|
||||
os.remove('/tmp/setup_complete')
|
||||
os.remove("/tmp/setup_complete")
|
||||
|
|
|
@ -17,12 +17,13 @@ def generate_application_task(core_base_operations, container_id, application, r
|
|||
application_name=application.name,
|
||||
file_path=application.application,
|
||||
blob_client=core_base_operations.blob_client,
|
||||
use_full_path=False)
|
||||
use_full_path=False,
|
||||
)
|
||||
|
||||
# Upload application file
|
||||
resource_files.append(app_resource_file)
|
||||
|
||||
application.application = '$AZ_BATCH_TASK_WORKING_DIR/' + os.path.basename(application.application)
|
||||
application.application = "$AZ_BATCH_TASK_WORKING_DIR/" + os.path.basename(application.application)
|
||||
|
||||
# Upload dependent JARS
|
||||
jar_resource_file_paths = []
|
||||
|
@ -32,7 +33,8 @@ def generate_application_task(core_base_operations, container_id, application, r
|
|||
application_name=application.name,
|
||||
file_path=jar,
|
||||
blob_client=core_base_operations.blob_client,
|
||||
use_full_path=False)
|
||||
use_full_path=False,
|
||||
)
|
||||
jar_resource_file_paths.append(current_jar_resource_file_path)
|
||||
resource_files.append(current_jar_resource_file_path)
|
||||
|
||||
|
@ -44,7 +46,8 @@ def generate_application_task(core_base_operations, container_id, application, r
|
|||
application_name=application.name,
|
||||
file_path=py_file,
|
||||
blob_client=core_base_operations.blob_client,
|
||||
use_full_path=False)
|
||||
use_full_path=False,
|
||||
)
|
||||
py_files_resource_file_paths.append(current_py_files_resource_file_path)
|
||||
resource_files.append(current_py_files_resource_file_path)
|
||||
|
||||
|
@ -56,7 +59,8 @@ def generate_application_task(core_base_operations, container_id, application, r
|
|||
application_name=application.name,
|
||||
file_path=file,
|
||||
blob_client=core_base_operations.blob_client,
|
||||
use_full_path=False)
|
||||
use_full_path=False,
|
||||
)
|
||||
files_resource_file_paths.append(files_resource_file_path)
|
||||
resource_files.append(files_resource_file_path)
|
||||
|
||||
|
@ -67,21 +71,23 @@ def generate_application_task(core_base_operations, container_id, application, r
|
|||
application_definition_file = helpers.upload_text_to_container(
|
||||
container_name=container_id,
|
||||
application_name=application.name,
|
||||
file_path='application.yaml',
|
||||
file_path="application.yaml",
|
||||
content=yaml.dump(vars(application)),
|
||||
blob_client=core_base_operations.blob_client)
|
||||
blob_client=core_base_operations.blob_client,
|
||||
)
|
||||
resource_files.append(application_definition_file)
|
||||
|
||||
# create command to submit task
|
||||
task_cmd = CommandBuilder('sudo docker exec')
|
||||
task_cmd.add_argument('-i')
|
||||
task_cmd.add_option('-e', 'AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR')
|
||||
task_cmd.add_option('-e', 'STORAGE_LOGS_CONTAINER={0}'.format(container_id))
|
||||
task_cmd.add_argument('spark /bin/bash >> output.log 2>&1')
|
||||
task_cmd.add_argument('-c "source ~/.bashrc; ' \
|
||||
'export PYTHONPATH=$PYTHONPATH:\$AZTK_WORKING_DIR; ' \
|
||||
'cd \$AZ_BATCH_TASK_WORKING_DIR; ' \
|
||||
'\$AZTK_WORKING_DIR/.aztk-env/.venv/bin/python \$AZTK_WORKING_DIR/aztk/node_scripts/submit.py"')
|
||||
task_cmd = CommandBuilder("sudo docker exec")
|
||||
task_cmd.add_argument("-i")
|
||||
task_cmd.add_option("-e", "AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR")
|
||||
task_cmd.add_option("-e", "STORAGE_LOGS_CONTAINER={0}".format(container_id))
|
||||
task_cmd.add_argument("spark /bin/bash >> output.log 2>&1")
|
||||
task_cmd.add_argument(
|
||||
r'-c "source ~/.bashrc; '
|
||||
r"export PYTHONPATH=$PYTHONPATH:\$AZTK_WORKING_DIR; "
|
||||
r"cd \$AZ_BATCH_TASK_WORKING_DIR; "
|
||||
r'\$AZTK_WORKING_DIR/.aztk-env/.venv/bin/python \$AZTK_WORKING_DIR/aztk/node_scripts/submit.py"')
|
||||
|
||||
# Create task
|
||||
task = batch_models.TaskAddParameter(
|
||||
|
@ -91,6 +97,7 @@ def generate_application_task(core_base_operations, container_id, application, r
|
|||
constraints=batch_models.TaskConstraints(max_task_retry_count=application.max_retry_count),
|
||||
user_identity=batch_models.UserIdentity(
|
||||
auto_user=batch_models.AutoUserSpecification(
|
||||
scope=batch_models.AutoUserScope.task, elevation_level=batch_models.ElevationLevel.admin)))
|
||||
scope=batch_models.AutoUserScope.task, elevation_level=batch_models.ElevationLevel.admin)),
|
||||
)
|
||||
|
||||
return task
|
||||
|
|
|
@ -1,14 +1,9 @@
|
|||
from typing import List
|
||||
|
||||
import azure.batch.models as batch_models
|
||||
import azure.batch.models.batch_error as batch_error
|
||||
|
||||
from aztk import error
|
||||
from aztk.internal.cluster_data import NodeData
|
||||
from aztk.spark import models
|
||||
from aztk.spark.utils import util
|
||||
from aztk.utils import constants, helpers
|
||||
from aztk.spark import models
|
||||
|
||||
POOL_ADMIN_USER_IDENTITY = batch_models.UserIdentity(
|
||||
auto_user=batch_models.AutoUserSpecification(
|
||||
|
@ -60,14 +55,13 @@ def __get_secrets_env(core_base_operations):
|
|||
]
|
||||
|
||||
|
||||
def __cluster_install_cmd(zip_resource_file: batch_models.ResourceFile,
|
||||
gpu_enabled: bool,
|
||||
docker_repo: str = None,
|
||||
docker_run_options: str = None,
|
||||
plugins=None,
|
||||
worker_on_master: bool = True,
|
||||
file_mounts=None,
|
||||
mixed_mode: bool = False):
|
||||
def __cluster_install_cmd(
|
||||
zip_resource_file: batch_models.ResourceFile,
|
||||
gpu_enabled: bool,
|
||||
docker_repo: str = None,
|
||||
docker_run_options: str = None,
|
||||
file_mounts=None,
|
||||
):
|
||||
"""
|
||||
For Docker on ubuntu 16.04 - return the command line
|
||||
to be run on the start task of the pool to setup spark.
|
||||
|
@ -80,41 +74,42 @@ def __cluster_install_cmd(zip_resource_file: batch_models.ResourceFile,
|
|||
if file_mounts:
|
||||
for mount in file_mounts:
|
||||
# Create the directory on the node
|
||||
shares.append('mkdir -p {0}'.format(mount.mount_path))
|
||||
shares.append("mkdir -p {0}".format(mount.mount_path))
|
||||
|
||||
# Mount the file share
|
||||
shares.append(
|
||||
'mount -t cifs //{0}.file.core.windows.net/{2} {3} -o vers=3.0,username={0},password={1},dir_mode=0777,file_mode=0777,sec=ntlmssp'.
|
||||
"mount -t cifs //{0}.file.core.windows.net/{2} {3} -o vers=3.0,username={0},password={1},dir_mode=0777,file_mode=0777,sec=ntlmssp".
|
||||
format(mount.storage_account_name, mount.storage_account_key, mount.file_share_path, mount.mount_path))
|
||||
|
||||
setup = [
|
||||
'time('\
|
||||
'apt-get -y update;'\
|
||||
'apt-get -y --no-install-recommends install unzip;'\
|
||||
'unzip -o $AZ_BATCH_TASK_WORKING_DIR/{0};'\
|
||||
'chmod 777 $AZ_BATCH_TASK_WORKING_DIR/aztk/node_scripts/setup_host.sh;'\
|
||||
') 2>&1'.format(zip_resource_file.file_path),
|
||||
"time("
|
||||
"apt-get -y update;"
|
||||
"apt-get -y --no-install-recommends install unzip;"
|
||||
"unzip -o $AZ_BATCH_TASK_WORKING_DIR/{0};"
|
||||
"chmod 777 $AZ_BATCH_TASK_WORKING_DIR/aztk/node_scripts/setup_host.sh;"
|
||||
") 2>&1".format(zip_resource_file.file_path),
|
||||
'/bin/bash $AZ_BATCH_TASK_WORKING_DIR/aztk/node_scripts/setup_host.sh {0} {1} "{2}"'.format(
|
||||
constants.DOCKER_SPARK_CONTAINER_NAME,
|
||||
docker_repo,
|
||||
"" if docker_run_options is None else docker_run_options.replace('"', '\\\"')
|
||||
)
|
||||
"" if docker_run_options is None else docker_run_options.replace('"', '\\"'),
|
||||
),
|
||||
]
|
||||
|
||||
commands = shares + setup
|
||||
return commands
|
||||
|
||||
|
||||
def generate_cluster_start_task(core_base_operations,
|
||||
zip_resource_file: batch_models.ResourceFile,
|
||||
cluster_id: str,
|
||||
gpu_enabled: bool,
|
||||
docker_repo: str = None,
|
||||
docker_run_options: str = None,
|
||||
file_shares: List[models.FileShare] = None,
|
||||
plugins: List[models.PluginConfiguration] = None,
|
||||
mixed_mode: bool = False,
|
||||
worker_on_master: bool = True):
|
||||
def generate_cluster_start_task(
|
||||
core_base_operations,
|
||||
zip_resource_file: batch_models.ResourceFile,
|
||||
cluster_id: str,
|
||||
gpu_enabled: bool,
|
||||
docker_repo: str = None,
|
||||
docker_run_options: str = None,
|
||||
file_shares: List[models.FileShare] = None,
|
||||
mixed_mode: bool = False,
|
||||
worker_on_master: bool = True,
|
||||
):
|
||||
"""
|
||||
This will return the start task object for the pool to be created.
|
||||
:param cluster_id str: Id of the cluster(Used for uploading the resource files)
|
||||
|
@ -130,22 +125,23 @@ def generate_cluster_start_task(core_base_operations,
|
|||
spark_submit_logs_file = constants.SPARK_SUBMIT_LOGS_FILE
|
||||
|
||||
# TODO use certificate
|
||||
environment_settings = __get_secrets_env(core_base_operations) + [
|
||||
environment_settings = (__get_secrets_env(core_base_operations) + [
|
||||
batch_models.EnvironmentSetting(name="SPARK_WEB_UI_PORT", value=spark_web_ui_port),
|
||||
batch_models.EnvironmentSetting(name="SPARK_WORKER_UI_PORT", value=spark_worker_ui_port),
|
||||
batch_models.EnvironmentSetting(name="SPARK_JOB_UI_PORT", value=spark_job_ui_port),
|
||||
batch_models.EnvironmentSetting(name="SPARK_CONTAINER_NAME", value=spark_container_name),
|
||||
batch_models.EnvironmentSetting(name="SPARK_SUBMIT_LOGS_FILE", value=spark_submit_logs_file),
|
||||
batch_models.EnvironmentSetting(name="AZTK_GPU_ENABLED", value=helpers.bool_env(gpu_enabled)),
|
||||
] + __get_docker_credentials(core_base_operations) + _get_aztk_environment(cluster_id, worker_on_master, mixed_mode)
|
||||
] + __get_docker_credentials(core_base_operations) + _get_aztk_environment(cluster_id, worker_on_master,
|
||||
mixed_mode))
|
||||
|
||||
# start task command
|
||||
command = __cluster_install_cmd(zip_resource_file, gpu_enabled, docker_repo, docker_run_options, plugins,
|
||||
worker_on_master, file_shares, mixed_mode)
|
||||
command = __cluster_install_cmd(zip_resource_file, gpu_enabled, docker_repo, docker_run_options, file_shares)
|
||||
|
||||
return batch_models.StartTask(
|
||||
command_line=helpers.wrap_commands_in_shell(command),
|
||||
resource_files=resource_files,
|
||||
environment_settings=environment_settings,
|
||||
user_identity=POOL_ADMIN_USER_IDENTITY,
|
||||
wait_for_success=True)
|
||||
wait_for_success=True,
|
||||
)
|
||||
|
|
|
@ -2,7 +2,6 @@ from typing import List
|
|||
|
||||
import azure.batch.models as batch_models
|
||||
|
||||
from aztk.client.base import BaseOperations as CoreBaseOperations
|
||||
from aztk.spark import models
|
||||
|
||||
from .helpers import generate_application_task, generate_cluster_start_task
|
||||
|
@ -12,18 +11,19 @@ class SparkBaseOperations:
|
|||
"""Spark Base operations object that all other Spark operations objects inherit from
|
||||
"""
|
||||
|
||||
#TODO: make this private or otherwise not public
|
||||
def _generate_cluster_start_task(self,
|
||||
core_base_operations,
|
||||
zip_resource_file: batch_models.ResourceFile,
|
||||
id: str,
|
||||
gpu_enabled: bool,
|
||||
docker_repo: str = None,
|
||||
docker_run_options: str = None,
|
||||
file_shares: List[models.FileShare] = None,
|
||||
plugins: List[models.PluginConfiguration] = None,
|
||||
mixed_mode: bool = False,
|
||||
worker_on_master: bool = True):
|
||||
# TODO: make this private or otherwise not public
|
||||
def _generate_cluster_start_task(
|
||||
self,
|
||||
core_base_operations,
|
||||
zip_resource_file: batch_models.ResourceFile,
|
||||
id: str,
|
||||
gpu_enabled: bool,
|
||||
docker_repo: str = None,
|
||||
docker_run_options: str = None,
|
||||
file_shares: List[models.FileShare] = None,
|
||||
mixed_mode: bool = False,
|
||||
worker_on_master: bool = True,
|
||||
):
|
||||
"""Generate the Azure Batch Start Task to provision a Spark cluster.
|
||||
|
||||
Args:
|
||||
|
@ -35,10 +35,8 @@ class SparkBaseOperations:
|
|||
If None, the default Docker image will be used. Defaults to None.
|
||||
file_shares (:obj:`aztk.spark.models.FileShare`, optional): a list of FileShares to mount on the cluster.
|
||||
Defaults to None.
|
||||
plugins (:obj:`aztk.spark.models.PluginConfiguration`, optional): a list of plugins to set up on the cluster.
|
||||
Defaults to None.
|
||||
mixed_mode (:obj:`bool`, optional): If True, the cluster is configured to use both dedicated and low priority VMs.
|
||||
Defaults to False.
|
||||
mixed_mode (:obj:`bool`, optional): If True, the cluster is configured to use both dedicated
|
||||
and low priority VMs. Defaults to False.
|
||||
worker_on_master (:obj:`bool`, optional): If True, the cluster is configured to provision a Spark worker
|
||||
on the VM that runs the Spark master. Defaults to True.
|
||||
|
||||
|
@ -46,10 +44,18 @@ class SparkBaseOperations:
|
|||
:obj:`azure.batch.models.StartTask`: the StartTask definition to provision the cluster.
|
||||
"""
|
||||
return generate_cluster_start_task.generate_cluster_start_task(
|
||||
core_base_operations, zip_resource_file, id, gpu_enabled, docker_repo, docker_run_options, file_shares,
|
||||
plugins, mixed_mode, worker_on_master)
|
||||
core_base_operations,
|
||||
zip_resource_file,
|
||||
id,
|
||||
gpu_enabled,
|
||||
docker_repo,
|
||||
docker_run_options,
|
||||
file_shares,
|
||||
mixed_mode,
|
||||
worker_on_master,
|
||||
)
|
||||
|
||||
#TODO: make this private or otherwise not public
|
||||
# TODO: make this private or otherwise not public
|
||||
def _generate_application_task(self, core_base_operations, container_id, application, remote=False):
|
||||
"""Generate the Azure Batch Start Task to provision a Spark cluster.
|
||||
|
||||
|
|
|
@ -2,21 +2,15 @@ from typing import List
|
|||
|
||||
import azure.batch.models.batch_error as batch_error
|
||||
|
||||
import aztk
|
||||
from aztk import error
|
||||
from aztk import models as base_models
|
||||
from aztk.client import CoreClient
|
||||
from aztk.internal.cluster_data import NodeData
|
||||
from aztk.spark import models
|
||||
from aztk.spark.client.cluster import ClusterOperations
|
||||
from aztk.spark.client.job import JobOperations
|
||||
from aztk.spark.helpers import cluster_diagnostic_helper
|
||||
from aztk.spark.helpers import create_cluster as create_cluster_helper
|
||||
from aztk.spark.helpers import get_log as get_log_helper
|
||||
from aztk.spark.helpers import job_submission as job_submit_helper
|
||||
from aztk.spark.helpers import submit as cluster_submit_helper
|
||||
from aztk.spark.utils import util
|
||||
from aztk.utils import azure_api, deprecated, deprecate, helpers
|
||||
from aztk.utils import deprecate, deprecated, helpers
|
||||
|
||||
|
||||
class Client(CoreClient):
|
||||
|
@ -28,13 +22,14 @@ class Client(CoreClient):
|
|||
"""
|
||||
|
||||
def __init__(self, secrets_configuration: models.SecretsConfiguration = None, **kwargs):
|
||||
self.secrets_configuration = None
|
||||
super().__init__()
|
||||
context = None
|
||||
if kwargs.get("secrets_config"):
|
||||
deprecate(
|
||||
version="0.10.0",
|
||||
message="secrets_config key is deprecated in secrets.yaml",
|
||||
advice="Please use secrets_configuration key instead.")
|
||||
advice="Please use secrets_configuration key instead.",
|
||||
)
|
||||
context = self._get_context(kwargs.get("secrets_config"))
|
||||
else:
|
||||
context = self._get_context(secrets_configuration)
|
||||
|
@ -133,36 +128,42 @@ class Client(CoreClient):
|
|||
id=cluster_id, node_id=node_id, command=command, host=host, internal=internal, timeout=timeout)
|
||||
|
||||
@deprecated("0.10.0")
|
||||
def cluster_copy(self,
|
||||
cluster_id: str,
|
||||
source_path: str,
|
||||
destination_path: str,
|
||||
host: bool = False,
|
||||
internal: bool = False,
|
||||
timeout: int = None):
|
||||
def cluster_copy(
|
||||
self,
|
||||
cluster_id: str,
|
||||
source_path: str,
|
||||
destination_path: str,
|
||||
host: bool = False,
|
||||
internal: bool = False,
|
||||
timeout: int = None,
|
||||
):
|
||||
return self.cluster.copy(
|
||||
id=cluster_id,
|
||||
source_path=source_path,
|
||||
destination_path=destination_path,
|
||||
host=host,
|
||||
internal=internal,
|
||||
timeout=timeout)
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
@deprecated("0.10.0")
|
||||
def cluster_download(self,
|
||||
cluster_id: str,
|
||||
source_path: str,
|
||||
destination_path: str = None,
|
||||
host: bool = False,
|
||||
internal: bool = False,
|
||||
timeout: int = None):
|
||||
def cluster_download(
|
||||
self,
|
||||
cluster_id: str,
|
||||
source_path: str,
|
||||
destination_path: str = None,
|
||||
host: bool = False,
|
||||
internal: bool = False,
|
||||
timeout: int = None,
|
||||
):
|
||||
return self.cluster.download(
|
||||
id=cluster_id,
|
||||
source_path=source_path,
|
||||
destination_path=destination_path,
|
||||
host=host,
|
||||
internal=internal,
|
||||
timeout=timeout)
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
@deprecated("0.10.0")
|
||||
def cluster_ssh_into_master(self,
|
||||
|
@ -176,9 +177,9 @@ class Client(CoreClient):
|
|||
return self.cluster._core_cluster_operations.ssh_into_node(cluster_id, node_id, username, ssh_key, password,
|
||||
port_forward_list, internal)
|
||||
|
||||
'''
|
||||
"""
|
||||
job submission
|
||||
'''
|
||||
"""
|
||||
|
||||
@deprecated("0.10.0")
|
||||
def submit_job(self, job_configuration: models.JobConfiguration):
|
||||
|
|
|
@ -4,15 +4,17 @@ from aztk import error
|
|||
from aztk.utils import helpers
|
||||
|
||||
|
||||
def cluster_copy(core_cluster_operations,
|
||||
cluster_id: str,
|
||||
source_path: str,
|
||||
destination_path: str,
|
||||
host: bool = False,
|
||||
internal: bool = False,
|
||||
timeout: int = None):
|
||||
def cluster_copy(
|
||||
core_cluster_operations,
|
||||
cluster_id: str,
|
||||
source_path: str,
|
||||
destination_path: str,
|
||||
host: bool = False,
|
||||
internal: bool = False,
|
||||
timeout: int = None,
|
||||
):
|
||||
try:
|
||||
container_name = None if host else 'spark'
|
||||
container_name = None if host else "spark"
|
||||
return core_cluster_operations.copy(
|
||||
cluster_id,
|
||||
source_path,
|
||||
|
@ -20,6 +22,7 @@ def cluster_copy(core_cluster_operations,
|
|||
container_name=container_name,
|
||||
get=False,
|
||||
internal=internal,
|
||||
timeout=timeout)
|
||||
timeout=timeout,
|
||||
)
|
||||
except batch_error.BatchErrorException as e:
|
||||
raise error.AztkError(helpers.format_batch_exception(e))
|
||||
|
|
|
@ -52,9 +52,16 @@ def create_cluster(core_cluster_operations,
|
|||
zip_resource_files = cluster_data.upload_node_data(node_data).to_resource_file()
|
||||
|
||||
start_task = spark_cluster_operations._generate_cluster_start_task(
|
||||
core_cluster_operations, zip_resource_files, cluster_conf.cluster_id, cluster_conf.gpu_enabled(),
|
||||
cluster_conf.get_docker_repo(), cluster_conf.get_docker_run_options(), cluster_conf.file_shares,
|
||||
cluster_conf.plugins, cluster_conf.mixed_mode(), cluster_conf.worker_on_master)
|
||||
core_cluster_operations,
|
||||
zip_resource_files,
|
||||
cluster_conf.cluster_id,
|
||||
cluster_conf.gpu_enabled(),
|
||||
cluster_conf.get_docker_repo(),
|
||||
cluster_conf.get_docker_run_options(),
|
||||
cluster_conf.file_shares,
|
||||
cluster_conf.mixed_mode(),
|
||||
cluster_conf.worker_on_master,
|
||||
)
|
||||
|
||||
software_metadata_key = base_models.Software.spark
|
||||
|
||||
|
|
|
@ -4,12 +4,14 @@ from aztk import error
|
|||
from aztk.utils import helpers
|
||||
|
||||
|
||||
def create_user(core_cluster_operations,
|
||||
spark_cluster_operations,
|
||||
cluster_id: str,
|
||||
username: str,
|
||||
password: str = None,
|
||||
ssh_key: str = None) -> str:
|
||||
def create_user(
|
||||
core_cluster_operations,
|
||||
spark_cluster_operations,
|
||||
cluster_id: str,
|
||||
username: str,
|
||||
password: str = None,
|
||||
ssh_key: str = None,
|
||||
) -> str:
|
||||
try:
|
||||
cluster = spark_cluster_operations.get(cluster_id)
|
||||
master_node_id = cluster.master_node_id
|
||||
|
|
|
@ -6,18 +6,13 @@ from aztk import error
|
|||
from aztk.utils import helpers
|
||||
|
||||
|
||||
def _write_error(stream, node_output):
|
||||
stream.write(node_output.error)
|
||||
|
||||
|
||||
def _write_output(stream, node_output):
|
||||
stream.write(node_output.output)
|
||||
|
||||
|
||||
def _run(spark_cluster_operations, cluster_id, output_directory=None, brief=False):
|
||||
# copy debug program to each node
|
||||
output = spark_cluster_operations.copy(
|
||||
copy_output = spark_cluster_operations.copy(
|
||||
cluster_id, os.path.abspath("./aztk/spark/utils/debug.py"), "/tmp/debug.py", host=True)
|
||||
for node_output in copy_output:
|
||||
if node_output.error:
|
||||
raise error.AztkError("Failed to copy diagnostic script to cluster.")
|
||||
ssh_cmd = _build_diagnostic_ssh_command(brief)
|
||||
run_output = spark_cluster_operations.run(cluster_id, ssh_cmd, host=True)
|
||||
remote_path = "/tmp/debug.zip"
|
||||
|
@ -27,9 +22,9 @@ def _run(spark_cluster_operations, cluster_id, output_directory=None, brief=Fals
|
|||
result = spark_cluster_operations.download(cluster_id, remote_path, local_path, host=True)
|
||||
|
||||
# write run output or error to debug/ directory
|
||||
with open(os.path.join(output_directory, "debug-output.txt"), 'w', encoding="UTF-8") as stream:
|
||||
with open(os.path.join(output_directory, "debug-output.txt"), "w", encoding="UTF-8") as stream:
|
||||
for node_output in run_output:
|
||||
_write_error(stream, node_output) if node_output.error else _write_output(stream, node_output)
|
||||
stream.write(node_output.error) if node_output.error else stream.write(node_output.output)
|
||||
else:
|
||||
result = spark_cluster_operations.download(cluster_id, remote_path, host=True)
|
||||
|
||||
|
@ -37,11 +32,11 @@ def _run(spark_cluster_operations, cluster_id, output_directory=None, brief=Fals
|
|||
|
||||
|
||||
def _build_diagnostic_ssh_command(brief):
|
||||
return "sudo rm -rf /tmp/debug.zip; "\
|
||||
"sudo apt-get install -y python3-pip; "\
|
||||
"sudo -H pip3 install --upgrade pip; "\
|
||||
"sudo -H pip3 install docker; "\
|
||||
"sudo python3 /tmp/debug.py {}".format(brief)
|
||||
return ("sudo rm -rf /tmp/debug.zip; "
|
||||
"sudo apt-get install -y python3-pip; "
|
||||
"sudo -H pip3 install --upgrade pip; "
|
||||
"sudo -H pip3 install docker; "
|
||||
"sudo python3 /tmp/debug.py {}".format(brief))
|
||||
|
||||
|
||||
def run_cluster_diagnostics(spark_cluster_operations, cluster_id, output_directory=None, brief=False):
|
||||
|
|
|
@ -4,15 +4,17 @@ from aztk import error
|
|||
from aztk.utils import helpers
|
||||
|
||||
|
||||
def cluster_download(core_cluster_operations,
|
||||
cluster_id: str,
|
||||
source_path: str,
|
||||
destination_path: str = None,
|
||||
host: bool = False,
|
||||
internal: bool = False,
|
||||
timeout: int = None):
|
||||
def cluster_download(
|
||||
core_cluster_operations,
|
||||
cluster_id: str,
|
||||
source_path: str,
|
||||
destination_path: str = None,
|
||||
host: bool = False,
|
||||
internal: bool = False,
|
||||
timeout: int = None,
|
||||
):
|
||||
try:
|
||||
container_name = None if host else 'spark'
|
||||
container_name = None if host else "spark"
|
||||
return core_cluster_operations.copy(
|
||||
cluster_id,
|
||||
source_path,
|
||||
|
@ -20,6 +22,7 @@ def cluster_download(core_cluster_operations,
|
|||
container_name=container_name,
|
||||
get=True,
|
||||
internal=internal,
|
||||
timeout=timeout)
|
||||
timeout=timeout,
|
||||
)
|
||||
except batch_error.BatchErrorException as e:
|
||||
raise error.AztkError(helpers.format_batch_exception(e))
|
||||
|
|
|
@ -7,6 +7,6 @@ from aztk.utils import helpers
|
|||
def get_application_status(core_cluster_operations, cluster_id: str, app_name: str):
|
||||
try:
|
||||
task = core_cluster_operations.batch_client.task.get(cluster_id, app_name)
|
||||
return task.state._value_
|
||||
return task.state.name
|
||||
except batch_error.BatchErrorException as e:
|
||||
raise error.AztkError(helpers.format_batch_exception(e))
|
||||
|
|
|
@ -4,15 +4,17 @@ from aztk import error
|
|||
from aztk.utils import helpers
|
||||
|
||||
|
||||
def node_run(core_cluster_operations,
|
||||
cluster_id: str,
|
||||
node_id: str,
|
||||
command: str,
|
||||
host=False,
|
||||
internal: bool = False,
|
||||
timeout=None):
|
||||
def node_run(
|
||||
core_cluster_operations,
|
||||
cluster_id: str,
|
||||
node_id: str,
|
||||
command: str,
|
||||
host=False,
|
||||
internal: bool = False,
|
||||
timeout=None,
|
||||
):
|
||||
try:
|
||||
return core_cluster_operations.node_run(
|
||||
cluster_id, node_id, command, internal, container_name='spark' if not host else None, timeout=timeout)
|
||||
cluster_id, node_id, command, internal, container_name="spark" if not host else None, timeout=timeout)
|
||||
except batch_error.BatchErrorException as e:
|
||||
raise error.AztkError(helpers.format_batch_exception(e))
|
||||
|
|
|
@ -12,6 +12,6 @@ def cluster_run(core_cluster_operations,
|
|||
timeout=None):
|
||||
try:
|
||||
return core_cluster_operations.run(
|
||||
cluster_id, command, internal, container_name='spark' if not host else None, timeout=timeout)
|
||||
cluster_id, command, internal, container_name="spark" if not host else None, timeout=timeout)
|
||||
except batch_error.BatchErrorException as e:
|
||||
raise error.AztkError(helpers.format_batch_exception(e))
|
||||
|
|
|
@ -4,16 +4,19 @@ from aztk import error
|
|||
from aztk.utils import helpers
|
||||
|
||||
|
||||
def cluster_ssh_into_master(spark_cluster_operations,
|
||||
cluster_id,
|
||||
node_id,
|
||||
username,
|
||||
ssh_key=None,
|
||||
password=None,
|
||||
port_forward_list=None,
|
||||
internal=False):
|
||||
def ssh_into_master(
|
||||
spark_cluster_operations,
|
||||
core_cluster_operations,
|
||||
cluster_id,
|
||||
username,
|
||||
ssh_key=None,
|
||||
password=None,
|
||||
port_forward_list=None,
|
||||
internal=False,
|
||||
):
|
||||
try:
|
||||
spark_cluster_operations.ssh_into_node(cluster_id, node_id, username, ssh_key, password, port_forward_list,
|
||||
internal)
|
||||
master_node_id = spark_cluster_operations.get(cluster_id).master_node_id
|
||||
core_cluster_operations.ssh_into_node(cluster_id, master_node_id, username, ssh_key, password,
|
||||
port_forward_list, internal)
|
||||
except batch_error.BatchErrorException as e:
|
||||
raise error.AztkError(helpers.format_batch_exception(e))
|
||||
|
|
|
@ -42,12 +42,14 @@ def submit_application(core_cluster_operations,
|
|||
job_id=job_id, task_id=task.id, batch_client=core_cluster_operations.batch_client)
|
||||
|
||||
|
||||
def submit(core_cluster_operations,
|
||||
spark_cluster_operations,
|
||||
cluster_id: str,
|
||||
application: models.ApplicationConfiguration,
|
||||
remote: bool = False,
|
||||
wait: bool = False):
|
||||
def submit(
|
||||
core_cluster_operations,
|
||||
spark_cluster_operations,
|
||||
cluster_id: str,
|
||||
application: models.ApplicationConfiguration,
|
||||
remote: bool = False,
|
||||
wait: bool = False,
|
||||
):
|
||||
try:
|
||||
submit_application(core_cluster_operations, spark_cluster_operations, cluster_id, application, remote, wait)
|
||||
except batch_error.BatchErrorException as e:
|
||||
|
|
|
@ -2,9 +2,25 @@ from aztk.client.cluster import CoreClusterOperations
|
|||
from aztk.spark import models
|
||||
from aztk.spark.client.base import SparkBaseOperations
|
||||
|
||||
from .helpers import (copy, create, create_user, delete, diagnostics, download, get, get_application_log,
|
||||
get_application_status, get_configuration, get_remote_login_settings, list, node_run, run, submit,
|
||||
wait)
|
||||
from .helpers import (
|
||||
copy,
|
||||
create,
|
||||
create_user,
|
||||
delete,
|
||||
diagnostics,
|
||||
download,
|
||||
get,
|
||||
get_application_log,
|
||||
get_application_status,
|
||||
get_configuration,
|
||||
get_remote_login_settings,
|
||||
list,
|
||||
node_run,
|
||||
run,
|
||||
ssh_into_master,
|
||||
submit,
|
||||
wait,
|
||||
)
|
||||
|
||||
|
||||
class ClusterOperations(SparkBaseOperations):
|
||||
|
@ -58,7 +74,8 @@ class ClusterOperations(SparkBaseOperations):
|
|||
"""List all clusters.
|
||||
|
||||
Returns:
|
||||
:obj:`List[aztk.spark.models.Cluster]`: List of Cluster objects each representing the state and configuration of the cluster.
|
||||
:obj:`List[aztk.spark.models.Cluster]`: List of Cluster objects each representing the state
|
||||
and configuration of the cluster.
|
||||
"""
|
||||
return list.list_clusters(self._core_cluster_operations)
|
||||
|
||||
|
@ -71,7 +88,8 @@ class ClusterOperations(SparkBaseOperations):
|
|||
remote (:obj:`bool`): If True, the application file will not be uploaded, it is assumed to be reachable
|
||||
by the cluster already. This is useful when your application is stored in a mounted Azure File Share
|
||||
and not the client. Defaults to False.
|
||||
wait (:obj:`bool`, optional): If True, this function blocks until the application has completed. Defaults to False.
|
||||
wait (:obj:`bool`, optional): If True, this function blocks until the application has completed.
|
||||
Defaults to False.
|
||||
|
||||
Returns:
|
||||
:obj:`None`
|
||||
|
@ -84,7 +102,8 @@ class ClusterOperations(SparkBaseOperations):
|
|||
Args:
|
||||
username (:obj:`str`): name of the user to create.
|
||||
pool_id (:obj:`str`): id of the cluster to create the user on.
|
||||
ssh_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_key or password. Defaults to None.
|
||||
ssh_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_key or password.
|
||||
Defaults to None.
|
||||
password (:obj:`str`, optional): password for the user, must use ssh_key or password. Defaults to None.
|
||||
|
||||
Returns:
|
||||
|
@ -118,7 +137,8 @@ class ClusterOperations(SparkBaseOperations):
|
|||
Defaults to None.
|
||||
|
||||
Returns:
|
||||
:obj:`List[aztk.spark.models.NodeOutput]`: list of NodeOutput objects containing the output of the run command
|
||||
:obj:`List[aztk.spark.models.NodeOutput]`:
|
||||
list of NodeOutput objects containing the output of the run command
|
||||
"""
|
||||
return run.cluster_run(self._core_cluster_operations, id, command, host, internal, timeout)
|
||||
|
||||
|
@ -141,13 +161,15 @@ class ClusterOperations(SparkBaseOperations):
|
|||
"""
|
||||
return node_run.node_run(self._core_cluster_operations, id, node_id, command, host, internal, timeout)
|
||||
|
||||
def copy(self,
|
||||
id: str,
|
||||
source_path: str,
|
||||
destination_path: str,
|
||||
host: bool = False,
|
||||
internal: bool = False,
|
||||
timeout: int = None):
|
||||
def copy(
|
||||
self,
|
||||
id: str,
|
||||
source_path: str,
|
||||
destination_path: str,
|
||||
host: bool = False,
|
||||
internal: bool = False,
|
||||
timeout: int = None,
|
||||
):
|
||||
"""Copy a file to every node in a cluster.
|
||||
|
||||
Args:
|
||||
|
@ -162,18 +184,21 @@ class ClusterOperations(SparkBaseOperations):
|
|||
Defaults to None.
|
||||
|
||||
Returns:
|
||||
:obj:`List[aztk.spark.models.NodeOutput]`: A list of NodeOutput objects representing the output of the copy command.
|
||||
:obj:`List[aztk.spark.models.NodeOutput]`:
|
||||
A list of NodeOutput objects representing the output of the copy command.
|
||||
"""
|
||||
return copy.cluster_copy(self._core_cluster_operations, id, source_path, destination_path, host, internal,
|
||||
timeout)
|
||||
|
||||
def download(self,
|
||||
id: str,
|
||||
source_path: str,
|
||||
destination_path: str = None,
|
||||
host: bool = False,
|
||||
internal: bool = False,
|
||||
timeout: int = None):
|
||||
def download(
|
||||
self,
|
||||
id: str,
|
||||
source_path: str,
|
||||
destination_path: str = None,
|
||||
host: bool = False,
|
||||
internal: bool = False,
|
||||
timeout: int = None,
|
||||
):
|
||||
"""Download a file from every node in a cluster.
|
||||
|
||||
Args:
|
||||
|
@ -190,7 +215,8 @@ class ClusterOperations(SparkBaseOperations):
|
|||
Defaults to None.
|
||||
|
||||
Returns:
|
||||
:obj:`List[aztk.spark.models.NodeOutput]`: A list of NodeOutput objects representing the output of the copy command.
|
||||
:obj:`List[aztk.spark.models.NodeOutput]`:
|
||||
A list of NodeOutput objects representing the output of the copy command.
|
||||
"""
|
||||
return download.cluster_download(self._core_cluster_operations, id, source_path, destination_path, host,
|
||||
internal, timeout)
|
||||
|
@ -205,7 +231,8 @@ class ClusterOperations(SparkBaseOperations):
|
|||
written to this path. Defaults to None.
|
||||
|
||||
Returns:
|
||||
:obj:`List[aztk.spark.models.NodeOutput]`: A list of NodeOutput objects representing the output of the copy command.
|
||||
:obj:`List[aztk.spark.models.NodeOutput]`:
|
||||
A list of NodeOutput objects representing the output of the copy command.
|
||||
"""
|
||||
return diagnostics.run_cluster_diagnostics(self, id, output_directory, brief)
|
||||
|
||||
|
@ -215,10 +242,11 @@ class ClusterOperations(SparkBaseOperations):
|
|||
Args:
|
||||
id (:obj:`str`): the id of the cluster to run the command on.
|
||||
application_name (:obj:`str`): str
|
||||
tail (:obj:`bool`, optional): If True, get the remaining bytes after current_bytes. Otherwise, the whole log will be retrieved.
|
||||
Only use this if streaming the log as it is being written. Defaults to False.
|
||||
current_bytes (:obj:`int`): Specifies the last seen byte, so only the bytes after current_bytes are retrieved.
|
||||
Only useful is streaming the log as it is being written. Only used if tail is True.
|
||||
tail (:obj:`bool`, optional): If True, get the remaining bytes after current_bytes.
|
||||
Otherwise, the whole log will be retrieved. Only use this if streaming the log as it is being written.
|
||||
Defaults to False.
|
||||
current_bytes (:obj:`int`): Specifies the last seen byte, so only the bytes after current_bytes are
|
||||
retrieved. Only useful is streaming the log as it is being written. Only used if tail is True.
|
||||
|
||||
Returns:
|
||||
:obj:`aztk.spark.models.ApplicationLog`: a model representing the output of the application.
|
||||
|
@ -234,7 +262,8 @@ class ClusterOperations(SparkBaseOperations):
|
|||
node_id (:obj:`str`): the id of the node in the cluster
|
||||
|
||||
Returns:
|
||||
:obj:`aztk.spark.models.RemoteLogin`: Object that contains the ip address and port combination to login to a node
|
||||
:obj:`aztk.spark.models.RemoteLogin`:
|
||||
Object that contains the ip address and port combination to login to a node
|
||||
"""
|
||||
return get_remote_login_settings.get_remote_login_settings(self._core_cluster_operations, id, node_id)
|
||||
|
||||
|
@ -260,3 +289,21 @@ class ClusterOperations(SparkBaseOperations):
|
|||
:obj:`aztk.spark.models.ClusterConfiguration`
|
||||
"""
|
||||
return get_configuration.get_configuration(self._core_cluster_operations, id)
|
||||
|
||||
def ssh_into_master(self, id, username, ssh_key=None, password=None, port_forward_list=None, internal=False):
|
||||
"""Open an SSH tunnel to the Spark master node and forward the specified ports
|
||||
|
||||
Args:
|
||||
id (:obj:`str`): the id of the cluster
|
||||
username (:obj:`str`): the name of the user to open the ssh session with
|
||||
ssh_key (:obj:`str`, optional): the ssh_key to authenticate the ssh user with.
|
||||
Must specify either `ssh_key` or `password`.
|
||||
password (:obj:`str`, optional): the password to authenticate the ssh user with.
|
||||
Must specify either `password` or `ssh_key`.
|
||||
port_forward_list (:obj:`aztk.spark.models.PortForwardingSpecification`, optional):
|
||||
List of the ports to forward.
|
||||
internal (:obj:`str`, optional): if True, this will connect to the node using its internal IP.
|
||||
Only use this if running within the same VNET as the cluster. Defaults to False.
|
||||
"""
|
||||
return ssh_into_master.ssh_into_master(self, self._core_cluster_operations, id, username, ssh_key, password,
|
||||
port_forward_list, internal)
|
||||
|
|
|
@ -2,7 +2,6 @@ import azure.batch.models as batch_models
|
|||
import azure.batch.models.batch_error as batch_error
|
||||
|
||||
from aztk import error
|
||||
from aztk.spark import models
|
||||
from aztk.utils import helpers
|
||||
|
||||
from .get_recent_job import get_recent_job
|
||||
|
|
|
@ -5,7 +5,6 @@ from aztk import error
|
|||
from aztk.spark import models
|
||||
from aztk.utils import helpers
|
||||
|
||||
from .list_applications import list_applications
|
||||
from .get_recent_job import get_recent_job
|
||||
|
||||
|
||||
|
@ -25,8 +24,11 @@ def _get_application_log(core_job_operations, spark_job_operations, job_id, appl
|
|||
raise error.AztkError("The application {0} has not yet been created.".format(application))
|
||||
raise error.AztkError("The application {0} does not exist".format(application_name))
|
||||
else:
|
||||
if task.state in (batch_models.TaskState.active, batch_models.TaskState.running,
|
||||
batch_models.TaskState.preparing):
|
||||
if task.state in (
|
||||
batch_models.TaskState.active,
|
||||
batch_models.TaskState.running,
|
||||
batch_models.TaskState.preparing,
|
||||
):
|
||||
raise error.AztkError("The application {0} has not yet finished executing.".format(application_name))
|
||||
|
||||
return core_job_operations.get_application_log(job_id, application_name)
|
||||
|
|
|
@ -13,7 +13,7 @@ def _list_applications(core_job_operations, job_id):
|
|||
applications = {}
|
||||
for metadata_item in recent_run_job.metadata:
|
||||
if metadata_item.name == "applications":
|
||||
for app_name in metadata_item.value.split('\n'):
|
||||
for app_name in metadata_item.value.split("\n"):
|
||||
applications[app_name] = None
|
||||
|
||||
# get tasks from Batch job
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import azure.batch.models.batch_error as batch_error
|
||||
|
||||
from aztk import error
|
||||
from aztk.spark import models
|
||||
from aztk.utils import helpers
|
||||
|
||||
from .get_recent_job import get_recent_job
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
import azure.batch.models.batch_error as batch_error
|
||||
|
||||
from aztk import error
|
||||
from aztk.spark import models
|
||||
from aztk.utils import helpers
|
||||
from .get_recent_job import get_recent_job
|
||||
|
||||
|
||||
|
|
|
@ -15,11 +15,12 @@ def __app_cmd():
|
|||
docker_exec.add_argument("-i")
|
||||
docker_exec.add_option("-e", "AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR")
|
||||
docker_exec.add_option("-e", "AZ_BATCH_JOB_ID=$AZ_BATCH_JOB_ID")
|
||||
docker_exec.add_argument("spark /bin/bash >> output.log 2>&1 -c \"" \
|
||||
"source ~/.bashrc; " \
|
||||
"export PYTHONPATH=$PYTHONPATH:\$AZTK_WORKING_DIR; " \
|
||||
"cd \$AZ_BATCH_TASK_WORKING_DIR; " \
|
||||
"\$AZTK_WORKING_DIR/.aztk-env/.venv/bin/python \$AZTK_WORKING_DIR/aztk/node_scripts/job_submission.py\"")
|
||||
docker_exec.add_argument(
|
||||
r'spark /bin/bash >> output.log 2>&1 -c "'
|
||||
r"source ~/.bashrc; "
|
||||
r"export PYTHONPATH=$PYTHONPATH:\$AZTK_WORKING_DIR; "
|
||||
r"cd \$AZ_BATCH_TASK_WORKING_DIR; "
|
||||
r'\$AZTK_WORKING_DIR/.aztk-env/.venv/bin/python \$AZTK_WORKING_DIR/aztk/node_scripts/job_submission.py"')
|
||||
return docker_exec.to_str()
|
||||
|
||||
|
||||
|
@ -28,10 +29,11 @@ def generate_job_manager_task(core_job_operations, job, application_tasks):
|
|||
for application, task in application_tasks:
|
||||
task_definition_resource_file = helpers.upload_text_to_container(
|
||||
container_name=job.id,
|
||||
application_name=application.name + '.yaml',
|
||||
file_path=application.name + '.yaml',
|
||||
application_name=application.name + ".yaml",
|
||||
file_path=application.name + ".yaml",
|
||||
content=yaml.dump(task),
|
||||
blob_client=core_job_operations.blob_client)
|
||||
blob_client=core_job_operations.blob_client,
|
||||
)
|
||||
resource_files.append(task_definition_resource_file)
|
||||
|
||||
task_cmd = __app_cmd()
|
||||
|
@ -45,7 +47,8 @@ def generate_job_manager_task(core_job_operations, job, application_tasks):
|
|||
allow_low_priority_node=True,
|
||||
user_identity=batch_models.UserIdentity(
|
||||
auto_user=batch_models.AutoUserSpecification(
|
||||
scope=batch_models.AutoUserScope.task, elevation_level=batch_models.ElevationLevel.admin)))
|
||||
scope=batch_models.AutoUserScope.task, elevation_level=batch_models.ElevationLevel.admin)),
|
||||
)
|
||||
|
||||
return task
|
||||
|
||||
|
@ -83,24 +86,24 @@ def submit_job(core_job_operations,
|
|||
job_configuration.get_docker_repo(),
|
||||
job_configuration.get_docker_run_options(),
|
||||
mixed_mode=job_configuration.mixed_mode(),
|
||||
worker_on_master=job_configuration.worker_on_master)
|
||||
worker_on_master=job_configuration.worker_on_master,
|
||||
)
|
||||
|
||||
application_tasks = []
|
||||
for application in job_configuration.applications:
|
||||
application_tasks.append((application,
|
||||
spark_job_operations._generate_application_task(
|
||||
core_job_operations, job_configuration.id, application)))
|
||||
application_tasks.append((
|
||||
application,
|
||||
spark_job_operations._generate_application_task(core_job_operations, job_configuration.id, application),
|
||||
))
|
||||
|
||||
job_manager_task = generate_job_manager_task(core_job_operations, job_configuration, application_tasks)
|
||||
|
||||
software_metadata_key = base_models.Software.spark
|
||||
|
||||
vm_image = models.VmImage(publisher='Canonical', offer='UbuntuServer', sku='16.04')
|
||||
vm_image = models.VmImage(publisher="Canonical", offer="UbuntuServer", sku="16.04")
|
||||
|
||||
autoscale_formula = "$TargetDedicatedNodes = {0}; " \
|
||||
"$TargetLowPriorityNodes = {1}".format(
|
||||
job_configuration.max_dedicated_nodes,
|
||||
job_configuration.max_low_pri_nodes)
|
||||
autoscale_formula = "$TargetDedicatedNodes = {0}; " "$TargetLowPriorityNodes = {1}".format(
|
||||
job_configuration.max_dedicated_nodes, job_configuration.max_low_pri_nodes)
|
||||
|
||||
job = core_job_operations.submit(
|
||||
job_configuration=job_configuration,
|
||||
|
@ -109,7 +112,8 @@ def submit_job(core_job_operations,
|
|||
autoscale_formula=autoscale_formula,
|
||||
software_metadata_key=software_metadata_key,
|
||||
vm_image_model=vm_image,
|
||||
application_metadata='\n'.join(application.name for application in (job_configuration.applications or [])))
|
||||
application_metadata="\n".join(application.name for application in (job_configuration.applications or [])),
|
||||
)
|
||||
|
||||
if wait:
|
||||
spark_job_operations.wait(id=job_configuration.id)
|
||||
|
|
|
@ -2,8 +2,18 @@ from aztk.client.job import CoreJobOperations
|
|||
from aztk.spark import models
|
||||
from aztk.spark.client.base import SparkBaseOperations
|
||||
|
||||
from .helpers import (delete, get, get_application, get_application_log, list, list_applications, stop,
|
||||
stop_application, submit, wait_until_complete)
|
||||
from .helpers import (
|
||||
delete,
|
||||
get,
|
||||
get_application,
|
||||
get_application_log,
|
||||
list,
|
||||
list_applications,
|
||||
stop,
|
||||
stop_application,
|
||||
submit,
|
||||
wait_until_complete,
|
||||
)
|
||||
|
||||
|
||||
class JobOperations(SparkBaseOperations):
|
||||
|
|
|
@ -1,8 +1,4 @@
|
|||
import os
|
||||
from aztk.utils import ssh
|
||||
from aztk.utils.command_builder import CommandBuilder
|
||||
from aztk import models as aztk_models
|
||||
import azure.batch.models as batch_models
|
||||
|
||||
|
||||
def run(spark_client, cluster_id, output_directory=None):
|
||||
|
@ -17,8 +13,8 @@ def run(spark_client, cluster_id, output_directory=None):
|
|||
output = spark_client.cluster_download(cluster_id, remote_path, local_path, host=True)
|
||||
|
||||
# write run output to debug/ directory
|
||||
with open(os.path.join(os.path.dirname(local_path), "debug-output.txt"), 'w', encoding="UTF-8") as f:
|
||||
[f.write(line + '\n') for node_output in run_output for line in node_output.output]
|
||||
with open(os.path.join(os.path.dirname(local_path), "debug-output.txt"), "w", encoding="UTF-8") as f:
|
||||
[f.write(line + "\n") for node_output in run_output for line in node_output.output]
|
||||
else:
|
||||
output = spark_client.cluster_download(cluster_id, remote_path, host=True)
|
||||
|
||||
|
@ -26,8 +22,4 @@ def run(spark_client, cluster_id, output_directory=None):
|
|||
|
||||
|
||||
def _build_diagnostic_ssh_command():
|
||||
return "sudo rm -rf /tmp/debug.zip; "\
|
||||
"sudo apt-get install -y python3-pip; "\
|
||||
"sudo -H pip3 install --upgrade pip; "\
|
||||
"sudo -H pip3 install docker; "\
|
||||
"sudo python3 /tmp/debug.py"
|
||||
return "sudo rm -rf /tmp/debug.zip; " "sudo apt-get install -y python3-pip; " "sudo -H pip3 install --upgrade pip; " "sudo -H pip3 install docker; " "sudo python3 /tmp/debug.py"
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
from typing import List
|
||||
from aztk.utils.command_builder import CommandBuilder
|
||||
from aztk.utils import helpers
|
||||
from aztk.utils import constants
|
||||
from aztk import models as aztk_models
|
||||
from aztk.spark.models import ClusterConfiguration
|
||||
import azure.batch.models as batch_models
|
||||
|
||||
POOL_ADMIN_USER_IDENTITY = batch_models.UserIdentity(
|
||||
|
@ -56,14 +54,16 @@ def __get_secrets_env(spark_client):
|
|||
]
|
||||
|
||||
|
||||
def __cluster_install_cmd(zip_resource_file: batch_models.ResourceFile,
|
||||
gpu_enabled: bool,
|
||||
docker_repo: str = None,
|
||||
docker_run_options: str = None,
|
||||
plugins=None,
|
||||
worker_on_master: bool = True,
|
||||
file_mounts=None,
|
||||
mixed_mode: bool = False):
|
||||
def __cluster_install_cmd(
|
||||
zip_resource_file: batch_models.ResourceFile,
|
||||
gpu_enabled: bool,
|
||||
docker_repo: str = None,
|
||||
docker_run_options: str = None,
|
||||
plugins=None,
|
||||
worker_on_master: bool = True,
|
||||
file_mounts=None,
|
||||
mixed_mode: bool = False,
|
||||
):
|
||||
"""
|
||||
For Docker on ubuntu 16.04 - return the command line
|
||||
to be run on the start task of the pool to setup spark.
|
||||
|
@ -77,41 +77,41 @@ def __cluster_install_cmd(zip_resource_file: batch_models.ResourceFile,
|
|||
if file_mounts:
|
||||
for mount in file_mounts:
|
||||
# Create the directory on the node
|
||||
shares.append('mkdir -p {0}'.format(mount.mount_path))
|
||||
shares.append("mkdir -p {0}".format(mount.mount_path))
|
||||
|
||||
# Mount the file share
|
||||
shares.append(
|
||||
'mount -t cifs //{0}.file.core.windows.net/{2} {3} -o vers=3.0,username={0},password={1},dir_mode=0777,file_mode=0777,sec=ntlmssp'.
|
||||
format(mount.storage_account_name, mount.storage_account_key, mount.file_share_path, mount.mount_path))
|
||||
shares.append("mount -t cifs //{0}.file.core.windows.net/{2} {3} "
|
||||
"-o vers=3.0,username={0},password={1},dir_mode=0777,file_mode=0777,sec=ntlmssp".format(
|
||||
mount.storage_account_name, mount.storage_account_key, mount.file_share_path,
|
||||
mount.mount_path))
|
||||
|
||||
setup = [
|
||||
'time('\
|
||||
'apt-get -y update;'\
|
||||
'apt-get -y --no-install-recommends install unzip;'\
|
||||
'unzip -o $AZ_BATCH_TASK_WORKING_DIR/{0};'\
|
||||
'chmod 777 $AZ_BATCH_TASK_WORKING_DIR/aztk/node_scripts/setup_host.sh;'\
|
||||
') 2>&1'.format(zip_resource_file.file_path),
|
||||
"time("
|
||||
"apt-get -y update;"
|
||||
"apt-get -y --no-install-recommends install unzip;"
|
||||
"unzip -o $AZ_BATCH_TASK_WORKING_DIR/{0};"
|
||||
"chmod 777 $AZ_BATCH_TASK_WORKING_DIR/aztk/node_scripts/setup_host.sh;"
|
||||
") 2>&1".format(zip_resource_file.file_path),
|
||||
'/bin/bash $AZ_BATCH_TASK_WORKING_DIR/aztk/node_scripts/setup_host.sh {0} {1} "{2}"'.format(
|
||||
constants.DOCKER_SPARK_CONTAINER_NAME,
|
||||
docker_repo,
|
||||
docker_run_options.replace('"', '\\\"')
|
||||
)
|
||||
constants.DOCKER_SPARK_CONTAINER_NAME, docker_repo, docker_run_options.replace('"', '\\"')),
|
||||
]
|
||||
|
||||
commands = shares + setup
|
||||
return commands
|
||||
|
||||
|
||||
def generate_cluster_start_task(spark_client,
|
||||
zip_resource_file: batch_models.ResourceFile,
|
||||
cluster_id: str,
|
||||
gpu_enabled: bool,
|
||||
docker_repo: str = None,
|
||||
docker_run_options: str = None,
|
||||
file_shares: List[aztk_models.FileShare] = None,
|
||||
plugins: List[aztk_models.PluginConfiguration] = None,
|
||||
mixed_mode: bool = False,
|
||||
worker_on_master: bool = True):
|
||||
def generate_cluster_start_task(
|
||||
spark_client,
|
||||
zip_resource_file: batch_models.ResourceFile,
|
||||
cluster_id: str,
|
||||
gpu_enabled: bool,
|
||||
docker_repo: str = None,
|
||||
docker_run_options: str = None,
|
||||
file_shares: List[aztk_models.FileShare] = None,
|
||||
plugins: List[aztk_models.PluginConfiguration] = None,
|
||||
mixed_mode: bool = False,
|
||||
worker_on_master: bool = True,
|
||||
):
|
||||
"""
|
||||
This will return the start task object for the pool to be created.
|
||||
:param cluster_id str: Id of the cluster(Used for uploading the resource files)
|
||||
|
@ -127,22 +127,31 @@ def generate_cluster_start_task(spark_client,
|
|||
spark_submit_logs_file = constants.SPARK_SUBMIT_LOGS_FILE
|
||||
|
||||
# TODO use certificate
|
||||
environment_settings = __get_secrets_env(spark_client) + [
|
||||
environment_settings = (__get_secrets_env(spark_client) + [
|
||||
batch_models.EnvironmentSetting(name="SPARK_WEB_UI_PORT", value=spark_web_ui_port),
|
||||
batch_models.EnvironmentSetting(name="SPARK_WORKER_UI_PORT", value=spark_worker_ui_port),
|
||||
batch_models.EnvironmentSetting(name="SPARK_JOB_UI_PORT", value=spark_job_ui_port),
|
||||
batch_models.EnvironmentSetting(name="SPARK_CONTAINER_NAME", value=spark_container_name),
|
||||
batch_models.EnvironmentSetting(name="SPARK_SUBMIT_LOGS_FILE", value=spark_submit_logs_file),
|
||||
batch_models.EnvironmentSetting(name="AZTK_GPU_ENABLED", value=helpers.bool_env(gpu_enabled)),
|
||||
] + __get_docker_credentials(spark_client) + _get_aztk_environment(cluster_id, worker_on_master, mixed_mode)
|
||||
] + __get_docker_credentials(spark_client) + _get_aztk_environment(cluster_id, worker_on_master, mixed_mode))
|
||||
|
||||
# start task command
|
||||
command = __cluster_install_cmd(zip_resource_file, gpu_enabled, docker_repo, docker_run_options, plugins,
|
||||
worker_on_master, file_shares, mixed_mode)
|
||||
command = __cluster_install_cmd(
|
||||
zip_resource_file,
|
||||
gpu_enabled,
|
||||
docker_repo,
|
||||
docker_run_options,
|
||||
plugins,
|
||||
worker_on_master,
|
||||
file_shares,
|
||||
mixed_mode,
|
||||
)
|
||||
|
||||
return batch_models.StartTask(
|
||||
command_line=helpers.wrap_commands_in_shell(command),
|
||||
resource_files=resource_files,
|
||||
environment_settings=environment_settings,
|
||||
user_identity=POOL_ADMIN_USER_IDENTITY,
|
||||
wait_for_success=True)
|
||||
wait_for_success=True,
|
||||
)
|
||||
|
|
|
@ -9,8 +9,7 @@ from aztk import models as base_models
|
|||
from aztk.spark import models
|
||||
from aztk.utils import constants, helpers
|
||||
|
||||
output_file = constants.TASK_WORKING_DIR + \
|
||||
"/" + constants.SPARK_SUBMIT_LOGS_FILE
|
||||
output_file = constants.TASK_WORKING_DIR + "/" + constants.SPARK_SUBMIT_LOGS_FILE
|
||||
|
||||
|
||||
def __check_task_node_exist(batch_client, cluster_id: str, task: batch_models.CloudTask) -> bool:
|
||||
|
@ -51,16 +50,17 @@ def __get_output_file_properties(batch_client, cluster_id: str, application_name
|
|||
|
||||
def get_log_from_storage(blob_client, container_name, application_name, task):
|
||||
try:
|
||||
blob = blob_client.get_blob_to_text(container_name, application_name + '/' + constants.SPARK_SUBMIT_LOGS_FILE)
|
||||
blob = blob_client.get_blob_to_text(container_name, application_name + "/" + constants.SPARK_SUBMIT_LOGS_FILE)
|
||||
except azure.common.AzureMissingResourceHttpError:
|
||||
raise error.AztkError("Logs not found in your storage account. They were either deleted or never existed.")
|
||||
base_model = base_models.ApplicationLog(
|
||||
name=application_name,
|
||||
cluster_id=container_name,
|
||||
application_state=task.state._value_,
|
||||
application_state=task.state.name,
|
||||
log=blob.content,
|
||||
total_bytes=blob.properties.content_length,
|
||||
exit_code=task.execution_info.exit_code)
|
||||
exit_code=task.execution_info.exit_code,
|
||||
)
|
||||
return models.ApplicationLog(base_model)
|
||||
|
||||
|
||||
|
@ -88,17 +88,19 @@ def get_log(batch_client, blob_client, cluster_id: str, application_name: str, t
|
|||
base_model = base_models.ApplicationLog(
|
||||
name=application_name,
|
||||
cluster_id=cluster_id,
|
||||
application_state=task.state._value_,
|
||||
application_state=task.state.name,
|
||||
log=content,
|
||||
total_bytes=target_bytes,
|
||||
exit_code=task.execution_info.exit_code)
|
||||
exit_code=task.execution_info.exit_code,
|
||||
)
|
||||
return models.ApplicationLog(base_model)
|
||||
else:
|
||||
base_model = base_models.ApplicationLog(
|
||||
name=application_name,
|
||||
cluster_id=cluster_id,
|
||||
application_state=task.state._value_,
|
||||
log='',
|
||||
application_state=task.state.name,
|
||||
log="",
|
||||
total_bytes=target_bytes,
|
||||
exit_code=task.execution_info.exit_code)
|
||||
exit_code=task.execution_info.exit_code,
|
||||
)
|
||||
return models.ApplicationLog(base_model)
|
||||
|
|
|
@ -1,17 +1,11 @@
|
|||
import datetime
|
||||
import os
|
||||
import time
|
||||
from typing import List
|
||||
|
||||
import azure.batch.models as batch_models
|
||||
import yaml
|
||||
|
||||
import aztk.error as error
|
||||
from aztk.utils import constants, helpers
|
||||
from aztk.utils import helpers
|
||||
from aztk.utils.command_builder import CommandBuilder
|
||||
'''
|
||||
Job Submission helper methods
|
||||
'''
|
||||
|
||||
|
||||
def __app_cmd():
|
||||
|
@ -19,11 +13,12 @@ def __app_cmd():
|
|||
docker_exec.add_argument("-i")
|
||||
docker_exec.add_option("-e", "AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR")
|
||||
docker_exec.add_option("-e", "AZ_BATCH_JOB_ID=$AZ_BATCH_JOB_ID")
|
||||
docker_exec.add_argument("spark /bin/bash >> output.log 2>&1 -c \"" \
|
||||
"source ~/.bashrc; " \
|
||||
"export PYTHONPATH=$PYTHONPATH:\$AZTK_WORKING_DIR; " \
|
||||
"cd \$AZ_BATCH_TASK_WORKING_DIR; " \
|
||||
"\$AZTK_WORKING_DIR/.aztk-env/.venv/bin/python \$AZTK_WORKING_DIR/aztk/node_scripts/job_submission.py\"")
|
||||
docker_exec.add_argument(
|
||||
r'spark /bin/bash >> output.log 2>&1 -c "'
|
||||
r"source ~/.bashrc; "
|
||||
r"export PYTHONPATH=$PYTHONPATH:\$AZTK_WORKING_DIR; "
|
||||
r"cd \$AZ_BATCH_TASK_WORKING_DIR; "
|
||||
r'\$AZTK_WORKING_DIR/.aztk-env/.venv/bin/python \$AZTK_WORKING_DIR/aztk/node_scripts/job_submission.py"')
|
||||
return docker_exec.to_str()
|
||||
|
||||
|
||||
|
@ -32,10 +27,11 @@ def generate_task(spark_client, job, application_tasks):
|
|||
for application, task in application_tasks:
|
||||
task_definition_resource_file = helpers.upload_text_to_container(
|
||||
container_name=job.id,
|
||||
application_name=application.name + '.yaml',
|
||||
file_path=application.name + '.yaml',
|
||||
application_name=application.name + ".yaml",
|
||||
file_path=application.name + ".yaml",
|
||||
content=yaml.dump(task),
|
||||
blob_client=spark_client.blob_client)
|
||||
blob_client=spark_client.blob_client,
|
||||
)
|
||||
resource_files.append(task_definition_resource_file)
|
||||
|
||||
task_cmd = __app_cmd()
|
||||
|
@ -49,7 +45,8 @@ def generate_task(spark_client, job, application_tasks):
|
|||
allow_low_priority_node=True,
|
||||
user_identity=batch_models.UserIdentity(
|
||||
auto_user=batch_models.AutoUserSpecification(
|
||||
scope=batch_models.AutoUserScope.task, elevation_level=batch_models.ElevationLevel.admin)))
|
||||
scope=batch_models.AutoUserScope.task, elevation_level=batch_models.ElevationLevel.admin)),
|
||||
)
|
||||
|
||||
return task
|
||||
|
||||
|
@ -69,7 +66,7 @@ def list_applications(spark_client, job_id):
|
|||
applications = {}
|
||||
for metadata_item in recent_run_job.metadata:
|
||||
if metadata_item.name == "applications":
|
||||
for app_name in metadata_item.value.split('\n'):
|
||||
for app_name in metadata_item.value.split("\n"):
|
||||
applications[app_name] = None
|
||||
|
||||
# get tasks from Batch job
|
||||
|
@ -177,8 +174,11 @@ def get_application_log(spark_client, job_id, application_name):
|
|||
raise error.AztkError("The application {0} has not yet been created.".format(application))
|
||||
raise error.AztkError("The application {0} does not exist".format(application_name))
|
||||
else:
|
||||
if task.state in (batch_models.TaskState.active, batch_models.TaskState.running,
|
||||
batch_models.TaskState.preparing):
|
||||
if task.state in (
|
||||
batch_models.TaskState.active,
|
||||
batch_models.TaskState.running,
|
||||
batch_models.TaskState.preparing,
|
||||
):
|
||||
raise error.AztkError("The application {0} has not yet finished executing.".format(application_name))
|
||||
|
||||
return spark_client.get_application_log(job_id, application_name)
|
||||
|
|
|
@ -1,14 +1,11 @@
|
|||
import datetime
|
||||
import os
|
||||
from typing import List
|
||||
import yaml
|
||||
|
||||
import azure.batch.models as batch_models
|
||||
import yaml
|
||||
|
||||
from aztk.error import AztkError
|
||||
from aztk.utils import constants, helpers
|
||||
from aztk.utils import helpers
|
||||
from aztk.utils.command_builder import CommandBuilder
|
||||
'''
|
||||
Submit helper methods
|
||||
'''
|
||||
|
||||
|
||||
def __get_node(spark_client, node_id: str, cluster_id: str) -> batch_models.ComputeNode:
|
||||
|
@ -25,12 +22,13 @@ def generate_task(spark_client, container_id, application, remote=False):
|
|||
application_name=application.name,
|
||||
file_path=application.application,
|
||||
blob_client=spark_client.blob_client,
|
||||
use_full_path=False)
|
||||
use_full_path=False,
|
||||
)
|
||||
|
||||
# Upload application file
|
||||
resource_files.append(app_resource_file)
|
||||
|
||||
application.application = '$AZ_BATCH_TASK_WORKING_DIR/' + os.path.basename(application.application)
|
||||
application.application = "$AZ_BATCH_TASK_WORKING_DIR/" + os.path.basename(application.application)
|
||||
|
||||
# Upload dependent JARS
|
||||
jar_resource_file_paths = []
|
||||
|
@ -40,7 +38,8 @@ def generate_task(spark_client, container_id, application, remote=False):
|
|||
application_name=application.name,
|
||||
file_path=jar,
|
||||
blob_client=spark_client.blob_client,
|
||||
use_full_path=False)
|
||||
use_full_path=False,
|
||||
)
|
||||
jar_resource_file_paths.append(current_jar_resource_file_path)
|
||||
resource_files.append(current_jar_resource_file_path)
|
||||
|
||||
|
@ -52,7 +51,8 @@ def generate_task(spark_client, container_id, application, remote=False):
|
|||
application_name=application.name,
|
||||
file_path=py_file,
|
||||
blob_client=spark_client.blob_client,
|
||||
use_full_path=False)
|
||||
use_full_path=False,
|
||||
)
|
||||
py_files_resource_file_paths.append(current_py_files_resource_file_path)
|
||||
resource_files.append(current_py_files_resource_file_path)
|
||||
|
||||
|
@ -64,7 +64,8 @@ def generate_task(spark_client, container_id, application, remote=False):
|
|||
application_name=application.name,
|
||||
file_path=file,
|
||||
blob_client=spark_client.blob_client,
|
||||
use_full_path=False)
|
||||
use_full_path=False,
|
||||
)
|
||||
files_resource_file_paths.append(files_resource_file_path)
|
||||
resource_files.append(files_resource_file_path)
|
||||
|
||||
|
@ -75,21 +76,23 @@ def generate_task(spark_client, container_id, application, remote=False):
|
|||
application_definition_file = helpers.upload_text_to_container(
|
||||
container_name=container_id,
|
||||
application_name=application.name,
|
||||
file_path='application.yaml',
|
||||
file_path="application.yaml",
|
||||
content=yaml.dump(vars(application)),
|
||||
blob_client=spark_client.blob_client)
|
||||
blob_client=spark_client.blob_client,
|
||||
)
|
||||
resource_files.append(application_definition_file)
|
||||
|
||||
# create command to submit task
|
||||
task_cmd = CommandBuilder('sudo docker exec')
|
||||
task_cmd.add_argument('-i')
|
||||
task_cmd.add_option('-e', 'AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR')
|
||||
task_cmd.add_option('-e', 'STORAGE_LOGS_CONTAINER={0}'.format(container_id))
|
||||
task_cmd.add_argument('spark /bin/bash >> output.log 2>&1')
|
||||
task_cmd.add_argument('-c "source ~/.bashrc; ' \
|
||||
'export PYTHONPATH=$PYTHONPATH:\$AZTK_WORKING_DIR; ' \
|
||||
'cd \$AZ_BATCH_TASK_WORKING_DIR; ' \
|
||||
'\$AZTK_WORKING_DIR/.aztk-env/.venv/bin/python \$AZTK_WORKING_DIR/aztk/node_scripts/submit.py"')
|
||||
task_cmd = CommandBuilder("sudo docker exec")
|
||||
task_cmd.add_argument("-i")
|
||||
task_cmd.add_option("-e", "AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR")
|
||||
task_cmd.add_option("-e", "STORAGE_LOGS_CONTAINER={0}".format(container_id))
|
||||
task_cmd.add_argument("spark /bin/bash >> output.log 2>&1")
|
||||
task_cmd.add_argument(
|
||||
r'-c "source ~/.bashrc; '
|
||||
r"export PYTHONPATH=$PYTHONPATH:\$AZTK_WORKING_DIR; "
|
||||
r"cd \$AZ_BATCH_TASK_WORKING_DIR; "
|
||||
r'\$AZTK_WORKING_DIR/.aztk-env/.venv/bin/python \$AZTK_WORKING_DIR/aztk/node_scripts/submit.py"')
|
||||
|
||||
# Create task
|
||||
task = batch_models.TaskAddParameter(
|
||||
|
@ -99,7 +102,8 @@ def generate_task(spark_client, container_id, application, remote=False):
|
|||
constraints=batch_models.TaskConstraints(max_task_retry_count=application.max_retry_count),
|
||||
user_identity=batch_models.UserIdentity(
|
||||
auto_user=batch_models.AutoUserSpecification(
|
||||
scope=batch_models.AutoUserScope.task, elevation_level=batch_models.ElevationLevel.admin)))
|
||||
scope=batch_models.AutoUserScope.task, elevation_level=batch_models.ElevationLevel.admin)),
|
||||
)
|
||||
|
||||
return task
|
||||
|
||||
|
|
|
@ -12,11 +12,7 @@ from aztk.utils import constants, helpers
|
|||
class SparkToolkit(aztk.models.Toolkit):
|
||||
def __init__(self, version: str, environment: str = None, environment_version: str = None):
|
||||
super().__init__(
|
||||
software="spark",
|
||||
version=version,
|
||||
environment=environment,
|
||||
environment_version=environment_version,
|
||||
)
|
||||
software="spark", version=version, environment=environment, environment_version=environment_version)
|
||||
|
||||
|
||||
class Cluster(aztk.models.Cluster):
|
||||
|
@ -74,9 +70,9 @@ class SparkConfiguration(Model):
|
|||
|
||||
def __generate_ssh_key_pair(self):
|
||||
key = RSA.generate(2048)
|
||||
priv_key = key.exportKey('PEM')
|
||||
pub_key = key.publickey().exportKey('OpenSSH')
|
||||
return {'pub_key': pub_key, 'priv_key': priv_key}
|
||||
priv_key = key.exportKey("PEM")
|
||||
pub_key = key.publickey().exportKey("OpenSSH")
|
||||
return {"pub_key": pub_key, "priv_key": priv_key}
|
||||
|
||||
|
||||
class CustomScript(aztk.models.CustomScript):
|
||||
|
@ -124,22 +120,24 @@ class VmImage(aztk.models.VmImage):
|
|||
|
||||
|
||||
class ApplicationConfiguration:
|
||||
def __init__(self,
|
||||
name=None,
|
||||
application=None,
|
||||
application_args=None,
|
||||
main_class=None,
|
||||
jars=None,
|
||||
py_files=None,
|
||||
files=None,
|
||||
driver_java_options=None,
|
||||
driver_library_path=None,
|
||||
driver_class_path=None,
|
||||
driver_memory=None,
|
||||
executor_memory=None,
|
||||
driver_cores=None,
|
||||
executor_cores=None,
|
||||
max_retry_count=None):
|
||||
def __init__(
|
||||
self,
|
||||
name=None,
|
||||
application=None,
|
||||
application_args=None,
|
||||
main_class=None,
|
||||
jars=None,
|
||||
py_files=None,
|
||||
files=None,
|
||||
driver_java_options=None,
|
||||
driver_library_path=None,
|
||||
driver_class_path=None,
|
||||
driver_memory=None,
|
||||
executor_memory=None,
|
||||
driver_cores=None,
|
||||
executor_cores=None,
|
||||
max_retry_count=None,
|
||||
):
|
||||
self.name = name
|
||||
self.application = application
|
||||
self.application_args = application_args
|
||||
|
@ -162,11 +160,11 @@ class Application:
|
|||
self.name = cloud_task.id
|
||||
self.last_modified = cloud_task.last_modified
|
||||
self.creation_time = cloud_task.creation_time
|
||||
self.state = cloud_task.state._value_
|
||||
self.state = cloud_task.state.name
|
||||
self.state_transition_time = cloud_task.state_transition_time
|
||||
self.exit_code = cloud_task.execution_info.exit_code
|
||||
if cloud_task.previous_state:
|
||||
self.previous_state = cloud_task.previous_state._value_
|
||||
self.previous_state = cloud_task.previous_state.name
|
||||
self.previous_state_transition_time = cloud_task.previous_state_transition_time
|
||||
|
||||
self._execution_info = cloud_task.execution_info
|
||||
|
@ -190,17 +188,19 @@ class Application:
|
|||
|
||||
|
||||
class JobConfiguration:
|
||||
def __init__(self,
|
||||
id=None,
|
||||
applications=None,
|
||||
vm_size=None,
|
||||
spark_configuration=None,
|
||||
toolkit=None,
|
||||
max_dedicated_nodes=0,
|
||||
max_low_pri_nodes=0,
|
||||
subnet_id=None,
|
||||
scheduling_target: SchedulingTarget = None,
|
||||
worker_on_master=None):
|
||||
def __init__(
|
||||
self,
|
||||
id=None,
|
||||
applications=None,
|
||||
vm_size=None,
|
||||
spark_configuration=None,
|
||||
toolkit=None,
|
||||
max_dedicated_nodes=0,
|
||||
max_low_pri_nodes=0,
|
||||
subnet_id=None,
|
||||
scheduling_target: SchedulingTarget = None,
|
||||
worker_on_master=None,
|
||||
):
|
||||
|
||||
self.id = id
|
||||
self.applications = applications
|
||||
|
@ -252,24 +252,23 @@ class JobConfiguration:
|
|||
raise error.AztkError("Please supply an ID for the Job in your configuration.")
|
||||
|
||||
if self.max_dedicated_nodes == 0 and self.max_low_pri_nodes == 0:
|
||||
raise error.AztkError(
|
||||
"Please supply a valid (greater than 0) value for either max_dedicated_nodes or max_low_pri_nodes in your configuration."
|
||||
)
|
||||
raise error.AztkError("Please supply a valid (greater than 0) value for either max_dedicated_nodes "
|
||||
"or max_low_pri_nodes in your configuration.")
|
||||
|
||||
if self.vm_size is None:
|
||||
raise error.AztkError("Please supply a vm_size in your configuration.")
|
||||
|
||||
if self.mixed_mode() and not self.subnet_id:
|
||||
raise error.AztkError(
|
||||
"You must configure a VNET to use AZTK in mixed mode (dedicated and low priority nodes) and pass the subnet_id in your configuration.."
|
||||
)
|
||||
"You must configure a VNET to use AZTK in mixed mode (dedicated and low priority nodes) "
|
||||
"and pass the subnet_id in your configuration..")
|
||||
|
||||
if self.scheduling_target == SchedulingTarget.Dedicated and self.max_dedicated_nodes == 0:
|
||||
raise error.InvalidModelError("Scheduling target cannot be Dedicated if dedicated vm size is 0")
|
||||
|
||||
|
||||
class JobState():
|
||||
complete = 'completed'
|
||||
class JobState:
|
||||
complete = "completed"
|
||||
active = "active"
|
||||
completed = "completed"
|
||||
disabled = "disabled"
|
||||
|
@ -277,15 +276,17 @@ class JobState():
|
|||
deleting = "deleting"
|
||||
|
||||
|
||||
class Job():
|
||||
def __init__(self,
|
||||
cloud_job_schedule: batch_models.CloudJobSchedule,
|
||||
cloud_tasks: List[batch_models.CloudTask] = None,
|
||||
pool: batch_models.CloudPool = None,
|
||||
nodes: batch_models.ComputeNodePaged = None):
|
||||
class Job:
|
||||
def __init__(
|
||||
self,
|
||||
cloud_job_schedule: batch_models.CloudJobSchedule,
|
||||
cloud_tasks: List[batch_models.CloudTask] = None,
|
||||
pool: batch_models.CloudPool = None,
|
||||
nodes: batch_models.ComputeNodePaged = None,
|
||||
):
|
||||
self.id = cloud_job_schedule.id
|
||||
self.last_modified = cloud_job_schedule.last_modified
|
||||
self.state = cloud_job_schedule.state._value_
|
||||
self.state = cloud_job_schedule.state.name
|
||||
self.state_transition_time = cloud_job_schedule.state_transition_time
|
||||
self.creation_time = cloud_job_schedule.creation_time
|
||||
self.applications = [Application(task) for task in (cloud_tasks or [])]
|
||||
|
@ -297,9 +298,11 @@ class Job():
|
|||
|
||||
class ApplicationLog(aztk.models.ApplicationLog):
|
||||
def __init__(self, application_log: aztk.models.ApplicationLog):
|
||||
self.name = application_log.name
|
||||
self.cluster_id = application_log.cluster_id # TODO: change to something cluster/job agnostic
|
||||
self.log = application_log.log
|
||||
self.total_bytes = application_log.total_bytes
|
||||
self.application_state = application_log.application_state
|
||||
self.exit_code = application_log.exit_code
|
||||
super().__init__(
|
||||
name=application_log.name,
|
||||
cluster_id=application_log.cluster_id, # TODO: change to something cluster/job agnostic
|
||||
log=application_log.log,
|
||||
total_bytes=application_log.total_bytes,
|
||||
application_state=application_log.application_state,
|
||||
exit_code=application_log.exit_code,
|
||||
)
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import os
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_file import PluginFile
|
||||
from aztk.utils import constants
|
||||
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
@ -11,36 +10,14 @@ class HDFSPlugin(PluginConfiguration):
|
|||
super().__init__(
|
||||
name="hdfs",
|
||||
ports=[
|
||||
PluginPort(
|
||||
name="File system metadata operations",
|
||||
internal=8020,
|
||||
),
|
||||
PluginPort(
|
||||
name="File system metadata operations(Backup)",
|
||||
internal=9000,
|
||||
),
|
||||
PluginPort(
|
||||
name="Datanode data transfer",
|
||||
internal=50010,
|
||||
),
|
||||
PluginPort(
|
||||
name="Datanode IPC metadata operations",
|
||||
internal=50020,
|
||||
),
|
||||
PluginPort(
|
||||
name="Namenode",
|
||||
internal=50070,
|
||||
public=True,
|
||||
),
|
||||
PluginPort(
|
||||
name="Datanodes",
|
||||
internal=50075,
|
||||
public=True,
|
||||
),
|
||||
PluginPort(name="File system metadata operations", internal=8020),
|
||||
PluginPort(name="File system metadata operations(Backup)", internal=9000),
|
||||
PluginPort(name="Datanode data transfer", internal=50010),
|
||||
PluginPort(name="Datanode IPC metadata operations", internal=50020),
|
||||
PluginPort(name="Namenode", internal=50070, public=True),
|
||||
PluginPort(name="Datanodes", internal=50075, public=True),
|
||||
],
|
||||
target_role=PluginTargetRole.All,
|
||||
execute="hdfs.sh",
|
||||
files=[
|
||||
PluginFile("hdfs.sh", os.path.join(dir_path, "hdfs.sh")),
|
||||
],
|
||||
files=[PluginFile("hdfs.sh", os.path.join(dir_path, "hdfs.sh"))],
|
||||
)
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
import os
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_file import PluginFile
|
||||
from aztk.spark.models.plugins.install import InstallPlugin
|
||||
from aztk.utils import constants
|
||||
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
import os
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_file import PluginFile
|
||||
from aztk.spark.models.plugins.install import InstallPlugin
|
||||
from aztk.utils import constants
|
||||
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import os
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_file import PluginFile
|
||||
from aztk.utils import constants
|
||||
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
@ -13,4 +12,5 @@ def InstallPlugin(name, command, packages=None):
|
|||
execute="install.sh",
|
||||
files=[PluginFile("install.sh", os.path.join(dir_path, "install.sh"))],
|
||||
args=packages,
|
||||
env=dict(COMMAND=command))
|
||||
env=dict(COMMAND=command),
|
||||
)
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
import os
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_file import PluginFile
|
||||
from aztk.spark.models.plugins.install import InstallPlugin
|
||||
from aztk.utils import constants
|
||||
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
|
|
@ -8,15 +8,8 @@ dir_path = os.path.dirname(os.path.realpath(__file__))
|
|||
def JupyterPlugin():
|
||||
return PluginConfiguration(
|
||||
name="jupyter",
|
||||
ports=[
|
||||
PluginPort(
|
||||
internal=8888,
|
||||
public=True,
|
||||
),
|
||||
],
|
||||
ports=[PluginPort(internal=8888, public=True)],
|
||||
target_role=PluginTargetRole.All,
|
||||
execute="jupyter.sh",
|
||||
files=[
|
||||
PluginFile("jupyter.sh", os.path.join(dir_path, "jupyter.sh")),
|
||||
],
|
||||
files=[PluginFile("jupyter.sh", os.path.join(dir_path, "jupyter.sh"))],
|
||||
)
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import os
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_file import PluginFile
|
||||
from aztk.utils import constants
|
||||
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
@ -9,15 +8,8 @@ dir_path = os.path.dirname(os.path.realpath(__file__))
|
|||
def JupyterLabPlugin():
|
||||
return PluginConfiguration(
|
||||
name="jupyterlab",
|
||||
ports=[
|
||||
PluginPort(
|
||||
internal=8889,
|
||||
public=True,
|
||||
),
|
||||
],
|
||||
ports=[PluginPort(internal=8889, public=True)],
|
||||
target_role=PluginTargetRole.All,
|
||||
execute="jupyter_lab.sh",
|
||||
files=[
|
||||
PluginFile("jupyter_lab.sh", os.path.join(dir_path, "jupyter_lab.sh")),
|
||||
],
|
||||
files=[PluginFile("jupyter_lab.sh", os.path.join(dir_path, "jupyter_lab.sh"))],
|
||||
)
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import os
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_file import PluginFile
|
||||
from aztk.utils import constants
|
||||
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
@ -12,6 +11,5 @@ def NvBLASPlugin():
|
|||
ports=[],
|
||||
target_role=PluginTargetRole.All,
|
||||
execute="nvblas.sh",
|
||||
files=[
|
||||
PluginFile("nvblas.sh", os.path.join(dir_path, "nvblas.sh")),
|
||||
])
|
||||
files=[PluginFile("nvblas.sh", os.path.join(dir_path, "nvblas.sh"))],
|
||||
)
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import os
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_file import PluginFile
|
||||
from aztk.utils import constants
|
||||
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
@ -12,7 +11,5 @@ def OpenBLASPlugin():
|
|||
ports=[],
|
||||
target_role=PluginTargetRole.All,
|
||||
execute="openblas.sh",
|
||||
files=[
|
||||
PluginFile("openblas.sh", os.path.join(dir_path, "openblas.sh")),
|
||||
],
|
||||
files=[PluginFile("openblas.sh", os.path.join(dir_path, "openblas.sh"))],
|
||||
)
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import os
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTarget, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_file import PluginFile
|
||||
from aztk.utils import constants
|
||||
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
@ -10,12 +9,7 @@ class ResourceMonitorPlugin(PluginConfiguration):
|
|||
def __init__(self):
|
||||
super().__init__(
|
||||
name="resource_monitor",
|
||||
ports=[
|
||||
PluginPort(
|
||||
internal=8890,
|
||||
public=True,
|
||||
),
|
||||
],
|
||||
ports=[PluginPort(internal=8890, public=True)],
|
||||
target=PluginTarget.Host,
|
||||
target_role=PluginTargetRole.All,
|
||||
execute="start_monitor.sh",
|
||||
|
@ -23,4 +17,5 @@ class ResourceMonitorPlugin(PluginConfiguration):
|
|||
PluginFile("start_monitor.sh", os.path.join(dir_path, "start_monitor.sh")),
|
||||
PluginFile("etc/telegraf.conf", os.path.join(dir_path, "telegraf.conf")),
|
||||
PluginFile("docker-compose.yml", os.path.join(dir_path, "docker-compose.yml")),
|
||||
])
|
||||
],
|
||||
)
|
||||
|
|
|
@ -8,16 +8,9 @@ dir_path = os.path.dirname(os.path.realpath(__file__))
|
|||
def RStudioServerPlugin(version="1.1.383"):
|
||||
return PluginConfiguration(
|
||||
name="rstudio_server",
|
||||
ports=[
|
||||
PluginPort(
|
||||
internal=8787,
|
||||
public=True,
|
||||
),
|
||||
],
|
||||
ports=[PluginPort(internal=8787, public=True)],
|
||||
target_role=PluginTargetRole.Master,
|
||||
execute="rstudio_server.sh",
|
||||
files=[
|
||||
PluginFile("rstudio_server.sh", os.path.join(dir_path, "rstudio_server.sh")),
|
||||
],
|
||||
files=[PluginFile("rstudio_server.sh", os.path.join(dir_path, "rstudio_server.sh"))],
|
||||
env=dict(RSTUDIO_SERVER_VERSION=version),
|
||||
)
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import os
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole, PluginTarget
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginTarget, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_file import PluginFile
|
||||
from aztk.utils import constants
|
||||
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
@ -13,7 +12,5 @@ class SimplePlugin(PluginConfiguration):
|
|||
target_role=PluginTargetRole.All,
|
||||
target=PluginTarget.Host,
|
||||
execute="simple.sh",
|
||||
files=[
|
||||
PluginFile("simple.sh", os.path.join(dir_path, "simple.sh")),
|
||||
],
|
||||
files=[PluginFile("simple.sh", os.path.join(dir_path, "simple.sh"))],
|
||||
)
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import os
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_file import PluginFile
|
||||
from aztk.utils import constants
|
||||
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ from http.server import BaseHTTPRequestHandler, HTTPServer
|
|||
|
||||
BIND_ADDR = os.environ.get("BIND_ADDR", "0.0.0.0")
|
||||
SERVER_PORT = int(os.environ.get("SERVER_PORT", "80"))
|
||||
URL_PREFIX = os.environ.get("URL_PREFIX", "").rstrip('/') + '/'
|
||||
URL_PREFIX = os.environ.get("URL_PREFIX", "").rstrip("/") + "/"
|
||||
SPARK_MASTER_HOST = ""
|
||||
|
||||
|
||||
|
@ -44,7 +44,7 @@ class ProxyHandler(BaseHTTPRequestHandler):
|
|||
self.proxyRequest(None)
|
||||
|
||||
def do_POST(self):
|
||||
length = int(self.headers.getheader('content-length'))
|
||||
length = int(self.headers.getheader("content-length"))
|
||||
postData = self.rfile.read(length)
|
||||
self.proxyRequest(postData)
|
||||
|
||||
|
@ -84,17 +84,19 @@ class ProxyHandler(BaseHTTPRequestHandler):
|
|||
def rewriteLinks(self, page, targetHost):
|
||||
target = "{0}proxy:{1}/".format(URL_PREFIX, targetHost).encode()
|
||||
page = page.replace(b'href="/', b'href="' + target)
|
||||
page = page.replace(b"'<div><a href=' + logUrl + '>'",
|
||||
b"'<div><a href=' + location.origin + logUrl.replace('http://', '/proxy:') + '>'")
|
||||
page = page.replace(b'href="log', b'href="' + target + b'log')
|
||||
page = page.replace(b'href="http://', b'href="' + URL_PREFIX.encode() + b'proxy:')
|
||||
page = page.replace(
|
||||
b"'<div><a href=' + logUrl + '>'",
|
||||
b"'<div><a href=' + location.origin + logUrl.replace('http://', '/proxy:') + '>'",
|
||||
)
|
||||
page = page.replace(b'href="log', b'href="' + target + b"log")
|
||||
page = page.replace(b'href="http://', b'href="' + URL_PREFIX.encode() + b"proxy:")
|
||||
page = page.replace(b'src="/', b'src="' + target)
|
||||
page = page.replace(b'action="', b'action="' + target)
|
||||
page = page.replace(b'"/api/v1/', b'"' + target + b'api/v1/')
|
||||
page = page.replace(b'"/api/v1/', b'"' + target + b"api/v1/")
|
||||
return page
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: <proxied host:port> [<proxy port>]")
|
||||
sys.exit(1)
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import os
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginTargetRole
|
||||
from aztk.models.plugins.plugin_file import PluginFile
|
||||
from aztk.utils import constants
|
||||
|
||||
dir_path = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
|
@ -11,7 +10,5 @@ def TensorflowOnSparkPlugin():
|
|||
name="tensorflow_on_spark",
|
||||
target_role=PluginTargetRole.Master,
|
||||
execute="tensorflow_on_spark.sh",
|
||||
files=[
|
||||
PluginFile("tensorflow_on_spark.sh", os.path.join(dir_path, "tensorflow_on_spark.sh")),
|
||||
],
|
||||
files=[PluginFile("tensorflow_on_spark.sh", os.path.join(dir_path, "tensorflow_on_spark.sh"))],
|
||||
)
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
from aztk.spark import models
|
||||
|
||||
SPARK_VM_IMAGE = models.VmImage(publisher='Canonical', offer='UbuntuServer', sku='16.04')
|
||||
SPARK_VM_IMAGE = models.VmImage(publisher="Canonical", offer="UbuntuServer", sku="16.04")
|
||||
|
|
|
@ -50,9 +50,7 @@ def cmd_check_output(cmd):
|
|||
try:
|
||||
output = check_output(cmd, shell=True, stderr=STDOUT)
|
||||
except CalledProcessError as e:
|
||||
return "CMD: {0}\n"\
|
||||
"returncode: {1}"\
|
||||
"output: {2}".format(e.cmd, e.returncode, e.output)
|
||||
return "CMD: {0}\n" "returncode: {1}" "output: {2}".format(e.cmd, e.returncode, e.output)
|
||||
else:
|
||||
return output
|
||||
|
||||
|
@ -62,9 +60,9 @@ def get_disk_free():
|
|||
|
||||
|
||||
def get_docker_diagnostics(docker_client):
|
||||
'''
|
||||
"""
|
||||
returns list of tuples (filename, data) to be written in the zip
|
||||
'''
|
||||
"""
|
||||
output = []
|
||||
output.append(get_docker_images(docker_client))
|
||||
logs = get_docker_containers(docker_client)
|
||||
|
@ -95,7 +93,7 @@ def get_docker_containers(docker_client):
|
|||
# get docker container logs
|
||||
logs.append((container.name + "/docker.log", container.logs()))
|
||||
logs.append(get_docker_process_status(container))
|
||||
if container.name == "spark": #TODO: find a more robust way to get specific info off specific containers
|
||||
if container.name == "spark": # TODO: find a more robust way to get specific info off specific containers
|
||||
logs.extend(get_container_aztk_script(container))
|
||||
logs.extend(get_spark_logs(container))
|
||||
logs.extend(get_spark_app_logs(container))
|
||||
|
@ -158,13 +156,13 @@ def filter_members(members):
|
|||
|
||||
|
||||
def extract_tar_in_memory(container, data):
|
||||
data = io.BytesIO(b''.join([item for item in data]))
|
||||
data = io.BytesIO(b"".join([item for item in data]))
|
||||
tarf = tarfile.open(fileobj=data)
|
||||
logs = []
|
||||
for member in filter_members(tarf):
|
||||
file_bytes = tarf.extractfile(member)
|
||||
if file_bytes is not None:
|
||||
logs.append((container.name + "/" + member.name, b''.join(file_bytes.readlines())))
|
||||
logs.append((container.name + "/" + member.name, b"".join(file_bytes.readlines())))
|
||||
return logs
|
||||
|
||||
|
||||
|
@ -174,7 +172,7 @@ def get_brief_diagnostics():
|
|||
logs = []
|
||||
for file_name in files:
|
||||
try:
|
||||
logs.append((file_name, open(batch_dir + file_name, 'rb').read()))
|
||||
logs.append((file_name, open(batch_dir + file_name, "rb").read()))
|
||||
# print("LOG:", (file_name, open(batch_dir+file_name, 'rb').read()))
|
||||
except FileNotFoundError as e:
|
||||
print("file not found", e)
|
||||
|
|
|
@ -1,16 +1,11 @@
|
|||
from __future__ import print_function
|
||||
|
||||
import datetime
|
||||
import io
|
||||
import os
|
||||
import time
|
||||
import azure.batch.batch_service_client as batch
|
||||
import azure.batch.batch_auth as batch_auth
|
||||
|
||||
import azure.batch.models as batch_models
|
||||
import azure.storage.blob as blob
|
||||
from aztk.version import __version__
|
||||
|
||||
from aztk.utils import constants
|
||||
from aztk import error
|
||||
import aztk.models
|
||||
|
||||
|
||||
class MasterInvalidStateError(Exception):
|
||||
|
|
|
@ -1,8 +1,3 @@
|
|||
from .deprecation import deprecated, deprecate
|
||||
from . import azure_api
|
||||
from . import command_builder
|
||||
from . import constants
|
||||
from . import helpers
|
||||
from . import file_utils
|
||||
from . import get_ssh_key
|
||||
from . import secure_utils
|
||||
from . import (azure_api, command_builder, constants, file_utils, get_ssh_key, helpers, secure_utils)
|
||||
from .deprecation import deprecate, deprecated
|
||||
from .retry import BackOffPolicy, retry
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import re
|
||||
from typing import Optional
|
||||
|
||||
import azure.batch.batch_auth as batch_auth
|
||||
import azure.batch.batch_service_client as batch
|
||||
|
@ -12,10 +11,10 @@ from azure.storage.common import CloudStorageAccount
|
|||
from aztk import error
|
||||
from aztk.version import __version__
|
||||
|
||||
RESOURCE_ID_PATTERN = re.compile('^/subscriptions/(?P<subscription>[^/]+)'
|
||||
'/resourceGroups/(?P<resourcegroup>[^/]+)'
|
||||
'/providers/[^/]+'
|
||||
'/[^/]+Accounts/(?P<account>[^/]+)$')
|
||||
RESOURCE_ID_PATTERN = re.compile("^/subscriptions/(?P<subscription>[^/]+)"
|
||||
"/resourceGroups/(?P<resourcegroup>[^/]+)"
|
||||
"/providers/[^/]+"
|
||||
"/[^/]+Accounts/(?P<account>[^/]+)$")
|
||||
|
||||
|
||||
def validate_secrets(secrets):
|
||||
|
@ -48,23 +47,25 @@ def make_batch_client(secrets):
|
|||
client_id=secrets.service_principal.client_id,
|
||||
secret=secrets.service_principal.credential,
|
||||
tenant=secrets.service_principal.tenant_id,
|
||||
resource='https://management.core.windows.net/')
|
||||
resource="https://management.core.windows.net/",
|
||||
)
|
||||
m = RESOURCE_ID_PATTERN.match(secrets.service_principal.batch_account_resource_id)
|
||||
arm_batch_client = BatchManagementClient(arm_credentials, m.group('subscription'))
|
||||
account = arm_batch_client.batch_account.get(m.group('resourcegroup'), m.group('account'))
|
||||
base_url = 'https://{0}/'.format(account.account_endpoint)
|
||||
arm_batch_client = BatchManagementClient(arm_credentials, m.group("subscription"))
|
||||
account = arm_batch_client.batch_account.get(m.group("resourcegroup"), m.group("account"))
|
||||
base_url = "https://{0}/".format(account.account_endpoint)
|
||||
credentials = ServicePrincipalCredentials(
|
||||
client_id=secrets.service_principal.client_id,
|
||||
secret=secrets.service_principal.credential,
|
||||
tenant=secrets.service_principal.tenant_id,
|
||||
resource='https://batch.core.windows.net/')
|
||||
resource="https://batch.core.windows.net/",
|
||||
)
|
||||
|
||||
# Set up Batch Client
|
||||
batch_client = batch.BatchServiceClient(credentials, base_url=base_url)
|
||||
|
||||
# Set retry policy
|
||||
batch_client.config.retry_policy.retries = 5
|
||||
batch_client.config.add_user_agent('aztk/{}'.format(__version__))
|
||||
batch_client.config.add_user_agent("aztk/{}".format(__version__))
|
||||
|
||||
return batch_client
|
||||
|
||||
|
@ -82,26 +83,29 @@ def make_blob_client(secrets):
|
|||
blob_client = blob.BlockBlobService(
|
||||
account_name=secrets.shared_key.storage_account_name,
|
||||
account_key=secrets.shared_key.storage_account_key,
|
||||
endpoint_suffix=secrets.shared_key.storage_account_suffix)
|
||||
endpoint_suffix=secrets.shared_key.storage_account_suffix,
|
||||
)
|
||||
else:
|
||||
# Set up ServicePrincipalCredentials
|
||||
arm_credentials = ServicePrincipalCredentials(
|
||||
client_id=secrets.service_principal.client_id,
|
||||
secret=secrets.service_principal.credential,
|
||||
tenant=secrets.service_principal.tenant_id,
|
||||
resource='https://management.core.windows.net/')
|
||||
resource="https://management.core.windows.net/",
|
||||
)
|
||||
m = RESOURCE_ID_PATTERN.match(secrets.service_principal.storage_account_resource_id)
|
||||
accountname = m.group('account')
|
||||
subscription = m.group('subscription')
|
||||
resourcegroup = m.group('resourcegroup')
|
||||
accountname = m.group("account")
|
||||
subscription = m.group("subscription")
|
||||
resourcegroup = m.group("resourcegroup")
|
||||
mgmt_client = StorageManagementClient(arm_credentials, subscription)
|
||||
key = retry_function(
|
||||
key = (retry_function(
|
||||
mgmt_client.storage_accounts.list_keys,
|
||||
10,
|
||||
1,
|
||||
Exception,
|
||||
resource_group_name=resourcegroup,
|
||||
account_name=accountname).keys[0].value
|
||||
account_name=accountname,
|
||||
).keys[0].value)
|
||||
storage_client = CloudStorageAccount(accountname, key)
|
||||
blob_client = storage_client.create_block_blob_service()
|
||||
|
||||
|
@ -110,6 +114,7 @@ def make_blob_client(secrets):
|
|||
|
||||
def retry_function(function, retry_attempts: int, retry_interval: int, exception: Exception, *args, **kwargs):
|
||||
import time
|
||||
|
||||
for i in range(retry_attempts):
|
||||
try:
|
||||
return function(*args, **kwargs)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
class CommandOption():
|
||||
class CommandOption:
|
||||
def __init__(self, name: str, value: str):
|
||||
self.name = name
|
||||
self.value = value
|
||||
|
|
|
@ -18,33 +18,33 @@ DOCKER_SPARK_HOME = "/home/spark-current"
|
|||
"""
|
||||
Root path of this repository
|
||||
"""
|
||||
ROOT_PATH = os.path.normpath(os.path.join(os.path.dirname(__file__), '..', '..'))
|
||||
ROOT_PATH = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
"""
|
||||
User home directory path
|
||||
"""
|
||||
HOME_DIRECTORY_PATH = os.path.expanduser('~')
|
||||
HOME_DIRECTORY_PATH = os.path.expanduser("~")
|
||||
"""
|
||||
Path to the secrets file
|
||||
"""
|
||||
DEFAULT_SECRETS_PATH = os.path.join(os.getcwd(), '.aztk/secrets.yaml')
|
||||
DEFAULT_SECRETS_PATH = os.path.join(os.getcwd(), ".aztk/secrets.yaml")
|
||||
"""
|
||||
Paths to the cluster configuration files
|
||||
"""
|
||||
GLOBAL_CONFIG_PATH = os.path.join(HOME_DIRECTORY_PATH, '.aztk')
|
||||
DEFAULT_SSH_CONFIG_PATH = os.path.join(os.getcwd(), '.aztk/ssh.yaml')
|
||||
DEFAULT_CLUSTER_CONFIG_PATH = os.path.join(os.getcwd(), '.aztk/cluster.yaml')
|
||||
DEFAULT_SPARK_CONF_SOURCE = os.path.join(os.getcwd(), '.aztk')
|
||||
DEFAULT_SPARK_CONF_DEST = os.path.join(ROOT_PATH, 'node_scripts', 'conf')
|
||||
DEFAULT_SPARK_JARS_SOURCE = os.path.join(os.getcwd(), '.aztk', 'jars')
|
||||
DEFAULT_SPARK_JARS_DEST = os.path.join(ROOT_PATH, 'node_scripts', 'jars')
|
||||
DEFAULT_SPARK_JOB_CONFIG = os.path.join(os.getcwd(), '.aztk', 'job.yaml')
|
||||
GLOBAL_SPARK_JOB_CONFIG = os.path.join(HOME_DIRECTORY_PATH, '.aztk', 'job.yaml')
|
||||
GLOBAL_CONFIG_PATH = os.path.join(HOME_DIRECTORY_PATH, ".aztk")
|
||||
DEFAULT_SSH_CONFIG_PATH = os.path.join(os.getcwd(), ".aztk/ssh.yaml")
|
||||
DEFAULT_CLUSTER_CONFIG_PATH = os.path.join(os.getcwd(), ".aztk/cluster.yaml")
|
||||
DEFAULT_SPARK_CONF_SOURCE = os.path.join(os.getcwd(), ".aztk")
|
||||
DEFAULT_SPARK_CONF_DEST = os.path.join(ROOT_PATH, "node_scripts", "conf")
|
||||
DEFAULT_SPARK_JARS_SOURCE = os.path.join(os.getcwd(), ".aztk", "jars")
|
||||
DEFAULT_SPARK_JARS_DEST = os.path.join(ROOT_PATH, "node_scripts", "jars")
|
||||
DEFAULT_SPARK_JOB_CONFIG = os.path.join(os.getcwd(), ".aztk", "job.yaml")
|
||||
GLOBAL_SPARK_JOB_CONFIG = os.path.join(HOME_DIRECTORY_PATH, ".aztk", "job.yaml")
|
||||
"""
|
||||
Source and destination paths for spark init
|
||||
"""
|
||||
INIT_DIRECTORY_SOURCE = os.path.join(ROOT_PATH, "aztk_cli", 'config')
|
||||
LOCAL_INIT_DIRECTORY_DEST = os.path.join(os.getcwd(), '.aztk')
|
||||
GLOBAL_INIT_DIRECTORY_DEST = os.path.join(HOME_DIRECTORY_PATH, '.aztk')
|
||||
INIT_DIRECTORY_SOURCE = os.path.join(ROOT_PATH, "aztk_cli", "config")
|
||||
LOCAL_INIT_DIRECTORY_DEST = os.path.join(os.getcwd(), ".aztk")
|
||||
GLOBAL_INIT_DIRECTORY_DEST = os.path.join(HOME_DIRECTORY_PATH, ".aztk")
|
||||
"""
|
||||
Key of the metadata entry for the pool that is used to store the master node id
|
||||
"""
|
||||
|
|
|
@ -39,9 +39,10 @@ def deprecate(version: str, message: str, advice: str = ""):
|
|||
advice (str): Sentence explaining alternatives to the deprecated functionality.
|
||||
"""
|
||||
|
||||
warnings.simplefilter('always', DeprecationWarning) # turn off filter
|
||||
warnings.simplefilter("always", DeprecationWarning) # turn off filter
|
||||
warnings.warn(
|
||||
"{0} It will be removed in Aztk version {1}. {2}".format(message, version, advice),
|
||||
category=DeprecationWarning,
|
||||
stacklevel=2)
|
||||
warnings.simplefilter('default', DeprecationWarning) # reset filter
|
||||
stacklevel=2,
|
||||
)
|
||||
warnings.simplefilter("default", DeprecationWarning) # reset filter
|
||||
|
|
|
@ -29,6 +29,6 @@ def __read_ssh_key_from_file(path: str) -> str:
|
|||
"""
|
||||
Read the content of the given file
|
||||
"""
|
||||
with open(os.path.expanduser(path), 'r', encoding='UTF-8') as content_file:
|
||||
with open(os.path.expanduser(path), "r", encoding="UTF-8") as content_file:
|
||||
content = content_file.read()
|
||||
return content
|
||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче