* start refactor

* continue refactor for cluster and job functions

* fix imports

* fixes

* fixes

* refactor integration test secrets management

* fix cluster create, add new test

* add tests for new sdk api and fix bugs

* fix naming and bugs

* update job operations naming, bug fixes

* fix cluster tests

* fix joboperations and tests

* update cli and fix some bugs

* start fixes

* fix pylint errors, bugs

* add deprecated warning checks, rename tests

* add docstrings for baseoperations

* add docstrings

* docstrings, add back compat for coreclient, fix init for spark client

* whitespace

* docstrings, whitespace

* docstrings, fixes

* docstrings, fixes

* fix the sdk documentation, bugs

* fix method call

* pool_id->id

* rename ids

* cluster_id->id

* cluster_id->id

* add todo

* fixes

* add some todos

* rename pool to cluster, add todo for nodes params

* add todos for nodes param removal

* update functions names

* remove deprecated function calls

* update docs and docstrings

* update docstrings

* get rid of TODOs, fix docstrings

* remove unused setting

* inheritance -> composition

* fix models bugs

* fix create_user bug

* update sdk_example.py

* fix create user argument issue

* update sdk_example.py

* update doc

* use Software model instead of string

* add job wait flag, add cluster application wait functions

* add docs for wait, update tests

* fix bug

* add clientrequesterror catch to fix tests
Jacob Freck 2018-08-03 15:20:05 -07:00 committed by GitHub
Parent c9fd8bbfeb
Commit b18eb695a1
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
111 changed files: 3707 additions and 838 deletions


@@ -3,5 +3,5 @@ based_on_style=pep8
 spaces_before_comment=4
 split_before_logical_operator=True
 indent_width=4
-column_limit=140
+column_limit=120
 split_arguments_when_comma_terminated=True

.vscode/settings.json vendored

@@ -14,5 +14,5 @@
 "python.formatting.provider": "yapf",
 "python.venvPath": "${workspaceFolder}/.venv/",
 "python.pythonPath": "${workspaceFolder}/.venv/Scripts/python.exe",
-"python.unitTest.pyTestEnabled": true
+"python.unitTest.pyTestEnabled": true,
 }

aztk/client/__init__.py Normal file

@@ -0,0 +1 @@
from .client import CoreClient


@@ -0,0 +1 @@
from .base_operations import BaseOperations


@@ -0,0 +1,223 @@
from aztk import models
from aztk.internal import cluster_data
from aztk.utils import ssh as ssh_lib
from .helpers import (create_user_on_cluster, create_user_on_node, delete_user_on_cluster, delete_user_on_node,
generate_user_on_cluster, generate_user_on_node, get_application_log, get_remote_login_settings,
node_run, run, ssh_into_node)
class BaseOperations:
"""Base operations that all other operations have as an attribute
Attributes:
batch_client (:obj:`azure.batch.batch_service_client.BatchServiceClient`): Client used to interact with the
Azure Batch service.
blob_client (:obj:`azure.storage.blob.BlockBlobService`): Client used to interact with the Azure Storage
Blob service.
secrets_configuration (:obj:`aztk.models.SecretsConfiguration`): Model that holds AZTK secrets used to authenticate
with Azure and the clusters.
"""
def __init__(self, context):
self.batch_client = context['batch_client']
self.blob_client = context['blob_client']
self.secrets_configuration = context['secrets_configuration']
def get_cluster_config(self, id: str) -> models.ClusterConfiguration:
"""Open an ssh tunnel to a node
Args:
id (:obj:`str`): the id of the cluster the node is in
node_id (:obj:`str`): the id of the node to open the ssh tunnel to
username (:obj:`str`): the username to authenticate the ssh session
ssh_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_key
or password. Defaults to None.
password (:obj:`str`, optional): password for the user, must use ssh_key or password. Defaults to None.
port_forward_list (:obj:`List[PortForwardingSpecification`, optional): list of PortForwardingSpecifications.
The defined ports will be forwarded to the client.
internal (:obj:`bool`, optional): if True, this will connect to the node using its internal IP.
Only use this if running within the same VNET as the cluster. Defaults to False.
Returns:
:obj:`aztk.models.ClusterConfiguration`: Object representing the cluster's configuration
"""
return self.get_cluster_data(id).read_cluster_config()
def get_cluster_data(self, id: str) -> cluster_data.ClusterData:
"""Gets the ClusterData object to manage data related to the given cluster
Args:
id (:obj:`str`): the id of the cluster to get
Returns:
:obj:`aztk.models.ClusterData`: Object used to manage the data and storage functions for a cluster
"""
return cluster_data.ClusterData(self.blob_client, id)
def ssh_into_node(self, id, node_id, username, ssh_key=None, password=None, port_forward_list=None, internal=False):
"""Open an ssh tunnel to a node
Args:
id (:obj:`str`): the id of the cluster the node is in
node_id (:obj:`str`): the id of the node to open the ssh tunnel to
username (:obj:`str`): the username to authenticate the ssh session
ssh_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_key or password. Defaults to None.
password (:obj:`str`, optional): password for the user, must use ssh_key or password. Defaults to None.
port_forward_list (:obj:`List[PortForwardingSpecification]`, optional): list of PortForwardingSpecifications.
The defined ports will be forwarded to the client.
internal (:obj:`bool`, optional): if True, this will connect to the node using its internal IP.
Only use this if running within the same VNET as the cluster. Defaults to False.
Returns:
:obj:`None`
"""
ssh_into_node.ssh_into_node(self, id, node_id, username, ssh_key, password, port_forward_list, internal)
def create_user_on_node(self, id, node_id, username, ssh_key=None, password=None):
"""Create a user on a node
Args:
id (:obj:`str`): id of the cluster to create the user on.
node_id (:obj:`str`): id of the node in the cluster to create the user on.
username (:obj:`str`): name of the user to create.
ssh_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_key or password.
password (:obj:`str`, optional): password for the user, must use ssh_key or password.
Returns:
:obj:`None`
"""
return create_user_on_node.create_user_on_node(self, id, node_id, username, ssh_key, password)
#TODO: remove nodes as param
def create_user_on_cluster(self, id, nodes, username, ssh_pub_key=None, password=None):
"""Create a user on every node in the cluster
Args:
id (:obj:`str`): id of the cluster to create the user on.
nodes (:obj:`List[ComputeNode]`): list of nodes to create the user on
username (:obj:`str`): name of the user to create.
ssh_pub_key (:obj:`str`, optional): ssh public key to create the user with, must use ssh_pub_key or password. Defaults to None.
password (:obj:`str`, optional): password for the user, must use ssh_pub_key or password. Defaults to None.
Returns:
:obj:`None`
"""
return create_user_on_cluster.create_user_on_cluster(self, id, nodes, username, ssh_pub_key, password)
def generate_user_on_node(self, id, node_id):
"""Create a user with an autogenerated username and ssh_key on the given node.
Args:
id (:obj:`str`): the id of the cluster to generate the user on.
node_id (:obj:`str`): the id of the node in the cluster to generate the user on.
Returns:
:obj:`tuple`: A tuple of the form (username: :obj:`str`, ssh_key: :obj:`Cryptodome.PublicKey.RSA`)
"""
return generate_user_on_node.generate_user_on_node(self, id, node_id)
#TODO: remove nodes as param
def generate_user_on_cluster(self, id, nodes):
"""Create a user with an autogenerated username and ssh_key on the cluster
Args:
id (:obj:`str`): the id of the cluster to generate the user on.
nodes (:obj:`List[ComputeNode]`): list of nodes to generate the user on
Returns:
:obj:`tuple`: A tuple of the form (username: :obj:`str`, ssh_key: :obj:`Cryptodome.PublicKey.RSA`)
"""
return generate_user_on_cluster.generate_user_on_cluster(self, id, nodes)
def delete_user_on_node(self, id: str, node_id: str, username: str) -> str:
"""Delete a user on a node
Args:
id (:obj:`str`): the id of the cluster to delete the user on.
node_id (:obj:`str`): the id of the node in the cluster to delete the user on.
username (:obj:`str`): the name of the user to delete.
Returns:
:obj:`None`
"""
return delete_user_on_node.delete_user(self, id, node_id, username)
#TODO: remove nodes as param
def delete_user_on_cluster(self, username, id, nodes):
"""Delete a user on every node in the cluster
Args:
username (:obj:`str`): the name of the user to delete.
id (:obj:`str`): the id of the cluster to delete the user on.
nodes (:obj:`List[ComputeNode]`): list of nodes to delete the user on
Returns:
:obj:`None`
"""
return delete_user_on_cluster.delete_user_on_cluster(self, id, nodes, username)
def node_run(self, id, node_id, command, internal, container_name=None, timeout=None):
"""Run a bash command on the given node
Args:
id (:obj:`str`): the id of the cluster to run the command on.
node_id (:obj:`str`): the id of the node in the cluster to run the command on.
command (:obj:`str`): the bash command to execute on the node.
internal (:obj:`bool`): if True, this will connect to the node using its internal IP.
Only use this if running within the same VNET as the cluster. Defaults to False.
container_name (:obj:`str`, optional): the name of the container to run the command in.
If None, the command will run on the host VM. Defaults to None.
timeout (:obj:`int`, optional): The timeout in seconds for establishing a connection to the node.
Defaults to None.
Returns:
:obj:`aztk.models.NodeOutput`: object containing the output of the run command
"""
return node_run.node_run(self, id, node_id, command, internal, container_name, timeout)
def get_remote_login_settings(self, id: str, node_id: str):
"""Get the remote login information for a node in a cluster
Args:
id (:obj:`str`): the id of the cluster the node is in
node_id (:obj:`str`): the id of the node in the cluster
Returns:
:obj:`aztk.models.RemoteLogin`: Object that contains the ip address and port combination to login to a node
"""
return get_remote_login_settings.get_remote_login_settings(self, id, node_id)
def run(self, id, command, internal, container_name=None, timeout=None):
"""Run a bash command on every node in the cluster
Args:
id (:obj:`str`): the id of the cluster to run the command on.
command (:obj:`str`): the bash command to execute on the node.
internal (:obj:`bool`): if True, this will connect to the node using its internal IP.
Only use this if running within the same VNET as the cluster. Defaults to False.
container_name (:obj:`str`, optional): the name of the container to run the command in.
If None, the command will run on the host VM. Defaults to None.
timeout (:obj:`int`, optional): The timeout in seconds for establishing a connection to the node.
Defaults to None.
Returns:
:obj:`List[aztk.models.NodeOutput]`: list of NodeOutput objects containing the output of the run command
"""
return run.cluster_run(self, id, command, internal, container_name, timeout)
def get_application_log(self, id: str, application_name: str, tail=False, current_bytes: int = 0):
"""Get the log for a running or completed application
Args:
id (:obj:`str`): the id of the cluster the application was submitted to.
application_name (:obj:`str`): the name of the application to get the log of.
tail (:obj:`bool`, optional): If True, get the remaining bytes after current_bytes. Otherwise, the whole log will be retrieved.
Only use this if streaming the log as it is being written. Defaults to False.
current_bytes (:obj:`int`): Specifies the last seen byte, so only the bytes after current_bytes are retrieved.
Only useful if streaming the log as it is being written. Only used if tail is True.
Returns:
:obj:`aztk.models.ApplicationLog`: a model representing the output of the application.
"""
return get_application_log.get_application_log(self, id, application_name, tail, current_bytes)
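A minimal usage sketch of the operations above, assuming the Batch/Blob clients and the secrets model were built elsewhere (as `CoreClient._get_context` does); the ids are placeholders:

```python
from aztk.client.base import BaseOperations

# The context mirrors what CoreClient._get_context returns; the three
# objects below are assumed to already exist.
base = BaseOperations({
    'batch_client': batch_client,
    'blob_client': blob_client,
    'secrets_configuration': secrets_configuration,
})

# Read back the saved cluster configuration and a node's login endpoint.
cluster_config = base.get_cluster_config('my-cluster')
remote_login = base.get_remote_login_settings('my-cluster', 'tvm-123')
print(remote_login.ip_address, remote_login.port)
```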


@@ -0,0 +1,11 @@
import concurrent.futures
#TODO: remove nodes param
def create_user_on_cluster(base_operations, id, nodes, username, ssh_pub_key=None, password=None):
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = {
executor.submit(base_operations.create_user_on_node, id, node.id, username, ssh_pub_key, password): node
for node in nodes
}
concurrent.futures.wait(futures)


@@ -0,0 +1,42 @@
from datetime import datetime, timedelta, timezone
import azure.batch.models as batch_models
import azure.batch.models.batch_error as batch_error
from aztk import models
from aztk.utils import get_ssh_key
def __create_user(self, id: str, node_id: str, username: str, password: str = None, ssh_key: str = None) -> str:
"""
Create a user on a node
:param id: the id of the pool/cluster the node is in
:param node_id: the id of the node to add the user to
:param username: username of the user to add
:param password: password of the user to add
:param ssh_key: ssh_key of the user to add
"""
# Create new ssh user for the given node
self.batch_client.compute_node.add_user(
id,
node_id,
batch_models.ComputeNodeUser(
name=username,
is_admin=True,
password=password,
ssh_public_key=get_ssh_key.get_user_public_key(ssh_key, self.secrets_configuration),
expiry_time=datetime.now(timezone.utc) + timedelta(days=365),
),
)
def create_user_on_node(base_client, id, node_id, username, ssh_key=None, password=None):
try:
__create_user(
base_client, id=id, node_id=node_id, username=username, ssh_key=ssh_key, password=password)
except batch_error.BatchErrorException as error:
try:
base_client.delete_user_on_node(id, node_id, username)
base_client.create_user_on_node(id=id, node_id=node_id, username=username, ssh_key=ssh_key)
except batch_error.BatchErrorException as error:
raise error


@@ -0,0 +1,7 @@
import concurrent.futures
#TODO: remove nodes param
def delete_user_on_cluster(base_client, id, nodes, username):
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = [executor.submit(base_client.delete_user_on_node, id, node.id, username) for node in nodes]
concurrent.futures.wait(futures)


@@ -0,0 +1,9 @@
def delete_user(self, pool_id: str, node_id: str, username: str) -> str:
"""
Delete a user on a node
:param pool_id: the id of the pool the node is in
:param node_id: the id of the node to delete the user from
:param username: username of the user to delete
"""
# Delete a user on the given node
self.batch_client.compute_node.delete_user(pool_id, node_id, username)


@@ -0,0 +1,20 @@
import concurrent.futures
from Cryptodome.PublicKey import RSA
from aztk.utils import secure_utils
#TODO: remove nodes param
def generate_user_on_cluster(base_operations, id, nodes):
generated_username = secure_utils.generate_random_string()
ssh_key = RSA.generate(2048)
ssh_pub_key = ssh_key.publickey().exportKey('OpenSSH').decode('utf-8')
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = {
executor.submit(base_operations.create_user_on_node, id, node.id, generated_username, ssh_pub_key): node
for node in nodes
}
concurrent.futures.wait(futures)
return generated_username, ssh_key


@@ -0,0 +1,11 @@
from Cryptodome.PublicKey import RSA
from aztk.utils import secure_utils
def generate_user_on_node(base_client, pool_id, node_id):
generated_username = secure_utils.generate_random_string()
ssh_key = RSA.generate(2048)
ssh_pub_key = ssh_key.publickey().exportKey('OpenSSH').decode('utf-8')
base_client.create_user_on_node(pool_id, node_id, generated_username, ssh_pub_key)
return generated_username, ssh_key


@@ -0,0 +1,114 @@
import time
import azure
import azure.batch.models as batch_models
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk import models
from aztk.utils import constants, helpers
output_file = constants.TASK_WORKING_DIR + \
"/" + constants.SPARK_SUBMIT_LOGS_FILE
def __check_task_node_exist(batch_client, cluster_id: str, task: batch_models.CloudTask) -> bool:
try:
batch_client.compute_node.get(cluster_id, task.node_info.node_id)
return True
except batch_error.BatchErrorException:
return False
def __wait_for_app_to_be_running(batch_client, cluster_id: str, application_name: str) -> batch_models.CloudTask:
"""
Wait for the Batch task to leave the waiting (active/preparing) state and enter the running state (or completed, if it finished quickly)
"""
while True:
task = batch_client.task.get(cluster_id, application_name)
if task.state is batch_models.TaskState.active or task.state is batch_models.TaskState.preparing:
# TODO: log
time.sleep(5)
else:
return task
def __get_output_file_properties(batch_client, cluster_id: str, application_name: str):
while True:
try:
file = helpers.get_file_properties(cluster_id, application_name, output_file, batch_client)
return file
except batch_error.BatchErrorException as e:
if e.response.status_code == 404:
# TODO: log
time.sleep(5)
continue
else:
raise e
def get_log_from_storage(blob_client, container_name, application_name, task):
try:
blob = blob_client.get_blob_to_text(container_name, application_name + '/' + constants.SPARK_SUBMIT_LOGS_FILE)
except azure.common.AzureMissingResourceHttpError:
raise error.AztkError("Logs not found in your storage account. They were either deleted or never existed.")
return models.ApplicationLog(
name=application_name,
cluster_id=container_name,
application_state=task.state._value_,
log=blob.content,
total_bytes=blob.properties.content_length,
exit_code=task.execution_info.exit_code)
def get_log(batch_client, blob_client, cluster_id: str, application_name: str, tail=False, current_bytes: int = 0):
job_id = cluster_id
task_id = application_name
task = __wait_for_app_to_be_running(batch_client, cluster_id, application_name)
if not __check_task_node_exist(batch_client, cluster_id, task):
return get_log_from_storage(blob_client, cluster_id, application_name, task)
file = __get_output_file_properties(batch_client, cluster_id, application_name)
target_bytes = file.content_length
if target_bytes != current_bytes:
ocp_range = None
if tail:
ocp_range = "bytes={0}-{1}".format(current_bytes, target_bytes - 1)
stream = batch_client.file.get_from_task(
job_id, task_id, output_file, batch_models.FileGetFromTaskOptions(ocp_range=ocp_range))
content = helpers.read_stream_as_string(stream)
return models.ApplicationLog(
name=application_name,
cluster_id=cluster_id,
application_state=task.state._value_,
log=content,
total_bytes=target_bytes,
exit_code=task.execution_info.exit_code)
else:
return models.ApplicationLog(
name=application_name,
cluster_id=cluster_id,
application_state=task.state._value_,
log='',
total_bytes=target_bytes,
exit_code=task.execution_info.exit_code)
def get_application_log(base_operations,
cluster_id: str,
application_name: str,
tail=False,
current_bytes: int = 0):
try:
return get_log(base_operations.batch_client, base_operations.blob_client, cluster_id,
application_name, tail, current_bytes)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
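A sketch of how the tail/current_bytes contract above is meant to be consumed when streaming a log as it is written (`operations` stands in for any object exposing get_application_log; the ids are placeholders):

```python
import time

current_bytes = 0
while True:
    app_log = operations.get_application_log(
        'my-cluster', 'my-app', tail=True, current_bytes=current_bytes)
    print(app_log.log, end='')           # with tail=True, only the newly written bytes
    current_bytes = app_log.total_bytes  # remember the last byte already seen
    if app_log.application_state == 'completed':
        break
    time.sleep(5)
```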


@@ -0,0 +1,22 @@
import azure.batch.models.batch_error as batch_error
from aztk import error, models
from aztk.utils import helpers
def _get_remote_login_settings(base_client, pool_id: str, node_id: str):
"""
Get the remote_login_settings for node
:param pool_id: the id of the pool the node is in
:param node_id: the id of the node
:returns: aztk.models.RemoteLogin
"""
result = base_client.batch_client.compute_node.get_remote_login_settings(pool_id, node_id)
return models.RemoteLogin(ip_address=result.remote_login_ip_address, port=str(result.remote_login_port))
def get_remote_login_settings(base_client, cluster_id: str, node_id: str):
try:
return _get_remote_login_settings(base_client, cluster_id, node_id)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@@ -0,0 +1,30 @@
import aztk.error as error
import aztk.models as models
from aztk.utils import ssh as ssh_lib
def node_run(base_client, cluster_id, node_id, command, internal, container_name=None, timeout=None):
cluster = base_client.get(cluster_id)
pool, nodes = cluster.pool, list(cluster.nodes)
try:
node = next(node for node in nodes if node.id == node_id)
except StopIteration:
raise error.AztkError("Node with id {} not found".format(node_id))
if internal:
node_rls = models.RemoteLogin(ip_address=node.ip_address, port="22")
else:
node_rls = base_client.get_remote_login_settings(pool.id, node.id)
try:
generated_username, ssh_key = base_client.generate_user_on_node(pool.id, node.id)
output = ssh_lib.node_exec_command(
node.id,
command,
generated_username,
node_rls.ip_address,
node_rls.port,
ssh_key=ssh_key.exportKey().decode('utf-8'),
container_name=container_name,
timeout=timeout)
return output
finally:
base_client.delete_user_on_node(cluster_id, node.id, generated_username)


@@ -0,0 +1,36 @@
import asyncio
from azure.batch.models import batch_error
import aztk.models as models
from aztk import error
from aztk.utils import ssh as ssh_lib
from aztk.utils import helpers
def cluster_run(base_operations, cluster_id, command, internal, container_name=None, timeout=None):
cluster = base_operations.get(cluster_id)
pool, nodes = cluster.pool, list(cluster.nodes)
if internal:
cluster_nodes = [(node, models.RemoteLogin(ip_address=node.ip_address, port="22")) for node in nodes]
else:
cluster_nodes = [(node, base_operations.get_remote_login_settings(pool.id, node.id)) for node in nodes]
try:
generated_username, ssh_key = base_operations.generate_user_on_cluster(pool.id, nodes)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
try:
output = asyncio.get_event_loop().run_until_complete(
ssh_lib.clus_exec_command(
command,
generated_username,
cluster_nodes,
ssh_key=ssh_key.exportKey().decode('utf-8'),
container_name=container_name,
timeout=timeout))
return output
except OSError as exc:
raise exc
finally:
base_operations.delete_user_on_cluster(generated_username, pool.id, nodes)


@@ -0,0 +1,20 @@
import aztk.models as models
from aztk.utils import ssh as ssh_lib
def ssh_into_node(base_client, pool_id, node_id, username, ssh_key=None, password=None, port_forward_list=None, internal=False):
if internal:
result = base_client.batch_client.compute_node.get(pool_id=pool_id, node_id=node_id)
rls = models.RemoteLogin(ip_address=result.ip_address, port="22")
else:
result = base_client.batch_client.compute_node.get_remote_login_settings(pool_id, node_id)
rls = models.RemoteLogin(ip_address=result.remote_login_ip_address, port=str(result.remote_login_port))
ssh_lib.node_ssh(
username=username,
hostname=rls.ip_address,
port=rls.port,
ssh_key=ssh_key,
password=password,
port_forward_list=port_forward_list,
)


@@ -13,21 +13,38 @@ import aztk.utils.constants as constants
 import aztk.utils.get_ssh_key as get_ssh_key
 import aztk.utils.helpers as helpers
 import aztk.utils.ssh as ssh_lib
+from aztk.client.cluster import CoreClusterOperations
+from aztk.client.job import CoreJobOperations
 from aztk.internal import cluster_data
-from aztk.utils import secure_utils
+from aztk.utils import deprecated, secure_utils
-class Client:
+class CoreClient:
-    def __init__(self, secrets_config: models.SecretsConfiguration):
-        self.secrets_config = secrets_config
-        azure_api.validate_secrets(secrets_config)
-        self.batch_client = azure_api.make_batch_client(secrets_config)
-        self.blob_client = azure_api.make_blob_client(secrets_config)
+    """The base AZTK client that all other clients inherit from.
+    **This client should not be used directly. Only software specific clients
+    should be used.**
+    """
+    def _get_context(self, secrets_configuration: models.SecretsConfiguration):
+        self.secrets_configuration = secrets_configuration
+        azure_api.validate_secrets(secrets_configuration)
+        self.batch_client = azure_api.make_batch_client(secrets_configuration)
+        self.blob_client = azure_api.make_blob_client(secrets_configuration)
+        context = {
+            'batch_client': self.batch_client,
+            'blob_client': self.blob_client,
+            'secrets_configuration': self.secrets_configuration,
+        }
+        return context
+    # ALL THE FOLLOWING METHODS ARE DEPRECATED AND WILL BE REMOVED IN 0.10.0
+    @deprecated("0.10.0")
     def get_cluster_config(self, cluster_id: str) -> models.ClusterConfiguration:
         return self._get_cluster_data(cluster_id).read_cluster_config()
+    @deprecated("0.10.0")
     def _get_cluster_data(self, cluster_id: str) -> cluster_data.ClusterData:
         """
         Returns ClusterData object to manage data related to the given cluster id
@@ -38,6 +55,7 @@ class Client:
     General Batch Operations
     '''
+    @deprecated("0.10.0")
     def __delete_pool_and_job(self, pool_id: str, keep_logs: bool = False):
         """
         Delete a pool and it's associated job
@@ -67,6 +85,7 @@ class Client:
         return job_exists or pool_exists
+    @deprecated("0.10.0")
     def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, software_metadata_key: str, start_task, VmImageModel):
         """
         Create a pool and job
@@ -128,6 +147,7 @@ class Client:
         return helpers.get_cluster(cluster_conf.cluster_id, self.batch_client)
+    @deprecated("0.10.0")
     def __get_pool_details(self, cluster_id: str):
         """
         Print the information for the given cluster
@@ -138,6 +158,7 @@ class Client:
         nodes = self.batch_client.compute_node.list(pool_id=cluster_id)
         return pool, nodes
+    @deprecated("0.10.0")
     def __list_clusters(self, software_metadata_key):
         """
         List all the cluster on your account.
@@ -155,6 +176,7 @@ class Client:
             aztk_pools.append(pool)
         return aztk_pools
+    @deprecated("0.10.0")
     def __create_user(self, pool_id: str, node_id: str, username: str, password: str = None, ssh_key: str = None) -> str:
         """
         Create a pool user
@@ -173,9 +195,10 @@ class Client:
                 is_admin=True,
                 password=password,
                 ssh_public_key=get_ssh_key.get_user_public_key(
-                    ssh_key, self.secrets_config),
+                    ssh_key, self.secrets_configuration),
                 expiry_time=datetime.now(timezone.utc) + timedelta(days=365)))
+    @deprecated("0.10.0")
     def __delete_user(self, pool_id: str, node_id: str, username: str) -> str:
         """
         Create a pool user
@@ -186,6 +209,7 @@ class Client:
         # Delete a user on the given node
         self.batch_client.compute_node.delete_user(pool_id, node_id, username)
+    @deprecated("0.10.0")
     def __get_remote_login_settings(self, pool_id: str, node_id: str):
         """
         Get the remote_login_settings for node
@@ -197,6 +221,7 @@ class Client:
             pool_id, node_id)
         return models.RemoteLogin(ip_address=result.remote_login_ip_address, port=str(result.remote_login_port))
+    @deprecated("0.10.0")
     def __create_user_on_node(self, username, pool_id, node_id, ssh_key=None, password=None):
         try:
             self.__create_user(pool_id=pool_id, node_id=node_id, username=username, ssh_key=ssh_key, password=password)
@@ -207,6 +232,7 @@ class Client:
         except batch_error.BatchErrorException as error:
             raise error
+    @deprecated("0.10.0")
     def __generate_user_on_node(self, pool_id, node_id):
         generated_username = secure_utils.generate_random_string()
         ssh_key = RSA.generate(2048)
@@ -214,6 +240,7 @@ class Client:
         self.__create_user_on_node(generated_username, pool_id, node_id, ssh_pub_key)
         return generated_username, ssh_key
+    @deprecated("0.10.0")
     def __generate_user_on_pool(self, pool_id, nodes):
         generated_username = secure_utils.generate_random_string()
         ssh_key = RSA.generate(2048)
@@ -228,6 +255,7 @@ class Client:
         return generated_username, ssh_key
+    @deprecated("0.10.0")
     def __create_user_on_pool(self, username, pool_id, nodes, ssh_pub_key=None, password=None):
         with concurrent.futures.ThreadPoolExecutor() as executor:
             futures = {executor.submit(self.__create_user_on_node,
@@ -238,11 +266,13 @@ class Client:
                 password): node for node in nodes}
             concurrent.futures.wait(futures)
+    @deprecated("0.10.0")
     def __delete_user_on_pool(self, username, pool_id, nodes):
         with concurrent.futures.ThreadPoolExecutor() as executor:
             futures = [executor.submit(self.__delete_user, pool_id, node.id, username) for node in nodes]
             concurrent.futures.wait(futures)
+    @deprecated("0.10.0")
     def __node_run(self, cluster_id, node_id, command, internal, container_name=None, timeout=None):
         pool, nodes = self.__get_pool_details(cluster_id)
         try:
@@ -271,6 +301,7 @@ class Client:
         finally:
             self.__delete_user(cluster_id, node.id, generated_username)
+    @deprecated("0.10.0")
     def __cluster_run(self, cluster_id, command, internal, container_name=None, timeout=None):
         pool, nodes = self.__get_pool_details(cluster_id)
         nodes = list(nodes)
@@ -297,6 +328,7 @@ class Client:
         finally:
             self.__delete_user_on_pool(generated_username, pool.id, nodes)
+    @deprecated("0.10.0")
     def __cluster_copy(self, cluster_id, source_path, destination_path=None, container_name=None, internal=False, get=False, timeout=None):
         pool, nodes = self.__get_pool_details(cluster_id)
         nodes = list(nodes)
@@ -325,6 +357,7 @@ class Client:
         finally:
             self.__delete_user_on_pool(generated_username, pool.id, nodes)
+    @deprecated("0.10.0")
     def __ssh_into_node(self, pool_id, node_id, username, ssh_key=None, password=None, port_forward_list=None, internal=False):
         if internal:
             result = self.batch_client.compute_node.get(pool_id=pool_id, node_id=node_id)
@@ -342,6 +375,7 @@ class Client:
             port_forward_list=port_forward_list,
         )
+    @deprecated("0.10.0")
     def __submit_job(self,
                      job_configuration,
                      start_task,
@@ -429,44 +463,3 @@ class Client:
         self.batch_client.job_schedule.add(setup)
         return self.batch_client.job_schedule.get(job_schedule_id=job_configuration.id)
-    '''
-    Define Public Interface
-    '''
-    def create_cluster(self, cluster_conf, wait: bool = False):
-        raise NotImplementedError()
-    def create_clusters_in_parallel(self, cluster_confs):
-        raise NotImplementedError()
-    def delete_cluster(self, cluster_id: str):
-        raise NotImplementedError()
-    def get_cluster(self, cluster_id: str):
-        raise NotImplementedError()
-    def list_clusters(self):
-        raise NotImplementedError()
-    def wait_until_cluster_is_ready(self, cluster_id):
-        raise NotImplementedError()
-    def create_user(self, cluster_id: str, username: str, password: str = None, ssh_key: str = None) -> str:
-        raise NotImplementedError()
-    def get_remote_login_settings(self, cluster_id, node_id):
-        raise NotImplementedError()
-    def cluster_run(self, cluster_id, command):
-        raise NotImplementedError()
-    def cluster_copy(self, cluster_id, source_path, destination_path):
-        raise NotImplementedError()
-    def cluster_download(self, cluster_id, source_path, destination_path):
-        raise NotImplementedError()
-    def submit_job(self, job):
-        raise NotImplementedError()
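With the move from inheritance to composition, a software-specific client is expected to build the context once via `_get_context` and hand it to its operations objects; a sketch of that wiring (the actual Spark client layout is not shown in this diff):

```python
from aztk.client import CoreClient
from aztk.client.cluster import CoreClusterOperations
from aztk.client.job import CoreJobOperations

class ExampleClient(CoreClient):
    # Hypothetical software-specific client built on the new composition model.
    def __init__(self, secrets_configuration):
        context = self._get_context(secrets_configuration)
        self.cluster = CoreClusterOperations(context)
        self.job = CoreJobOperations(context)
```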


@@ -0,0 +1 @@
from .operations import CoreClusterOperations


@@ -0,0 +1,41 @@
import asyncio
import azure.batch.models.batch_error as batch_error
import aztk.models as models
from aztk import error
from aztk.utils import ssh as ssh_lib
from aztk.utils import helpers
def cluster_copy(cluster_operations, cluster_id, source_path, destination_path=None, container_name=None, internal=False, get=False, timeout=None):
cluster = cluster_operations.get(cluster_id)
pool, nodes = cluster.pool, list(cluster.nodes)
if internal:
cluster_nodes = [(node, models.RemoteLogin(ip_address=node.ip_address, port="22")) for node in nodes]
else:
cluster_nodes = [(node, cluster_operations.get_remote_login_settings(pool.id, node.id)) for node in nodes]
try:
generated_username, ssh_key = cluster_operations.generate_user_on_cluster(pool.id, nodes)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
try:
output = asyncio.get_event_loop().run_until_complete(
ssh_lib.clus_copy(
container_name=container_name,
username=generated_username,
nodes=cluster_nodes,
source_path=source_path,
destination_path=destination_path,
ssh_key=ssh_key.exportKey().decode('utf-8'),
get=get,
timeout=timeout
)
)
return output
except (OSError, batch_error.BatchErrorException) as exc:
raise exc
finally:
cluster_operations.delete_user_on_cluster(generated_username, pool.id, nodes)


@@ -0,0 +1,67 @@
from datetime import timedelta
import azure.batch.models as batch_models
from aztk import models
from aztk.utils import helpers, constants
def create_pool_and_job(core_cluster_operations, cluster_conf: models.ClusterConfiguration, software_metadata_key: str, start_task, VmImageModel):
"""
Create a pool and job
:param cluster_conf: the configuration object used to create the cluster
:type cluster_conf: aztk.models.ClusterConfiguration
:param software_metadata_key: the id of the software being used on the cluster
:param start_task: the start task for the cluster
:param VmImageModel: the type of image to provision for the cluster
"""
core_cluster_operations.get_cluster_data(cluster_conf.cluster_id).save_cluster_config(cluster_conf)
# reuse pool_id as job_id
pool_id = cluster_conf.cluster_id
job_id = cluster_conf.cluster_id
# Get a verified node agent sku
sku_to_use, image_ref_to_use = \
helpers.select_latest_verified_vm_image_with_node_agent_sku(
VmImageModel.publisher, VmImageModel.offer, VmImageModel.sku, core_cluster_operations.batch_client)
network_conf = None
if cluster_conf.subnet_id is not None:
network_conf = batch_models.NetworkConfiguration(
subnet_id=cluster_conf.subnet_id)
auto_scale_formula = "$TargetDedicatedNodes={0}; $TargetLowPriorityNodes={1}".format(
cluster_conf.size, cluster_conf.size_low_priority)
# Configure the pool
pool = batch_models.PoolAddParameter(
id=pool_id,
virtual_machine_configuration=batch_models.VirtualMachineConfiguration(
image_reference=image_ref_to_use,
node_agent_sku_id=sku_to_use),
vm_size=cluster_conf.vm_size,
enable_auto_scale=True,
auto_scale_formula=auto_scale_formula,
auto_scale_evaluation_interval=timedelta(minutes=5),
start_task=start_task,
enable_inter_node_communication=True if not cluster_conf.subnet_id else False,
max_tasks_per_node=4,
network_configuration=network_conf,
metadata=[
batch_models.MetadataItem(
name=constants.AZTK_SOFTWARE_METADATA_KEY, value=software_metadata_key),
batch_models.MetadataItem(
name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_CLUSTER_MODE_METADATA)
])
# Create the pool + create user for the pool
helpers.create_pool_if_not_exist(pool, core_cluster_operations.batch_client)
# Create job
job = batch_models.JobAddParameter(
id=job_id,
pool_info=batch_models.PoolInformation(pool_id=pool_id))
# Add job to batch
core_cluster_operations.batch_client.job.add(job)
return helpers.get_cluster(cluster_conf.cluster_id, core_cluster_operations.batch_client)
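For reference, a hypothetical ClusterConfiguration that create_pool_and_job consumes; the field names follow the attributes referenced above, though the constructor call itself is an assumption:

```python
from aztk.models import ClusterConfiguration

cluster_conf = ClusterConfiguration(
    cluster_id='example-cluster',
    vm_size='Standard_D2_v2',
    size=2,               # becomes $TargetDedicatedNodes in the autoscale formula
    size_low_priority=1,  # becomes $TargetLowPriorityNodes
    subnet_id=None,       # no custom VNET, so inter-node communication stays enabled
)
```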


@@ -0,0 +1,31 @@
import azure.batch.models as batch_models
def delete_pool_and_job(core_cluster_operations, pool_id: str, keep_logs: bool = False):
"""
Delete a pool and its associated job
:param pool_id: the id of the pool (and associated job) to delete
:return bool: True if the pool or the job existed and was deleted
"""
# job id is equal to pool id
job_id = pool_id
job_exists = True
try:
core_cluster_operations.batch_client.job.get(job_id)
except batch_models.batch_error.BatchErrorException:
job_exists = False
pool_exists = core_cluster_operations.batch_client.pool.exists(pool_id)
if job_exists:
core_cluster_operations.batch_client.job.delete(job_id)
if pool_exists:
core_cluster_operations.batch_client.pool.delete(pool_id)
if not keep_logs:
cluster_data = core_cluster_operations.get_cluster_data(pool_id)
cluster_data.delete_container(pool_id)
return job_exists or pool_exists


@@ -0,0 +1,15 @@
#TODO: return Cluster instead of (pool, nodes)
from aztk import models
def get_pool_details(core_cluster_operations, cluster_id: str):
"""
Get the details of the given cluster
:param cluster_id: Id of the cluster
:return: aztk.models.Cluster built from the pool and its compute nodes
"""
pool = core_cluster_operations.batch_client.pool.get(cluster_id)
nodes = core_cluster_operations.batch_client.compute_node.list(pool_id=cluster_id)
return models.Cluster(pool, nodes)


@@ -0,0 +1,20 @@
from aztk import models
from aztk.utils import constants
def list_clusters(cluster_client, software_metadata_key):
"""
List all the clusters on your account.
"""
pools = cluster_client.batch_client.pool.list()
software_metadata = (
constants.AZTK_SOFTWARE_METADATA_KEY, software_metadata_key)
cluster_metadata = (
constants.AZTK_MODE_METADATA_KEY, constants.AZTK_CLUSTER_MODE_METADATA)
aztk_clusters = []
for pool in [pool for pool in pools if pool.metadata]:
pool_metadata = [(metadata.name, metadata.value) for metadata in pool.metadata]
if all([metadata in pool_metadata for metadata in [software_metadata, cluster_metadata]]):
aztk_clusters.append(models.Cluster(pool))
return aztk_clusters


@@ -0,0 +1,12 @@
import time
import azure.batch.models as batch_models
def wait_for_task_to_complete(core_cluster_operations, job_id: str, task_id: str):
while True:
task = core_cluster_operations.batch_client.task.get(job_id=job_id, task_id=task_id)
if task.state != batch_models.TaskState.completed:
time.sleep(2)
else:
return


@@ -0,0 +1,94 @@
from aztk.client.base import BaseOperations
from aztk.models import ClusterConfiguration
from .helpers import copy, create, delete, get, list, wait_for_task_to_complete
class CoreClusterOperations(BaseOperations):
def create(self, cluster_configuration: ClusterConfiguration, software_metadata_key: str, start_task,
vm_image_model):
"""Create a cluster.
Args:
cluster_configuration (:obj:`aztk.models.ClusterConfiguration`): Configuration for the cluster to be created
software_metadata_key (:obj:`str`): the key for the primary software that will be run on the cluster
start_task (:obj:`azure.batch.models.StartTask`): Batch StartTask definition to configure the Batch Pool
vm_image_model (:obj:`azure.batch.models.VirtualMachineConfiguration`): Configuration of the virtual machine image and settings
Returns:
:obj:`aztk.models.Cluster`: A Cluster object representing the state and configuration of the cluster.
"""
return create.create_pool_and_job(self, cluster_configuration, software_metadata_key, start_task,
vm_image_model)
def get(self, id: str):
"""Get the state and configuration of a cluster
Args:
id (:obj:`str`): the id of the cluster to get.
Returns:
:obj:`aztk.models.Cluster`: A Cluster object representing the state and configuration of the cluster.
"""
return get.get_pool_details(self, id)
def copy(self, id, source_path, destination_path=None, container_name=None, internal=False, get=False,
timeout=None):
"""Copy files to or from every node in a cluster.
Args:
id (:obj:`str`): the id of the cluster to copy files with.
source_path (:obj:`str`): the path of the file to copy from.
destination_path (:obj:`str`, optional): the local directory path where the output should be written.
If None, a SpooledTemporaryFile will be returned in the NodeOutput object, else the file will be
written to this path. Defaults to None.
container_name (:obj:`str`, optional): the name of the container to copy to or from.
If None, the copy operation will occur on the host VM, Defaults to None.
internal (:obj:`bool`, optional): if True, this will connect to the node using its internal IP.
Only use this if running within the same VNET as the cluster. Defaults to False.
get (:obj:`bool`, optional): If True, the file is downloaded from every node in the cluster.
Else, the file is copied from the client to the node. Defaults to False.
timeout (:obj:`int`, optional): The timeout in seconds for establishing a connection to the node.
Defaults to None.
Returns:
:obj:`List[aztk.models.NodeOutput]`: A list of NodeOutput objects representing the output of the copy command.
"""
return copy.cluster_copy(self, id, source_path, destination_path, container_name, internal, get, timeout)
def delete(self, id: str, keep_logs: bool = False):
"""Copy files to or from every node in a cluster.
Args:
id (:obj:`str`): the id of the cluster to delete
keep_logs (:obj:`bool`): If True, the logs related to this cluster in Azure Storage are not deleted.
Defaults to False.
Returns:
:obj:`bool`: True if the cluster or its associated job existed and was deleted.
"""
return delete.delete_pool_and_job(self, id, keep_logs)
def list(self, software_metadata_key):
"""List clusters running the specified software.
Args:
software_metadata_key (:obj:`str`): the key of the primary software running on the cluster.
This filters out non-aztk clusters and aztk clusters running other software.
Returns:
:obj:`List[aztk.models.Cluster]`: list of clusters running the software defined by software_metadata_key
"""
return list.list_clusters(self, software_metadata_key)
def wait(self, id, task_name):
"""Wait until the task has completed
Args:
id (:obj:`str`): the id of the job the task was submitted to
task_name (:obj:`str`): the name of the task to wait for
Returns:
:obj:`None`
"""
return wait_for_task_to_complete.wait_for_task_to_complete(self, id, task_name)
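A sketch of the lifecycle these operations expose, assuming `operations` is a CoreClusterOperations instance and that cluster_config, start_task and vm_image are prepared by the software-specific layer ('spark' is the software metadata key used elsewhere in this change):

```python
cluster = operations.create(cluster_config, 'spark', start_task, vm_image)
cluster = operations.get(cluster_config.cluster_id)   # refresh state and node list
for c in operations.list('spark'):                    # only aztk spark clusters
    print(c.id)
operations.delete(cluster_config.cluster_id, keep_logs=True)
```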


@@ -0,0 +1 @@
from .operations import CoreJobOperations


@@ -0,0 +1,76 @@
from datetime import timedelta
import azure.batch.models as batch_models
from aztk.utils import helpers, constants
def submit_job(
job_client,
job_configuration,
start_task,
job_manager_task,
autoscale_formula,
software_metadata_key: str,
vm_image_model,
application_metadata):
"""
Job Submission
:param job_configuration -> aztk_sdk.spark.models.JobConfiguration
:param start_task -> batch_models.StartTask
:param job_manager_task -> batch_models.TaskAddParameter
:param autoscale_formula -> str
:param software_metadata_key -> str
:param vm_image_model -> aztk_sdk.models.VmImage
:returns: azure.batch.models.CloudJobSchedule
"""
job_client.get_cluster_data(job_configuration.id).save_cluster_config(job_configuration.to_cluster_config())
# get a verified node agent sku
sku_to_use, image_ref_to_use = \
helpers.select_latest_verified_vm_image_with_node_agent_sku(
vm_image_model.publisher, vm_image_model.offer, vm_image_model.sku, job_client.batch_client)
# set up subnet if necessary
network_conf = None
if job_configuration.subnet_id:
network_conf = batch_models.NetworkConfiguration(subnet_id=job_configuration.subnet_id)
# set up a schedule for a recurring job
auto_pool_specification = batch_models.AutoPoolSpecification(
pool_lifetime_option=batch_models.PoolLifetimeOption.job_schedule,
auto_pool_id_prefix=job_configuration.id,
keep_alive=False,
pool=batch_models.PoolSpecification(
display_name=job_configuration.id,
virtual_machine_configuration=batch_models.VirtualMachineConfiguration(
image_reference=image_ref_to_use, node_agent_sku_id=sku_to_use),
vm_size=job_configuration.vm_size,
enable_auto_scale=True,
auto_scale_formula=autoscale_formula,
auto_scale_evaluation_interval=timedelta(minutes=5),
start_task=start_task,
enable_inter_node_communication=not job_configuration.mixed_mode(),
network_configuration=network_conf,
max_tasks_per_node=4,
metadata=[
batch_models.MetadataItem(name=constants.AZTK_SOFTWARE_METADATA_KEY, value=software_metadata_key),
batch_models.MetadataItem(name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_JOB_MODE_METADATA)
]))
# define job specification
job_spec = batch_models.JobSpecification(
pool_info=batch_models.PoolInformation(auto_pool_specification=auto_pool_specification),
display_name=job_configuration.id,
on_all_tasks_complete=batch_models.OnAllTasksComplete.terminate_job,
job_manager_task=job_manager_task,
metadata=[batch_models.MetadataItem(name='applications', value=application_metadata)])
# define schedule
schedule = batch_models.Schedule(do_not_run_until=None, do_not_run_after=None, start_window=None, recurrence_interval=None)
# create job schedule and add task
setup = batch_models.JobScheduleAddParameter(id=job_configuration.id, schedule=schedule, job_specification=job_spec)
job_client.batch_client.job_schedule.add(setup)
return job_client.batch_client.job_schedule.get(job_schedule_id=job_configuration.id)


@@ -0,0 +1,30 @@
from aztk.client.base import BaseOperations
from .helpers import submit
class CoreJobOperations(BaseOperations):
def submit(self, job_configuration, start_task, job_manager_task, autoscale_formula, software_metadata_key: str,
vm_image_model, application_metadata):
"""Submit a job
Jobs are a cluster definition and one or many application definitions which run on the cluster. The job's
cluster will be allocated and configured, then the applications will be executed with their output stored
in Azure Storage. When all applications have completed, the cluster will be automatically deleted.
Args:
job_configuration (:obj:`aztk.models.JobConfiguration`): Model defining the job's configuration.
start_task (:obj:`azure.batch.models.StartTask`): Batch StartTask definition to configure the Batch Pool
job_manager_task (:obj:`azure.batch.models.JobManagerTask`): Batch JobManagerTask definition to schedule
the defined applications on the cluster.
autoscale_formula (:obj:`str`): formula that defines the numbers of nodes allocated to the cluster.
software_metadata_key (:obj:`str`): the key of the primary software running on the cluster.
vm_image_model (:obj:`aztk.models.VmImage`): the VM image to provision for the cluster's nodes.
application_metadata (:obj:`List[str]`): list of the names of all applications that will be run as a
part of the job
Returns:
:obj:`azure.batch.models.CloudJobSchedule`: Model representing the Azure Batch JobSchedule state.
"""
return submit.submit_job(self, job_configuration, start_task, job_manager_task, autoscale_formula,
software_metadata_key, vm_image_model, application_metadata)
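A sketch of a submit call, assuming the surrounding pieces (configuration, tasks, image and formula) were built by the software-specific layer; names here are placeholders:

```python
job_schedule = job_operations.submit(
    job_configuration=job_conf,
    start_task=start_task,
    job_manager_task=job_manager_task,
    autoscale_formula='$TargetDedicatedNodes=2',
    software_metadata_key='spark',
    vm_image_model=vm_image,
    application_metadata=application_names,  # names of the job's applications
)
print(job_schedule.id)
```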


@@ -18,4 +18,5 @@ from .software import Software
 from .cluster import Cluster
 from .scheduling_target import SchedulingTarget
 from .port_forward_specification import PortForwardingSpecification
+from .application_log import ApplicationLog
 from .plugins import *


@@ -0,0 +1,12 @@
import azure.batch.models as batch_models
class ApplicationLog():
def __init__(self, name: str, cluster_id: str, log: str, total_bytes: int,
application_state: batch_models.TaskState, exit_code: int):
self.name = name
self.cluster_id = cluster_id # TODO: change to something cluster/job agnostic
self.log = log
self.total_bytes = total_bytes
self.application_state = application_state
self.exit_code = exit_code


@@ -1,10 +1,11 @@
 """
 This is the code that all nodes will run in their start task to try to allocate the master
 """
 import azure.batch.batch_service_client as batch
 import azure.batch.models as batchmodels
 import azure.batch.models.batch_error as batcherror
+from msrest.exceptions import ClientRequestError
 from core import config
 MASTER_NODE_METADATA_KEY = "_spark_master_node"
@@ -36,7 +37,7 @@ def try_assign_self_as_master(client: batch.BatchServiceClient, pool: batchmodel
             if_match=pool.e_tag,
         ))
         return True
-    except batcherror.BatchErrorException:
+    except (batcherror.BatchErrorException, ClientRequestError):
         print("Couldn't assign itself as master the pool because the pool was modified since last get.")
         return False
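The widened except clause above guards Batch's e_tag-based optimistic concurrency; for context, a sketch of that pattern (helper name and wiring are illustrative, the API calls follow the diff):

```python
import azure.batch.models as batchmodels

def try_claim_master(client, pool, node_id):
    # Tag the pool with this node's id, but only if the pool is unchanged
    # since we read it (if_match=pool.e_tag).
    metadata = (pool.metadata or []) + [
        batchmodels.MetadataItem(name=MASTER_NODE_METADATA_KEY, value=node_id)]
    try:
        client.pool.patch(pool.id, batchmodels.PoolPatchParameter(metadata=metadata),
                          batchmodels.PoolPatchOptions(if_match=pool.e_tag))
        return True
    except (batcherror.BatchErrorException, ClientRequestError):
        # the pool changed under us (or the request transiently failed): not master
        return False
```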


@@ -1,361 +0,0 @@
from typing import List
import azure.batch.models.batch_error as batch_error
import aztk
from aztk import error
from aztk.client import Client as BaseClient
from aztk.internal.cluster_data import NodeData
from aztk.spark import models
from aztk.spark.helpers import create_cluster as create_cluster_helper
from aztk.spark.helpers import get_log as get_log_helper
from aztk.spark.helpers import job_submission as job_submit_helper
from aztk.spark.helpers import submit as cluster_submit_helper
from aztk.spark.helpers import cluster_diagnostic_helper
from aztk.spark.utils import util
from aztk.utils import helpers
class Client(BaseClient):
"""
Aztk Spark Client
This is the main entry point for using aztk for spark
Args:
secrets_config(aztk.spark.models.models.SecretsConfiguration): Configuration with all the needed credentials
"""
def create_cluster(self, cluster_conf: models.ClusterConfiguration, wait: bool = False):
"""
Create a new aztk spark cluster
Args:
cluster_conf(aztk.spark.models.models.ClusterConfiguration): Configuration for the cluster to be created
wait(bool): Whether to wait for the cluster to be ready before returning
Returns:
aztk.spark.models.Cluster
"""
cluster_conf = _apply_default_for_cluster_config(cluster_conf)
cluster_conf.validate()
cluster_data = self._get_cluster_data(cluster_conf.cluster_id)
try:
zip_resource_files = None
node_data = NodeData(cluster_conf).add_core().done()
zip_resource_files = cluster_data.upload_node_data(node_data).to_resource_file()
start_task = create_cluster_helper.generate_cluster_start_task(self,
zip_resource_files,
cluster_conf.cluster_id,
cluster_conf.gpu_enabled(),
cluster_conf.get_docker_repo(),
cluster_conf.file_shares,
cluster_conf.plugins,
cluster_conf.mixed_mode(),
cluster_conf.worker_on_master)
software_metadata_key = "spark"
vm_image = models.VmImage(
publisher='Canonical',
offer='UbuntuServer',
sku='16.04')
cluster = self.__create_pool_and_job(
cluster_conf, software_metadata_key, start_task, vm_image)
# Wait for the master to be ready
if wait:
util.wait_for_master_to_be_ready(self, cluster.id)
cluster = self.get_cluster(cluster.id)
return cluster
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def create_clusters_in_parallel(self, cluster_confs):
for cluster_conf in cluster_confs:
self.create_cluster(cluster_conf)
def delete_cluster(self, cluster_id: str, keep_logs: bool = False):
try:
return self.__delete_pool_and_job(cluster_id, keep_logs)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def get_cluster(self, cluster_id: str):
try:
pool, nodes = self.__get_pool_details(cluster_id)
return models.Cluster(pool, nodes)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def list_clusters(self):
try:
return [models.Cluster(pool) for pool in self.__list_clusters(aztk.models.Software.spark)]
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def get_remote_login_settings(self, cluster_id: str, node_id: str):
try:
return self.__get_remote_login_settings(cluster_id, node_id)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def submit(self, cluster_id: str, application: models.ApplicationConfiguration, remote: bool = False, wait: bool = False):
try:
cluster_submit_helper.submit_application(self, cluster_id, application, remote, wait)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def submit_all_applications(self, cluster_id: str, applications):
for application in applications:
self.submit(cluster_id, application)
def wait_until_application_done(self, cluster_id: str, task_id: str):
try:
helpers.wait_for_task_to_complete(job_id=cluster_id, task_id=task_id, batch_client=self.batch_client)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def wait_until_applications_done(self, cluster_id: str):
try:
helpers.wait_for_tasks_to_complete(job_id=cluster_id, batch_client=self.batch_client)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def wait_until_cluster_is_ready(self, cluster_id: str):
try:
util.wait_for_master_to_be_ready(self, cluster_id)
pool = self.batch_client.pool.get(cluster_id)
nodes = self.batch_client.compute_node.list(pool_id=cluster_id)
return models.Cluster(pool, nodes)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def wait_until_all_clusters_are_ready(self, clusters: List[str]):
for cluster_id in clusters:
self.wait_until_cluster_is_ready(cluster_id)
def create_user(self, cluster_id: str, username: str, password: str = None, ssh_key: str = None) -> str:
try:
cluster = self.get_cluster(cluster_id)
master_node_id = cluster.master_node_id
if not master_node_id:
raise error.ClusterNotReadyError("The master has not yet been selected; a user cannot be added.")
self.__create_user_on_pool(username, cluster.id, cluster.nodes, ssh_key, password)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def get_application_log(self, cluster_id: str, application_name: str, tail=False, current_bytes: int = 0):
try:
return get_log_helper.get_log(self.batch_client, self.blob_client,
cluster_id, application_name, tail, current_bytes)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def get_application_status(self, cluster_id: str, app_name: str):
try:
task = self.batch_client.task.get(cluster_id, app_name)
return task.state.value
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def cluster_run(self, cluster_id: str, command: str, host=False, internal: bool = False, timeout=None):
try:
return self.__cluster_run(cluster_id,
command,
internal,
container_name='spark' if not host else None,
timeout=timeout)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def node_run(self, cluster_id: str, node_id: str, command: str, host=False, internal: bool = False, timeout=None):
try:
return self.__node_run(cluster_id,
node_id,
command,
internal,
container_name='spark' if not host else None,
timeout=timeout)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def cluster_copy(self, cluster_id: str, source_path: str, destination_path: str, host: bool = False, internal: bool = False, timeout: int = None):
try:
container_name = None if host else 'spark'
return self.__cluster_copy(cluster_id,
source_path,
destination_path=destination_path,
container_name=container_name,
get=False,
internal=internal,
timeout=timeout)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def cluster_download(self, cluster_id: str, source_path: str, destination_path: str = None, host: bool = False, internal: bool = False, timeout: int = None):
try:
container_name = None if host else 'spark'
return self.__cluster_copy(cluster_id,
source_path,
destination_path=destination_path,
container_name=container_name,
get=True,
internal=internal,
timeout=timeout)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def cluster_ssh_into_master(self, cluster_id, node_id, username, ssh_key=None, password=None, port_forward_list=None, internal=False):
try:
self.__ssh_into_node(cluster_id, node_id, username, ssh_key, password, port_forward_list, internal)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
'''
job submission
'''
def submit_job(self, job_configuration: models.JobConfiguration):
try:
job_configuration = _apply_default_for_job_config(job_configuration)
job_configuration.validate()
cluster_data = self._get_cluster_data(job_configuration.id)
node_data = NodeData(job_configuration.to_cluster_config()).add_core().done()
zip_resource_files = cluster_data.upload_node_data(node_data).to_resource_file()
start_task = create_cluster_helper.generate_cluster_start_task(self,
zip_resource_files,
job_configuration.id,
job_configuration.gpu_enabled,
job_configuration.get_docker_repo(),
mixed_mode=job_configuration.mixed_mode(),
worker_on_master=job_configuration.worker_on_master)
application_tasks = []
for application in job_configuration.applications:
application_tasks.append(
(application, cluster_submit_helper.generate_task(self, job_configuration.id, application))
)
job_manager_task = job_submit_helper.generate_task(self, job_configuration, application_tasks)
software_metadata_key = "spark"
vm_image = models.VmImage(
publisher='Canonical',
offer='UbuntuServer',
sku='16.04')
autoscale_formula = "$TargetDedicatedNodes = {0}; " \
"$TargetLowPriorityNodes = {1}".format(
job_configuration.max_dedicated_nodes,
job_configuration.max_low_pri_nodes)
job = self.__submit_job(
job_configuration=job_configuration,
start_task=start_task,
job_manager_task=job_manager_task,
autoscale_formula=autoscale_formula,
software_metadata_key=software_metadata_key,
vm_image_model=vm_image,
application_metadata='\n'.join(application.name for application in (job_configuration.applications or [])))
return models.Job(job)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
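# With max_dedicated_nodes=3 and max_low_pri_nodes=5, the autoscale formula
# above renders to: "$TargetDedicatedNodes = 3; $TargetLowPriorityNodes = 5"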
def list_jobs(self):
try:
return [models.Job(cloud_job_schedule) for cloud_job_schedule in job_submit_helper.list_jobs(self)]
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def list_applications(self, job_id):
try:
applications = job_submit_helper.list_applications(self, job_id)
for item in applications:
if applications[item]:
applications[item] = models.Application(applications[item])
return applications
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def get_job(self, job_id):
try:
job, apps, pool, nodes = job_submit_helper.get_job(self, job_id)
return models.Job(job, apps, pool, nodes)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def stop_job(self, job_id):
try:
return job_submit_helper.stop(self, job_id)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def delete_job(self, job_id: str, keep_logs: bool = False):
try:
return job_submit_helper.delete(self, job_id, keep_logs)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def get_application(self, job_id, application_name):
try:
return models.Application(job_submit_helper.get_application(self, job_id, application_name))
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def get_job_application_log(self, job_id, application_name):
try:
return job_submit_helper.get_application_log(self, job_id, application_name)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def stop_job_app(self, job_id, application_name):
try:
return job_submit_helper.stop_app(self, job_id, application_name)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def wait_until_job_finished(self, job_id):
try:
job_submit_helper.wait_until_job_finished(self, job_id)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def wait_until_all_jobs_finished(self, jobs):
for job in jobs:
self.wait_until_job_finished(job)
def run_cluster_diagnostics(self, cluster_id, output_directory=None):
try:
output = cluster_diagnostic_helper.run(self, cluster_id, output_directory)
return output
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
def _default_scheduling_target(vm_count: int):
if vm_count == 0:
return models.SchedulingTarget.Any
else:
return models.SchedulingTarget.Dedicated
def _apply_default_for_cluster_config(configuration: models.ClusterConfiguration):
cluster_conf = models.ClusterConfiguration()
cluster_conf.merge(configuration)
if cluster_conf.scheduling_target is None:
cluster_conf.scheduling_target = _default_scheduling_target(cluster_conf.size)
return cluster_conf
def _apply_default_for_job_config(job_conf: models.JobConfiguration):
if job_conf.scheduling_target is None:
job_conf.scheduling_target = _default_scheduling_target(job_conf.max_dedicated_nodes)
return job_conf
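# Illustration of the defaulting logic above (derived directly from
# _default_scheduling_target):
#
#   _default_scheduling_target(0)   # -> models.SchedulingTarget.Any
#   _default_scheduling_target(4)   # -> models.SchedulingTarget.Dedicated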


@ -0,0 +1 @@
from .client import Client


@ -0,0 +1 @@
from .operations import SparkBaseOperations


@ -0,0 +1,96 @@
import os
import azure.batch.models as batch_models
import yaml
from aztk.utils import helpers
from aztk.utils.command_builder import CommandBuilder
def generate_application_task(core_base_operations, container_id, application, remote=False):
resource_files = []
# The application provided is not hosted remotely and therefore must be uploaded
if not remote:
app_resource_file = helpers.upload_file_to_container(
container_name=container_id,
application_name=application.name,
file_path=application.application,
blob_client=core_base_operations.blob_client,
use_full_path=False)
# Upload application file
resource_files.append(app_resource_file)
application.application = '$AZ_BATCH_TASK_WORKING_DIR/' + os.path.basename(application.application)
# Upload dependent JARS
jar_resource_file_paths = []
for jar in application.jars:
current_jar_resource_file_path = helpers.upload_file_to_container(
container_name=container_id,
application_name=application.name,
file_path=jar,
blob_client=core_base_operations.blob_client,
use_full_path=False)
jar_resource_file_paths.append(current_jar_resource_file_path)
resource_files.append(current_jar_resource_file_path)
# Upload dependent python files
py_files_resource_file_paths = []
for py_file in application.py_files:
current_py_files_resource_file_path = helpers.upload_file_to_container(
container_name=container_id,
application_name=application.name,
file_path=py_file,
blob_client=core_base_operations.blob_client,
use_full_path=False)
py_files_resource_file_paths.append(current_py_files_resource_file_path)
resource_files.append(current_py_files_resource_file_path)
# Upload other dependent files
files_resource_file_paths = []
for file in application.files:
files_resource_file_path = helpers.upload_file_to_container(
container_name=container_id,
application_name=application.name,
file_path=file,
blob_client=core_base_operations.blob_client,
use_full_path=False)
files_resource_file_paths.append(files_resource_file_path)
resource_files.append(files_resource_file_path)
# Upload application definition
application.jars = [os.path.basename(jar) for jar in application.jars]
application.py_files = [os.path.basename(py_files) for py_files in application.py_files]
application.files = [os.path.basename(files) for files in application.files]
application_definition_file = helpers.upload_text_to_container(
container_name=container_id,
application_name=application.name,
file_path='application.yaml',
content=yaml.dump(vars(application)),
blob_client=core_base_operations.blob_client)
resource_files.append(application_definition_file)
# create command to submit task
task_cmd = CommandBuilder('sudo docker exec')
task_cmd.add_argument('-i')
task_cmd.add_option('-e', 'AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR')
task_cmd.add_option('-e', 'STORAGE_LOGS_CONTAINER={0}'.format(container_id))
task_cmd.add_argument('spark /bin/bash >> output.log 2>&1')
task_cmd.add_argument('-c "source ~/.bashrc; ' \
'export PYTHONPATH=$PYTHONPATH:\$AZTK_WORKING_DIR; ' \
'cd \$AZ_BATCH_TASK_WORKING_DIR; ' \
'\$AZTK_WORKING_DIR/.aztk-env/.venv/bin/python \$AZTK_WORKING_DIR/aztk/node_scripts/submit.py"')
# Create task
task = batch_models.TaskAddParameter(
id=application.name,
command_line=helpers.wrap_commands_in_shell([task_cmd.to_str()]),
resource_files=resource_files,
constraints=batch_models.TaskConstraints(max_task_retry_count=application.max_retry_count),
user_identity=batch_models.UserIdentity(
auto_user=batch_models.AutoUserSpecification(
scope=batch_models.AutoUserScope.task, elevation_level=batch_models.ElevationLevel.admin)))
return task
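# Rough shape of the command line this builds (illustrative only; the exact
# string depends on CommandBuilder's option ordering):
#
#   sudo docker exec -i \
#       -e AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR \
#       -e STORAGE_LOGS_CONTAINER=<cluster_id> \
#       spark /bin/bash >> output.log 2>&1 \
#       -c "source ~/.bashrc; ... submit.py"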


@ -0,0 +1,148 @@
from typing import List
import azure.batch.models as batch_models
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.internal.cluster_data import NodeData
from aztk.spark import models
from aztk.spark.utils import util
from aztk.utils import constants, helpers
POOL_ADMIN_USER_IDENTITY = batch_models.UserIdentity(
auto_user=batch_models.AutoUserSpecification(
scope=batch_models.AutoUserScope.pool, elevation_level=batch_models.ElevationLevel.admin))
def _get_aztk_environment(cluster_id, worker_on_master, mixed_mode):
envs = []
envs.append(batch_models.EnvironmentSetting(name="AZTK_MIXED_MODE", value=helpers.bool_env(mixed_mode)))
envs.append(batch_models.EnvironmentSetting(name="AZTK_WORKER_ON_MASTER", value=helpers.bool_env(worker_on_master)))
envs.append(batch_models.EnvironmentSetting(name="AZTK_CLUSTER_ID", value=cluster_id))
return envs
def __get_docker_credentials(core_base_operations):
creds = []
docker = core_base_operations.secrets_configuration.docker
if docker:
if docker.endpoint:
creds.append(batch_models.EnvironmentSetting(name="DOCKER_ENDPOINT", value=docker.endpoint))
if docker.username:
creds.append(batch_models.EnvironmentSetting(name="DOCKER_USERNAME", value=docker.username))
if docker.password:
creds.append(batch_models.EnvironmentSetting(name="DOCKER_PASSWORD", value=docker.password))
return creds
def __get_secrets_env(core_base_operations):
shared_key = core_base_operations.secrets_configuration.shared_key
service_principal = core_base_operations.secrets_configuration.service_principal
if shared_key:
return [
batch_models.EnvironmentSetting(name="BATCH_SERVICE_URL", value=shared_key.batch_service_url),
batch_models.EnvironmentSetting(name="BATCH_ACCOUNT_KEY", value=shared_key.batch_account_key),
batch_models.EnvironmentSetting(name="STORAGE_ACCOUNT_NAME", value=shared_key.storage_account_name),
batch_models.EnvironmentSetting(name="STORAGE_ACCOUNT_KEY", value=shared_key.storage_account_key),
batch_models.EnvironmentSetting(name="STORAGE_ACCOUNT_SUFFIX", value=shared_key.storage_account_suffix),
]
else:
return [
batch_models.EnvironmentSetting(name="SP_TENANT_ID", value=service_principal.tenant_id),
batch_models.EnvironmentSetting(name="SP_CLIENT_ID", value=service_principal.client_id),
batch_models.EnvironmentSetting(name="SP_CREDENTIAL", value=service_principal.credential),
batch_models.EnvironmentSetting(
name="SP_BATCH_RESOURCE_ID", value=service_principal.batch_account_resource_id),
batch_models.EnvironmentSetting(
name="SP_STORAGE_RESOURCE_ID", value=service_principal.storage_account_resource_id),
]
def __cluster_install_cmd(zip_resource_file: batch_models.ResourceFile,
gpu_enabled: bool,
docker_repo: str = None,
plugins=None,
worker_on_master: bool = True,
file_mounts=None,
mixed_mode: bool = False):
"""
For Docker on ubuntu 16.04 - return the command line
to be run on the start task of the pool to setup spark.
"""
default_docker_repo = constants.DEFAULT_DOCKER_REPO if not gpu_enabled else constants.DEFAULT_DOCKER_REPO_GPU
docker_repo = docker_repo or default_docker_repo
shares = []
if file_mounts:
for mount in file_mounts:
# Create the directory on the node
shares.append('mkdir -p {0}'.format(mount.mount_path))
# Mount the file share
shares.append(
'mount -t cifs //{0}.file.core.windows.net/{2} {3} -o vers=3.0,username={0},password={1},dir_mode=0777,file_mode=0777,sec=ntlmssp'.
format(mount.storage_account_name, mount.storage_account_key, mount.file_share_path, mount.mount_path))
setup = [
'time('\
'apt-get -y update;'\
'apt-get -y --no-install-recommends install unzip;'\
'unzip -o $AZ_BATCH_TASK_WORKING_DIR/{0};'\
'chmod 777 $AZ_BATCH_TASK_WORKING_DIR/aztk/node_scripts/setup_host.sh;'\
') 2>&1'.format(zip_resource_file.file_path),
'/bin/bash $AZ_BATCH_TASK_WORKING_DIR/aztk/node_scripts/setup_host.sh {0} {1}'.format(
constants.DOCKER_SPARK_CONTAINER_NAME,
docker_repo,
)
]
commands = shares + setup
return commands
def generate_cluster_start_task(core_base_operations,
zip_resource_file: batch_models.ResourceFile,
cluster_id: str,
gpu_enabled: bool,
docker_repo: str = None,
file_shares: List[models.FileShare] = None,
plugins: List[models.PluginConfiguration] = None,
mixed_mode: bool = False,
worker_on_master: bool = True):
"""
This will return the start task object for the pool to be created.
:param cluster_id str: Id of the cluster(Used for uploading the resource files)
:param zip_resource_file: Resource file object pointing to the zip file containing scripts to run on the node
"""
resource_files = [zip_resource_file]
spark_web_ui_port = constants.DOCKER_SPARK_WEB_UI_PORT
spark_worker_ui_port = constants.DOCKER_SPARK_WORKER_UI_PORT
spark_job_ui_port = constants.DOCKER_SPARK_JOB_UI_PORT
spark_container_name = constants.DOCKER_SPARK_CONTAINER_NAME
spark_submit_logs_file = constants.SPARK_SUBMIT_LOGS_FILE
# TODO use certificate
environment_settings = __get_secrets_env(core_base_operations) + [
batch_models.EnvironmentSetting(name="SPARK_WEB_UI_PORT", value=spark_web_ui_port),
batch_models.EnvironmentSetting(name="SPARK_WORKER_UI_PORT", value=spark_worker_ui_port),
batch_models.EnvironmentSetting(name="SPARK_JOB_UI_PORT", value=spark_job_ui_port),
batch_models.EnvironmentSetting(name="SPARK_CONTAINER_NAME", value=spark_container_name),
batch_models.EnvironmentSetting(name="SPARK_SUBMIT_LOGS_FILE", value=spark_submit_logs_file),
batch_models.EnvironmentSetting(name="AZTK_GPU_ENABLED", value=helpers.bool_env(gpu_enabled)),
] + __get_docker_credentials(core_base_operations) + _get_aztk_environment(cluster_id, worker_on_master, mixed_mode)
# start task command
command = __cluster_install_cmd(zip_resource_file, gpu_enabled, docker_repo, plugins, worker_on_master, file_shares,
mixed_mode)
return batch_models.StartTask(
command_line=helpers.wrap_commands_in_shell(command),
resource_files=resource_files,
environment_settings=environment_settings,
user_identity=POOL_ADMIN_USER_IDENTITY,
wait_for_success=True)
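# A minimal sketch of how this start task is consumed by cluster creation
# (`core_ops` and `zip_file` are assumed to be a BaseOperations instance and an
# uploaded ResourceFile; see the create helpers for the real call site):
#
#   start_task = generate_cluster_start_task(
#       core_ops, zip_file, "my-cluster", gpu_enabled=False)
#   # start_task.command_line mounts any file shares, unzips the node scripts,
#   # and runs setup_host.sh, all wrapped by helpers.wrap_commands_in_shell.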


@ -0,0 +1,64 @@
from typing import List
import azure.batch.models as batch_models
from aztk.client.base import BaseOperations as CoreBaseOperations
from aztk.spark import models
from .helpers import generate_cluster_start_task, generate_application_task
class SparkBaseOperations:
"""Spark Base operations object that all other Spark operations objects inherit from
"""
#TODO: make this private or otherwise not public
def _generate_cluster_start_task(self,
core_base_operations,
zip_resource_file: batch_models.ResourceFile,
id: str,
gpu_enabled: bool,
docker_repo: str = None,
file_shares: List[models.FileShare] = None,
plugins: List[models.PluginConfiguration] = None,
mixed_mode: bool = False,
worker_on_master: bool = True):
"""Generate the Azure Batch Start Task to provision a Spark cluster.
Args:
zip_resource_file (:obj:`azure.batch.models.ResourceFile`): a single zip file of all necessary data
to upload to the cluster.
id (:obj:`str`): the id of the cluster.
gpu_enabled (:obj:`bool`): if True, the cluster is GPU enabled.
docker_repo (:obj:`str`, optional): the docker repository and tag that identifies the docker image to use.
If None, the default Docker image will be used. Defaults to None.
file_shares (:obj:`aztk.spark.models.FileShare`, optional): a list of FileShares to mount on the cluster.
Defaults to None.
plugins (:obj:`aztk.spark.models.PluginConfiguration`, optional): a list of plugins to set up on the cluster.
Defaults to None.
mixed_mode (:obj:`bool`, optional): If True, the cluster is configured to use both dedicated and low priority VMs.
Defaults to False.
worker_on_master (:obj:`bool`, optional): If True, the cluster is configured to provision a Spark worker
on the VM that runs the Spark master. Defaults to True.
Returns:
:obj:`azure.batch.models.StartTask`: the StartTask definition to provision the cluster.
"""
return generate_cluster_start_task.generate_cluster_start_task(
core_base_operations, zip_resource_file, id, gpu_enabled, docker_repo, file_shares, plugins, mixed_mode, worker_on_master)
#TODO: make this private or otherwise not public
def _generate_application_task(self, core_base_operations, container_id, application, remote=False):
"""Generate the Azure Batch Start Task to provision a Spark cluster.
Args:
container_id (:obj:`str`): the id of the container to run the application in
application (:obj:`aztk.spark.models.ApplicationConfiguration): the Application Definition
remote (:obj:`bool`): If True, the application file will not be uploaded, it is assumed to be reachable
by the cluster already. This is useful when your application is stored in a mounted Azure File Share
and not the client. Defaults to False.
Returns:
:obj:`azure.batch.models.TaskAddParameter`: the Task definition for the Application.
"""
return generate_application_task.generate_application_task(core_base_operations, container_id, application, remote)

aztk/spark/client/client.py

@ -0,0 +1,233 @@
from typing import List
import azure.batch.models.batch_error as batch_error
import aztk
from aztk import error
from aztk import models as base_models
from aztk.client import CoreClient
from aztk.internal.cluster_data import NodeData
from aztk.spark import models
from aztk.spark.client.cluster import ClusterOperations
from aztk.spark.client.job import JobOperations
from aztk.spark.helpers import cluster_diagnostic_helper
from aztk.spark.helpers import create_cluster as create_cluster_helper
from aztk.spark.helpers import get_log as get_log_helper
from aztk.spark.helpers import job_submission as job_submit_helper
from aztk.spark.helpers import submit as cluster_submit_helper
from aztk.spark.utils import util
from aztk.utils import azure_api, deprecated, deprecate, helpers
class Client(CoreClient):
"""The client used to create and manage Spark clusters
Attributes:
cluster (:obj:`aztk.spark.client.cluster.ClusterOperations`): Cluster
job (:obj:`aztk.spark.client.job.JobOperations`): Job
"""
def __init__(self, secrets_configuration: models.SecretsConfiguration = None, **kwargs):
self.secrets_configuration = None
context = None
if kwargs.get("secrets_config"):
deprecate(version="0.10.0", message="secrets_config key is deprecated in secrets.yaml",
advice="Please use secrets_configuration key instead.")
context = self._get_context(kwargs.get("secrets_config"))
else:
context = self._get_context(secrets_configuration)
self.cluster = ClusterOperations(context)
self.job = JobOperations(context)
# ALL THE FOLLOWING METHODS ARE DEPRECATED AND WILL BE REMOVED IN 0.10.0
@deprecated("0.10.0")
def create_cluster(self, cluster_conf: models.ClusterConfiguration, wait: bool = False):
return self.cluster.create(cluster_configuration=cluster_conf, wait=wait)
@deprecated("0.10.0")
def create_clusters_in_parallel(self, cluster_confs): # NOT IMPLEMENTED
for cluster_conf in cluster_confs:
self.cluster.create(cluster_conf)
@deprecated("0.10.0")
def delete_cluster(self, cluster_id: str, keep_logs: bool = False):
return self.cluster.delete(id=cluster_id, keep_logs=keep_logs)
@deprecated("0.10.0")
def get_cluster(self, cluster_id: str):
return self.cluster.get(id=cluster_id)
@deprecated("0.10.0")
def list_clusters(self):
return self.cluster.list()
@deprecated("0.10.0")
def get_remote_login_settings(self, cluster_id: str, node_id: str):
return self.cluster.get_remote_login_settings(cluster_id, node_id)
@deprecated("0.10.0")
def submit(self,
cluster_id: str,
application: models.ApplicationConfiguration,
remote: bool = False,
wait: bool = False):
return self.cluster.submit(id=cluster_id, application=application, remote=remote, wait=wait)
@deprecated("0.10.0")
def submit_all_applications(self, cluster_id: str, applications): # NOT IMPLEMENTED
for application in applications:
self.cluster.submit(cluster_id, application)
@deprecated("0.10.0")
def wait_until_application_done(self, cluster_id: str, task_id: str): # NOT IMPLEMENTED
try:
helpers.wait_for_task_to_complete(job_id=cluster_id, task_id=task_id, batch_client=self.batch_client)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
@deprecated("0.10.0")
def wait_until_applications_done(self, cluster_id: str): # NOT IMPLEMENTED
try:
helpers.wait_for_tasks_to_complete(job_id=cluster_id, batch_client=self.batch_client)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
@deprecated("0.10.0")
def wait_until_cluster_is_ready(self, cluster_id: str): # NOT IMPLEMENTED
try:
util.wait_for_master_to_be_ready(self.cluster._core_cluster_operations, self.cluster, cluster_id)
pool = self.batch_client.pool.get(cluster_id)
nodes = self.batch_client.compute_node.list(pool_id=cluster_id)
return models.Cluster(base_models.Cluster(pool, nodes))
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
@deprecated("0.10.0")
def wait_until_all_clusters_are_ready(self, clusters: List[str]): # NOT IMPLEMENTED
for cluster_id in clusters:
self.wait_until_cluster_is_ready(cluster_id)
@deprecated("0.10.0")
def create_user(self, cluster_id: str, username: str, password: str = None, ssh_key: str = None) -> str:
return self.cluster.create_user(id=cluster_id, username=username, password=password, ssh_key=ssh_key)
@deprecated("0.10.0")
def get_application_log(self, cluster_id: str, application_name: str, tail=False, current_bytes: int = 0):
return self.cluster.get_application_log(
id=cluster_id, application_name=application_name, tail=tail, current_bytes=current_bytes)
@deprecated("0.10.0")
def get_application_status(self, cluster_id: str, app_name: str):
return self.cluster.get_application_status(id=cluster_id, application_name=app_name)
@deprecated("0.10.0")
def cluster_run(self, cluster_id: str, command: str, host=False, internal: bool = False, timeout=None):
return self.cluster.run(id=cluster_id, command=command, host=host, internal=internal, timeout=timeout)
@deprecated("0.10.0")
def node_run(self, cluster_id: str, node_id: str, command: str, host=False, internal: bool = False, timeout=None):
return self.cluster.node_run(
id=cluster_id, node_id=node_id, command=command, host=host, internal=internal, timeout=timeout)
@deprecated("0.10.0")
def cluster_copy(self,
cluster_id: str,
source_path: str,
destination_path: str,
host: bool = False,
internal: bool = False,
timeout: int = None):
return self.cluster.copy(
id=cluster_id,
source_path=source_path,
destination_path=destination_path,
host=host,
internal=internal,
timeout=timeout)
@deprecated("0.10.0")
def cluster_download(self,
cluster_id: str,
source_path: str,
destination_path: str = None,
host: bool = False,
internal: bool = False,
timeout: int = None):
return self.cluster.download(
id=cluster_id,
source_path=source_path,
destination_path=destination_path,
host=host,
internal=internal,
timeout=timeout)
@deprecated("0.10.0")
def cluster_ssh_into_master(self,
cluster_id,
node_id,
username,
ssh_key=None,
password=None,
port_forward_list=None,
internal=False):
return self.cluster._core_cluster_operations.ssh_into_node(cluster_id, node_id, username, ssh_key, password, port_forward_list, internal)
'''
job submission
'''
@deprecated("0.10.0")
def submit_job(self, job_configuration: models.JobConfiguration):
return self.job.submit(job_configuration)
@deprecated("0.10.0")
def list_jobs(self):
return self.job.list()
@deprecated("0.10.0")
def list_applications(self, job_id):
return self.job.list_applications(job_id)
@deprecated("0.10.0")
def get_job(self, job_id):
return self.job.get(job_id)
@deprecated("0.10.0")
def stop_job(self, job_id):
return self.job.stop(job_id)
@deprecated("0.10.0")
def delete_job(self, job_id: str, keep_logs: bool = False):
return self.job.delete(job_id, keep_logs)
@deprecated("0.10.0")
def get_application(self, job_id, application_name):
return self.job.get_application(job_id, application_name)
@deprecated("0.10.0")
def get_job_application_log(self, job_id, application_name):
return self.job.get_application_log(job_id, application_name)
@deprecated("0.10.0")
def stop_job_app(self, job_id, application_name): # NOT IMPLEMENTED
try:
return job_submit_helper.stop_app(self, job_id, application_name)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
@deprecated("0.10.0")
def wait_until_job_finished(self, job_id):
try:
self.job.wait(job_id)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
@deprecated("0.10.0")
def wait_until_all_jobs_finished(self, jobs): # NOT IMPLEMENTED
for job in jobs:
self.wait_until_job_finished(job)
@deprecated("0.10.0")
def run_cluster_diagnostics(self, cluster_id, output_directory=None):
return self.cluster.diagnostics(cluster_id, output_directory)
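# A short migration sketch from the deprecated flat API to the composed
# operations (`my_secrets` is assumed to be a models.SecretsConfiguration):
#
#   client = Client(secrets_configuration=my_secrets)
#
#   client.create_cluster(conf)        # deprecated; becomes client.cluster.create(conf)
#   client.submit(cluster_id, app)     # deprecated; becomes client.cluster.submit(cluster_id, app)
#   client.submit_job(job_conf)        # deprecated; becomes client.job.submit(job_conf)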


@ -0,0 +1 @@
from .operations import ClusterOperations


@ -0,0 +1,19 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.utils import helpers
def cluster_copy(core_cluster_operations, cluster_id: str, source_path: str, destination_path: str, host: bool = False, internal: bool = False, timeout: int = None):
try:
container_name = None if host else 'spark'
return core_cluster_operations.copy(
cluster_id,
source_path,
destination_path=destination_path,
container_name=container_name,
get=False,
internal=internal,
timeout=timeout)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,67 @@
import azure.batch.models as batch_models
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk import models as base_models
from aztk.internal.cluster_data import NodeData
from aztk.spark import models
from aztk.spark.utils import constants, util
from aztk.utils import helpers
POOL_ADMIN_USER_IDENTITY = batch_models.UserIdentity(
auto_user=batch_models.AutoUserSpecification(
scope=batch_models.AutoUserScope.pool, elevation_level=batch_models.ElevationLevel.admin))
def _default_scheduling_target(vm_count: int):
if vm_count == 0:
return models.SchedulingTarget.Any
else:
return models.SchedulingTarget.Dedicated
def _apply_default_for_cluster_config(configuration: models.ClusterConfiguration):
cluster_conf = models.ClusterConfiguration()
cluster_conf.merge(configuration)
if cluster_conf.scheduling_target is None:
cluster_conf.scheduling_target = _default_scheduling_target(cluster_conf.size)
return cluster_conf
def create_cluster(core_cluster_operations, spark_cluster_operations, cluster_conf: models.ClusterConfiguration, wait: bool = False):
"""
Create a new aztk spark cluster
Args:
cluster_conf(aztk.spark.models.ClusterConfiguration): Configuration for the cluster to be created
wait(bool): If True, block until the cluster is ready before returning
Returns:
:obj:`aztk.spark.models.Cluster`
"""
cluster_conf = _apply_default_for_cluster_config(cluster_conf)
cluster_conf.validate()
cluster_data = core_cluster_operations.get_cluster_data(cluster_conf.cluster_id)
try:
zip_resource_files = None
node_data = NodeData(cluster_conf).add_core().done()
zip_resource_files = cluster_data.upload_node_data(node_data).to_resource_file()
start_task = spark_cluster_operations._generate_cluster_start_task(core_cluster_operations, zip_resource_files, cluster_conf.cluster_id,
cluster_conf.gpu_enabled(), cluster_conf.get_docker_repo(),
cluster_conf.file_shares, cluster_conf.plugins,
cluster_conf.mixed_mode(), cluster_conf.worker_on_master)
software_metadata_key = base_models.Software.spark
cluster = core_cluster_operations.create(cluster_conf, software_metadata_key, start_task, constants.SPARK_VM_IMAGE)
# Wait for the master to be ready
if wait:
util.wait_for_master_to_be_ready(core_cluster_operations, spark_cluster_operations, cluster.id)
cluster = spark_cluster_operations.get(cluster.id)
return cluster
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
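# A minimal usage sketch (`spark_client` is an aztk.spark.Client; the field
# values are illustrative only):
#
#   conf = models.ClusterConfiguration(cluster_id="my-cluster", size=2,
#                                      vm_size="standard_f2")
#   cluster = spark_client.cluster.create(conf, wait=True)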


@ -0,0 +1,15 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.utils import helpers
def create_user(core_cluster_operations, spark_cluster_operations, cluster_id: str, username: str, password: str = None, ssh_key: str = None) -> str:
try:
cluster = spark_cluster_operations.get(cluster_id)
master_node_id = cluster.master_node_id
if not master_node_id:
raise error.ClusterNotReadyError("The master has not yet been selected; a user cannot be added.")
core_cluster_operations.create_user_on_cluster(cluster.id, cluster.nodes, username, ssh_key, password)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,11 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.utils import helpers
def delete_cluster(core_cluster_operations, cluster_id: str, keep_logs: bool = False):
try:
return core_cluster_operations.delete(cluster_id, keep_logs)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,44 @@
import os
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.utils import helpers
def _run(spark_cluster_operations, cluster_id, output_directory=None):
# copy debug program to each node
output = spark_cluster_operations.copy(cluster_id, os.path.abspath("./aztk/spark/utils/debug.py"), "/tmp/debug.py", host=True)
ssh_cmd = _build_diagnostic_ssh_command()
run_output = spark_cluster_operations.run(cluster_id, ssh_cmd, host=True)
remote_path = "/tmp/debug.zip"
if output_directory:
local_path = os.path.join(os.path.abspath(output_directory), "debug.zip")
output = spark_cluster_operations.download(cluster_id, remote_path, local_path, host=True)
# write run output to debug/ directory
with open(os.path.join(os.path.dirname(local_path), "debug-output.txt"), 'w', encoding="UTF-8") as f:
    for node_output in run_output:
        for line in node_output.output:
            f.write(line + '\n')
else:
output = spark_cluster_operations.download(cluster_id, remote_path, host=True)
return output
def _build_diagnostic_ssh_command():
return "sudo rm -rf /tmp/debug.zip; "\
"sudo apt-get install -y python3-pip; "\
"sudo -H pip3 install --upgrade pip; "\
"sudo -H pip3 install docker; "\
"sudo python3 /tmp/debug.py"
def run_cluster_diagnostics(spark_cluster_operations, cluster_id, output_directory=None):
try:
output = _run(spark_cluster_operations, cluster_id, output_directory)
return output
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
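# Example invocation through the Spark client (sketch; `spark_client` is
# assumed to be an aztk.spark.Client):
#
#   output = spark_client.cluster.diagnostics("my-cluster", output_directory="./debug")
#   # writes ./debug/debug.zip and ./debug/debug-output.txt with per-node output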


@ -0,0 +1,19 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.utils import helpers
def cluster_download(core_cluster_operations, cluster_id: str, source_path: str, destination_path: str = None, host: bool = False, internal: bool = False, timeout: int = None):
try:
container_name = None if host else 'spark'
return core_cluster_operations.copy(cluster_id,
source_path,
destination_path=destination_path,
container_name=container_name,
get=True,
internal=internal,
timeout=timeout)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,13 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.spark import models
from aztk.utils import helpers
def get_cluster(core_cluster_operations, cluster_id: str):
try:
cluster = core_cluster_operations.get(cluster_id)
return models.Cluster(cluster)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,7 @@
from aztk.spark import models
def get_application_log(core_base_operations, cluster_id: str, application_name: str, tail=False, current_bytes: int = 0):
base_application_log = core_base_operations.get_application_log(
cluster_id, application_name, tail, current_bytes)
return models.ApplicationLog(base_application_log)


@ -0,0 +1,12 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.utils import helpers
def get_application_status(core_cluster_operations, cluster_id: str, app_name: str):
try:
task = core_cluster_operations.batch_client.task.get(cluster_id, app_name)
return task.state.value
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,12 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.spark import models
from aztk.utils import helpers
def get_remote_login_settings(core_cluster_operations, id: str, node_id: str):
try:
return models.RemoteLogin(core_cluster_operations.get_remote_login_settings(id, node_id))
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,14 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk import models as base_models
from aztk.spark import models
from aztk.utils import helpers
def list_clusters(core_cluster_operations):
try:
software_metadata_key = base_models.Software.spark
return [models.Cluster(cluster) for cluster in core_cluster_operations.list(software_metadata_key)]
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,18 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.utils import helpers
def node_run(core_cluster_operations,
cluster_id: str,
node_id: str,
command: str,
host=False,
internal: bool = False,
timeout=None):
try:
return core_cluster_operations.node_run(
cluster_id, node_id, command, internal, container_name='spark' if not host else None, timeout=timeout)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,12 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.utils import helpers
def cluster_run(core_cluster_operations, cluster_id: str, command: str, host=False, internal: bool = False, timeout=None):
try:
return core_cluster_operations.run(
cluster_id, command, internal, container_name='spark' if not host else None, timeout=timeout)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,12 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.utils import helpers
def cluster_ssh_into_master(spark_cluster_operations, cluster_id, node_id, username, ssh_key=None, password=None, port_forward_list=None, internal=False):
try:
spark_cluster_operations.ssh_into_node(cluster_id, node_id, username, ssh_key, password, port_forward_list, internal)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,47 @@
import azure.batch.models as batch_models
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.error import AztkError
from aztk.spark import models
from aztk.utils import helpers
def __get_node(core_cluster_operations, node_id: str, cluster_id: str) -> batch_models.ComputeNode:
return core_cluster_operations.batch_client.compute_node.get(cluster_id, node_id)
def affinitize_task_to_master(core_cluster_operations, spark_cluster_operations, cluster_id, task):
cluster = spark_cluster_operations.get(cluster_id)
if cluster.master_node_id is None:
raise AztkError("Master has not yet been selected. Please wait until the cluster is finished provisioning.")
master_node = core_cluster_operations.batch_client.compute_node.get(pool_id=cluster_id, node_id=cluster.master_node_id)
task.affinity_info = batch_models.AffinityInformation(affinity_id=master_node.affinity_id)
return task
def submit_application(core_cluster_operations, spark_cluster_operations, cluster_id, application, remote: bool = False, wait: bool = False):
"""
Submit a spark app
"""
task = spark_cluster_operations._generate_application_task(core_cluster_operations, cluster_id, application, remote)
task = affinitize_task_to_master(core_cluster_operations, spark_cluster_operations, cluster_id, task)
# Add task to batch job (which has the same name as cluster_id)
job_id = cluster_id
core_cluster_operations.batch_client.task.add(job_id=job_id, task=task)
if wait:
helpers.wait_for_task_to_complete(job_id=job_id, task_id=task.id, batch_client=core_cluster_operations.batch_client)
def submit(core_cluster_operations,
spark_cluster_operations,
cluster_id: str,
application: models.ApplicationConfiguration,
remote: bool = False,
wait: bool = False):
try:
submit_application(core_cluster_operations, spark_cluster_operations, cluster_id, application, remote, wait)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
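# Sketch of submitting an application and blocking until it completes
# (`spark_client` and the application field values are illustrative):
#
#   app = models.ApplicationConfiguration(name="pi", application="/path/to/pi.py")
#   spark_client.cluster.submit("my-cluster", app, wait=True)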


@ -0,0 +1,10 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.utils import helpers
def wait_for_application_to_complete(core_cluster_operations, id, application_name):
try:
return core_cluster_operations.wait(id, application_name)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,248 @@
from aztk.client.cluster import CoreClusterOperations
from aztk.spark import models
from aztk.spark.client.base import SparkBaseOperations
from .helpers import (copy, create, create_user, delete, diagnostics, download, get, get_application_log,
get_application_status, get_remote_login_settings, list, node_run, run, submit, wait)
class ClusterOperations(SparkBaseOperations):
"""Spark ClusterOperations object
Attributes:
_core_cluster_operations (:obj:`aztk.client.cluster.CoreClusterOperations`): the low-level
cluster operations that interact directly with the Azure Batch service.
"""
def __init__(self, context):
self._core_cluster_operations = CoreClusterOperations(context)
def create(self, cluster_configuration: models.ClusterConfiguration, wait: bool = False):
"""Create a cluster.
Args:
cluster_configuration (:obj:`ClusterConfiguration`): Configuration for the cluster to be created.
wait (:obj:`bool`): if True, this function will block until the cluster creation is finished.
Returns:
:obj:`aztk.spark.models.Cluster`: A Cluster object representing the state and configuration of the cluster.
"""
return create.create_cluster(self._core_cluster_operations, self, cluster_configuration, wait)
def delete(self, id: str, keep_logs: bool = False):
"""Delete a cluster.
Args:
id (:obj:`str`): the id of the cluster to delete.
keep_logs (:obj:`bool`): If True, the logs related to this cluster in Azure Storage are not deleted.
Defaults to False.
Returns:
:obj:`bool`: True if the deletion process was successful.
"""
return delete.delete_cluster(self._core_cluster_operations, id, keep_logs)
def get(self, id: str):
"""Get details about the state of a cluster.
Args:
id (:obj:`str`): the id of the cluster to get.
Returns:
:obj:`aztk.spark.models.Cluster`: A Cluster object representing the state and configuration of the cluster.
"""
return get.get_cluster(self._core_cluster_operations, id)
def list(self):
"""List all clusters.
Returns:
:obj:`List[aztk.spark.models.Cluster]`: List of Cluster objects each representing the state and configuration of the cluster.
"""
return list.list_clusters(self._core_cluster_operations)
def submit(self, id: str, application: models.ApplicationConfiguration, remote: bool = False, wait: bool = False):
"""Submit an application to a cluster.
Args:
id (:obj:`str`): the id of the cluster to submit the application to.
application (:obj:`aztk.spark.models.ApplicationConfiguration`): Application definition
remote (:obj:`bool`): If True, the application file will not be uploaded, it is assumed to be reachable
by the cluster already. This is useful when your application is stored in a mounted Azure File Share
and not the client. Defaults to False.
wait (:obj:`bool`, optional): If True, this function blocks until the application has completed. Defaults to False.
Returns:
:obj:`None`
"""
return submit.submit(self._core_cluster_operations, self, id, application, remote, wait)
def create_user(self, id: str, username: str, password: str = None, ssh_key: str = None):
"""Create a user on every node in the cluster
Args:
id (:obj:`str`): the id of the cluster to create the user on.
username (:obj:`str`): the name of the user to create.
password (:obj:`str`, optional): password for the user; either ssh_key or password must be provided. Defaults to None.
ssh_key (:obj:`str`, optional): ssh public key to create the user with; either ssh_key or password must be provided. Defaults to None.
Returns:
:obj:`None`
"""
return create_user.create_user(self._core_cluster_operations, self, id, username, password=password, ssh_key=ssh_key)
def get_application_status(self, id: str, application_name: str):
"""Get the status of a submitted application
Args:
id (:obj:`str`): the name of the cluster the application was submitted to
application_name (:obj:`str`): the name of the application to get
Returns:
:obj:`str`: the status state of the application
"""
return get_application_status.get_application_status(self._core_cluster_operations, id, application_name)
def run(self, id: str, command: str, host=False, internal: bool = False, timeout=None):
"""Run a bash command on every node in the cluster
Args:
id (:obj:`str`): the id of the cluster to run the command on.
command (:obj:`str`): the bash command to execute on the node.
host (:obj:`bool`, optional): if True, the command runs on the host VM instead of in the Spark container.
Defaults to False.
internal (:obj:`bool`, optional): if True, this will connect to the node using its internal IP.
Only use this if running within the same VNET as the cluster. Defaults to False.
timeout (:obj:`int`, optional): The timeout in seconds for establishing a connection to the node.
Defaults to None.
Returns:
:obj:`List[aztk.spark.models.NodeOutput]`: list of NodeOutput objects containing the output of the run command
"""
return run.cluster_run(self._core_cluster_operations, id, command, host, internal, timeout)
def node_run(self, id: str, node_id: str, command: str, host=False, internal: bool = False, timeout=None):
"""Run a bash command on the given node
Args:
id (:obj:`str`): the id of the cluster to run the command on.
node_id (:obj:`str`): the id of the node in the cluster to run the command on.
command (:obj:`str`): the bash command to execute on the node.
host (:obj:`bool`, optional): if True, the command runs on the host VM instead of in the Spark container.
Defaults to False.
internal (:obj:`bool`, optional): if True, this will connect to the node using its internal IP.
Only use this if running within the same VNET as the cluster. Defaults to False.
timeout (:obj:`int`, optional): The timeout in seconds for establishing a connection to the node.
Defaults to None.
Returns:
:obj:`aztk.spark.models.NodeOutput`: object containing the output of the run command
"""
return node_run.node_run(self._core_cluster_operations, id, node_id, command, host, internal, timeout)
def copy(self,
id: str,
source_path: str,
destination_path: str,
host: bool = False,
internal: bool = False,
timeout: int = None):
"""Copy a file to every node in a cluster.
Args:
id (:obj:`str`): the id of the cluster to copy files with.
source_path (:obj:`str`): the local path of the file to copy.
destination_path (:obj:`str`, optional): the path on each node the file is copied to.
host (:obj:`bool`, optional): if True, the file is copied onto the host VM instead of into the Spark container.
Defaults to False.
internal (:obj:`bool`, optional): if True, this will connect to the node using its internal IP.
Only use this if running within the same VNET as the cluster. Defaults to False.
timeout (:obj:`int`, optional): The timeout in seconds for establishing a connection to the node.
Defaults to None.
Returns:
:obj:`List[aztk.spark.models.NodeOutput]`: A list of NodeOutput objects representing the output of the copy command.
"""
return copy.cluster_copy(self._core_cluster_operations, id, source_path, destination_path, host, internal, timeout)
def download(self,
id: str,
source_path: str,
destination_path: str = None,
host: bool = False,
internal: bool = False,
timeout: int = None):
"""Download a file from every node in a cluster.
Args:
id (:obj:`str`): the id of the cluster to copy files with.
source_path (:obj:`str`): the path of the file to copy from.
destination_path (:obj:`str`, optional): the local directory path where the output should be written.
If None, a SpooledTemporaryFile will be returned in the NodeOutput object, else the file will be
written to this path. Defaults to None.
host (:obj:`bool`, optional): if True, the file is copied from the host VM instead of from the Spark container.
Defaults to False.
internal (:obj:`bool`, optional): if True, this will connect to the node using its internal IP.
Only use this if running within the same VNET as the cluster. Defaults to False.
timeout (:obj:`int`, optional): The timeout in seconds for establishing a connection to the node.
Defaults to None.
Returns:
:obj:`List[aztk.spark.models.NodeOutput]`: A list of NodeOutput objects representing the output of the copy command.
"""
return download.cluster_download(self._core_cluster_operations, id, source_path, destination_path, host, internal,
timeout)
def diagnostics(self, id, output_directory=None):
"""Download a file from every node in a cluster.
Args:
id (:obj:`str`): the id of the cluster to copy files with.
output_directory (:obj:`str`, optional): the local directory path where the output should be written.
If None, a SpooledTemporaryFile will be returned in the NodeOutput object, else the file will be
written to this path. Defaults to None.
Returns:
:obj:`List[aztk.spark.models.NodeOutput]`: A list of NodeOutput objects representing the output of the copy command.
"""
return diagnostics.run_cluster_diagnostics(self, id, output_directory)
def get_application_log(self, id: str, application_name: str, tail=False, current_bytes: int = 0):
"""Get the log for a running or completed application
Args:
id (:obj:`str`): the id of the cluster to run the command on.
application_name (:obj:`str`): the name of the application to get the log of.
tail (:obj:`bool`, optional): If True, get the remaining bytes after current_bytes. Otherwise, the whole log will be retrieved.
Only use this if streaming the log as it is being written. Defaults to False.
current_bytes (:obj:`int`): Specifies the last seen byte, so only the bytes after current_bytes are retrieved.
Only useful if streaming the log as it is being written. Only used if tail is True.
Returns:
:obj:`aztk.spark.models.ApplicationLog`: a model representing the output of the application.
"""
return get_application_log.get_application_log(self._core_cluster_operations, id, application_name, tail, current_bytes)
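# A tailing sketch built on the tail/current_bytes contract documented above
# (`ops` is a ClusterOperations instance; the ApplicationLog field names below
# mirror aztk.spark.models.ApplicationLog and are otherwise illustrative):
#
#   current_bytes = 0
#   while True:
#       log = ops.get_application_log("my-cluster", "pi", tail=True, current_bytes=current_bytes)
#       print(log.log, end="")
#       if log.application_state == 'completed':
#           break
#       current_bytes = log.total_bytes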
def get_remote_login_settings(self, id: str, node_id: str):
"""Get the remote login information for a node in a cluster
Args:
id (:obj:`str`): the id of the cluster the node is in
node_id (:obj:`str`): the id of the node in the cluster
Returns:
:obj:`aztk.spark.models.RemoteLogin`: Object that contains the ip address and port combination to login to a node
"""
return get_remote_login_settings.get_remote_login_settings(self._core_cluster_operations, id, node_id)
def wait(self, id: str, application_name: str):
"""Wait until the application has completed
Args:
id (:obj:`str`): the id of the cluster the application was submitted to
application_name (:obj:`str`): the name of the application to wait for
Returns:
:obj:`None`
"""
return wait.wait_for_application_to_complete(self._core_cluster_operations, id, application_name)


@ -0,0 +1 @@
from .operations import JobOperations


@ -0,0 +1,39 @@
import azure.batch.models as batch_models
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.spark import models
from aztk.utils import helpers
from .get_recent_job import get_recent_job
def _delete(core_job_operations, spark_job_operations, job_id, keep_logs: bool = False):
recent_run_job = get_recent_job(core_job_operations, job_id)
deleted_job_or_job_schedule = False
# delete job
try:
core_job_operations.batch_client.job.delete(recent_run_job.id)
deleted_job_or_job_schedule = True
except batch_models.batch_error.BatchErrorException:
pass
# delete job_schedule
try:
core_job_operations.batch_client.job_schedule.delete(job_id)
deleted_job_or_job_schedule = True
except batch_models.batch_error.BatchErrorException:
pass
# delete storage container
if not keep_logs:
cluster_data = core_job_operations.get_cluster_data(job_id)
cluster_data.delete_container(job_id)
return deleted_job_or_job_schedule
def delete(core_job_operations, spark_job_operations, job_id: str, keep_logs: bool = False):
try:
return _delete(core_job_operations, spark_job_operations, job_id, keep_logs)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,32 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.spark import models
from aztk.utils import helpers
from .get_recent_job import get_recent_job
def _get_job(core_job_operations, job_id):
job = core_job_operations.batch_client.job_schedule.get(job_id)
job_apps = [
app for app in core_job_operations.batch_client.task.list(job_id=job.execution_info.recent_job.id) if app.id != job_id
]
recent_run_job = get_recent_job(core_job_operations, job_id)
pool_prefix = recent_run_job.pool_info.auto_pool_specification.auto_pool_id_prefix
pool = nodes = None
for cloud_pool in core_job_operations.batch_client.pool.list():
if pool_prefix in cloud_pool.id:
pool = cloud_pool
break
if pool:
nodes = core_job_operations.batch_client.compute_node.list(pool_id=pool.id)
return job, job_apps, pool, nodes
def get_job(core_job_operations, job_id):
try:
job, apps, pool, nodes = _get_job(core_job_operations, job_id)
return models.Job(job, apps, pool, nodes)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,25 @@
import azure.batch.models as batch_models
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.spark import models
from aztk.utils import helpers
from .get_recent_job import get_recent_job
def _get_application(spark_job_operations, job_id, application_name):
# info about the app
recent_run_job = get_recent_job(spark_job_operations._core_job_operations, job_id)
try:
return spark_job_operations._core_job_operations.batch_client.task.get(job_id=recent_run_job.id, task_id=application_name)
except batch_models.batch_error.BatchErrorException:
raise error.AztkError(
"The Spark application {0} is still being provisioned or does not exist.".format(application_name))
def get_application(spark_job_operations, job_id, application_name):
try:
return models.Application(_get_application(spark_job_operations, job_id, application_name))
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))


@ -0,0 +1,40 @@
import azure.batch.models as batch_models
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.spark import models
from aztk.utils import helpers
from .list_applications import list_applications
from .get_recent_job import get_recent_job
def _get_application_log(core_job_operations, spark_job_operations, job_id, application_name):
# TODO: change where the logs are uploaded so they aren't overwritten on scheduled runs
# current: job_id, application_name/output.log
# new: job_id, recent_run_job.id/application_name/output.log
recent_run_job = get_recent_job(core_job_operations, job_id)
try:
task = core_job_operations.batch_client.task.get(job_id=recent_run_job.id, task_id=application_name)
except batch_models.batch_error.BatchErrorException as e:
# see if the application is written to metadata of pool
applications = spark_job_operations.list_applications(job_id)
for application in applications:
if applications[application] is None and application == application_name:
raise error.AztkError("The application {0} has not yet been created.".format(application))
raise error.AztkError("The application {0} does not exist".format(application_name))
else:
if task.state in (batch_models.TaskState.active, batch_models.TaskState.running,
batch_models.TaskState.preparing):
raise error.AztkError("The application {0} has not yet finished executing.".format(application_name))
return core_job_operations.get_application_log(job_id, application_name)
def get_job_application_log(core_job_operations, spark_job_operations, job_id, application_name):
try:
return models.ApplicationLog(
_get_application_log(core_job_operations, spark_job_operations, job_id, application_name))
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))

@ -0,0 +1,3 @@
def get_recent_job(core_job_operations, job_id):
job_schedule = core_job_operations.batch_client.job_schedule.get(job_id)
return core_job_operations.batch_client.job.get(job_schedule.execution_info.recent_job.id)
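Every AZTK job is backed by a Batch job schedule rather than a single Batch job: each scheduled run creates a fresh Batch job, and the schedule's execution_info.recent_job points at the latest one. That indirection is why nearly every helper in this package resolves the schedule first. A minimal sketch of the pattern (list_recent_run_tasks is a hypothetical helper, not part of this PR):

def list_recent_run_tasks(batch_client, job_id):
    # The AZTK job id names a Batch *job schedule*, not a Batch job.
    schedule = batch_client.job_schedule.get(job_id)
    # Each scheduled run creates a new Batch job; recent_job is the latest run.
    recent_job_id = schedule.execution_info.recent_job.id
    # Its tasks are the submitted Spark applications; the job manager task
    # shares the schedule's id, so it is filtered out.
    return [task for task in batch_client.task.list(job_id=recent_job_id) if task.id != job_id]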

@ -0,0 +1,16 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.spark import models
from aztk.utils import helpers
def _list_jobs(core_job_operations):
return [cloud_job_schedule for cloud_job_schedule in core_job_operations.batch_client.job_schedule.list()]
def list_jobs(core_job_operations):
try:
return [models.Job(cloud_job_schedule) for cloud_job_schedule in _list_jobs(core_job_operations)]
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))

@ -0,0 +1,35 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.spark import models
from aztk.utils import helpers
from .get_recent_job import get_recent_job
def _list_applications(core_job_operations, job_id):
recent_run_job = get_recent_job(core_job_operations, job_id)
# get application names from Batch job metadata
applications = {}
for metadata_item in recent_run_job.metadata:
if metadata_item.name == "applications":
for app_name in metadata_item.value.split('\n'):
applications[app_name] = None
# get tasks from Batch job
for task in core_job_operations.batch_client.task.list(recent_run_job.id):
if task.id != job_id:
applications[task.id] = task
return applications
def list_applications(core_job_operations, job_id):
try:
applications = _list_applications(core_job_operations, job_id)
for item in applications:
if applications[item]:
applications[item] = models.Application(applications[item])
return applications
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
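The dictionary built above relies on a small metadata contract: at submit time the application names are newline-joined into a single Batch metadata item named "applications" (see application_metadata in submit_job below). An application can therefore be listed before its task exists, mapping to None until the task is created. An illustration with hypothetical application names:

# Hypothetical metadata value as written at submit time ('\n'.join of names).
metadata_value = "wordcount\npi-estimate"
applications = {name: None for name in metadata_value.split('\n')}
assert applications == {"wordcount": None, "pi-estimate": None}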

@ -0,0 +1,22 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.utils import helpers
from .get_recent_job import get_recent_job
def _stop(core_job_operations, job_id):
# terminate currently running job and tasks
recent_run_job = get_recent_job(core_job_operations, job_id)
core_job_operations.batch_client.job.terminate(recent_run_job.id)
# terminate job_schedule
core_job_operations.batch_client.job_schedule.terminate(job_id)
def stop(core_job_operations, job_id):
    try:
        return _stop(core_job_operations, job_id)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))

@ -0,0 +1,16 @@
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.utils import helpers
from .get_recent_job import get_recent_job
def stop_app(core_job_operations, job_id, application_name):
recent_run_job = get_recent_job(core_job_operations, job_id)
# stop batch task
try:
core_job_operations.batch_client.task.terminate(job_id=recent_run_job.id, task_id=application_name)
return True
except batch_error.BatchErrorException:
return False

@ -0,0 +1,116 @@
import azure.batch.models as batch_models
import azure.batch.models.batch_error as batch_error
import yaml
from aztk import error
from aztk import models as base_models
from aztk.internal.cluster_data import NodeData
from aztk.spark import models
from aztk.utils import helpers
from aztk.utils.command_builder import CommandBuilder
def __app_cmd():
docker_exec = CommandBuilder("sudo docker exec")
docker_exec.add_argument("-i")
docker_exec.add_option("-e", "AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR")
docker_exec.add_option("-e", "AZ_BATCH_JOB_ID=$AZ_BATCH_JOB_ID")
docker_exec.add_argument("spark /bin/bash >> output.log 2>&1 -c \"" \
"source ~/.bashrc; " \
"export PYTHONPATH=$PYTHONPATH:\$AZTK_WORKING_DIR; " \
"cd \$AZ_BATCH_TASK_WORKING_DIR; " \
"\$AZTK_WORKING_DIR/.aztk-env/.venv/bin/python \$AZTK_WORKING_DIR/aztk/node_scripts/job_submission.py\"")
return docker_exec.to_str()
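For reference, CommandBuilder accumulates the pieces above and renders them in order, so the assembled command comes out roughly as below. The exact spacing is an assumption about CommandBuilder.to_str(), and the long bash command is elided:

# Illustrative only: approximate output of __app_cmd() above.
expected_prefix = (
    'sudo docker exec -i'
    ' -e AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR'
    ' -e AZ_BATCH_JOB_ID=$AZ_BATCH_JOB_ID'
    ' spark /bin/bash >> output.log 2>&1 -c "..."'
)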
def generate_job_manager_task(core_job_operations, job, application_tasks):
resource_files = []
for application, task in application_tasks:
task_definition_resource_file = helpers.upload_text_to_container(
container_name=job.id,
application_name=application.name + '.yaml',
file_path=application.name + '.yaml',
content=yaml.dump(task),
blob_client=core_job_operations.blob_client)
resource_files.append(task_definition_resource_file)
task_cmd = __app_cmd()
# Create task
task = batch_models.JobManagerTask(
id=job.id,
command_line=helpers.wrap_commands_in_shell([task_cmd]),
resource_files=resource_files,
kill_job_on_completion=False,
allow_low_priority_node=True,
user_identity=batch_models.UserIdentity(
auto_user=batch_models.AutoUserSpecification(
scope=batch_models.AutoUserScope.task, elevation_level=batch_models.ElevationLevel.admin)))
return task
def _default_scheduling_target(vm_count: int):
if vm_count == 0:
return models.SchedulingTarget.Any
else:
return models.SchedulingTarget.Dedicated
def _apply_default_for_job_config(job_conf: models.JobConfiguration):
if job_conf.scheduling_target is None:
job_conf.scheduling_target = _default_scheduling_target(job_conf.max_dedicated_nodes)
return job_conf
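The defaulting rule looks only at whether any dedicated nodes were requested: a job with zero dedicated nodes schedules work on any node (including low-priority ones), otherwise scheduling is pinned to dedicated nodes. A quick sanity check of the helpers above (values are hypothetical):

assert _default_scheduling_target(0) == models.SchedulingTarget.Any
assert _default_scheduling_target(3) == models.SchedulingTarget.Dedicated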
def submit_job(core_job_operations, spark_job_operations, job_configuration: models.JobConfiguration, wait: bool = False):
try:
job_configuration = _apply_default_for_job_config(job_configuration)
job_configuration.validate()
cluster_data = core_job_operations.get_cluster_data(job_configuration.id)
node_data = NodeData(job_configuration.to_cluster_config()).add_core().done()
zip_resource_files = cluster_data.upload_node_data(node_data).to_resource_file()
start_task = spark_job_operations._generate_cluster_start_task(
core_job_operations,
zip_resource_files,
job_configuration.id,
job_configuration.gpu_enabled,
job_configuration.get_docker_repo(),
mixed_mode=job_configuration.mixed_mode(),
worker_on_master=job_configuration.worker_on_master)
application_tasks = []
for application in job_configuration.applications:
application_tasks.append((application,
spark_job_operations._generate_application_task(core_job_operations, job_configuration.id,
application)))
job_manager_task = generate_job_manager_task(core_job_operations, job_configuration, application_tasks)
software_metadata_key = base_models.Software.spark
vm_image = models.VmImage(publisher='Canonical', offer='UbuntuServer', sku='16.04')
autoscale_formula = "$TargetDedicatedNodes = {0}; " \
"$TargetLowPriorityNodes = {1}".format(
job_configuration.max_dedicated_nodes,
job_configuration.max_low_pri_nodes)
job = core_job_operations.submit(
job_configuration=job_configuration,
start_task=start_task,
job_manager_task=job_manager_task,
autoscale_formula=autoscale_formula,
software_metadata_key=software_metadata_key,
vm_image_model=vm_image,
application_metadata='\n'.join(application.name for application in (job_configuration.applications or [])))
if wait:
spark_job_operations.wait(id=job_configuration.id)
return models.Job(job)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))
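Note that the job's cluster size is enforced with a Batch autoscale formula rather than an explicit resize, capped at the configured maximums. For example, with hypothetical caps of 3 dedicated and 2 low-priority nodes, the formula rendered above is:

max_dedicated_nodes, max_low_pri_nodes = 3, 2
formula = "$TargetDedicatedNodes = {0}; " \
          "$TargetLowPriorityNodes = {1}".format(max_dedicated_nodes, max_low_pri_nodes)
assert formula == "$TargetDedicatedNodes = 3; $TargetLowPriorityNodes = 2"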

@ -0,0 +1,22 @@
import time
import azure.batch.models as batch_models
import azure.batch.models.batch_error as batch_error
from aztk import error
from aztk.utils import helpers
def _wait_until_job_finished(core_job_operations, job_id):
job_state = core_job_operations.batch_client.job_schedule.get(job_id).state
while job_state != batch_models.JobScheduleState.completed:
time.sleep(3)
job_state = core_job_operations.batch_client.job_schedule.get(job_id).state
def wait_until_job_finished(core_job_operations, job_id):
try:
_wait_until_job_finished(core_job_operations, job_id)
except batch_error.BatchErrorException as e:
raise error.AztkError(helpers.format_batch_exception(e))

@ -0,0 +1,134 @@
from aztk.client.job import CoreJobOperations
from aztk.spark import models
from aztk.spark.client.base import SparkBaseOperations
from .helpers import (delete, get, get_application, get_application_log, list, list_applications, stop,
stop_application, submit, wait_until_complete)
class JobOperations(SparkBaseOperations):
"""Spark ClusterOperations object
Attributes:
_core_job_operations (:obj:`aztk.client.cluster.CoreJobOperations`):
"""
def __init__(self, context):
self._core_job_operations = CoreJobOperations(context)
def list(self):
"""List all jobs.
Returns:
:obj:`List[Job]`: List of aztk.spark.models.Job objects, each representing the state and configuration of the job.
"""
return list.list_jobs(self._core_job_operations)
def delete(self, id, keep_logs: bool = False):
"""Delete a job.
Args:
id (:obj:`str`): the id of the job to delete.
keep_logs (:obj:`bool`): If True, the logs related to this job in Azure Storage are not deleted.
Defaults to False.
Returns:
:obj:`bool`: True if the deletion process was successful.
"""
return delete.delete(self._core_job_operations, self, id, keep_logs)
def get(self, id):
"""Get details about the state of a job.
Args:
id (:obj:`str`): the id of the job to get.
Returns:
:obj:`aztk.spark.models.Job`: A job object representing the state and configuration of the job.
"""
return get.get_job(self._core_job_operations, id)
def get_application(self, id, application_name):
"""Get information on a submitted application
Args:
id (:obj:`str`): the id of the job the application was submitted to
application_name (:obj:`str`): the name of the application to get
Returns:
:obj:`aztk.spark.models.Application`: object representing the state and output of an application
"""
return get_application.get_application(self, id, application_name)
def get_application_log(self, id, application_name):
"""Get the log for a running or completed application
Args:
id (:obj:`str`): the id of the job the application was submitted to.
application_name (:obj:`str`): the name of the application to get the log of
Returns:
:obj:`aztk.spark.models.ApplicationLog`: a model representing the output of the application.
"""
return get_application_log.get_job_application_log(self._core_job_operations, self, id, application_name)
def list_applications(self, id):
"""List all application defined as a part of a job
Args:
id (:obj:`str`): the id of the job to list the applications of
Returns:
:obj:`Dict[str, aztk.spark.models.Application]`: a dictionary mapping each application name defined as part of the job to its aztk.spark.models.Application object, or None if that application's task has not yet been created
"""
return list_applications.list_applications(self._core_job_operations, id)
def stop(self, id):
"""Stop a submitted job
Args:
id (:obj:`str`): the id of the job to stop
Returns:
:obj:`None`
"""
return stop.stop(self._core_job_operations, id)
def stop_application(self, id, application_name):
"""Stops a submitted application
Args:
id (:obj:`str`): the id of the job the application belongs to
application_name (:obj:`str`): the name of the application to stop
Returns:
:obj:`bool`: True if the stop was successful, else False
"""
return stop_application.stop_app(self._core_job_operations, id, application_name)
def submit(self, job_configuration: models.JobConfiguration, wait: bool = False):
"""Submit a job
Jobs are a cluster definition and one or more application definitions that run on the cluster. The job's
cluster will be allocated and configured, then the applications will be executed with their output stored
in Azure Storage. When all applications have completed, the cluster will be automatically deleted.
Args:
job_configuration (:obj:`aztk.spark.models.JobConfiguration`): Model defining the job's configuration.
wait (:obj:`bool`): If True, blocks until job is completed. Defaults to False.
Returns:
:obj:`aztk.spark.models.Job`: Model representing the state of the job.
"""
return submit.submit_job(self._core_job_operations, self, job_configuration, wait)
def wait(self, id):
"""Wait until the job has completed.
Args:
id (:obj:`str`): the id of the job the application belongs to
Returns:
:obj:`None`
"""
wait_until_complete.wait_until_job_finished(self._core_job_operations, id)
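Taken together, the new surface hangs off Client.job. A usage sketch under stated assumptions: secrets is a populated aztk.spark.models.SecretsConfiguration and job_configuration a valid JobConfiguration (both elided here), and the application name is hypothetical:

import aztk.spark

client = aztk.spark.Client(secrets)                # secrets: models.SecretsConfiguration (elided)
client.job.submit(job_configuration, wait=True)    # blocks until the job schedule completes
for name, application in client.job.list_applications(id=job_configuration.id).items():
    print(name, "created" if application else "pending")    # None until the app's task exists
log = client.job.get_application_log(job_configuration.id, "wordcount")
print(log.log)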

@ -0,0 +1,2 @@
# ALL FILES IN THIS DIRECTORY ARE DEPRECATED, WILL BE REMOVED IN v0.9.0

@ -1,13 +1,13 @@
 import time
+import azure.batch.models as batch_models
 import azure
-import azure.batch.models as batch_models
 import azure.batch.models.batch_error as batch_error
 from aztk import error
-from aztk.utils import helpers
-from aztk.utils import constants
+from aztk import models as base_models
 from aztk.spark import models
+from aztk.utils import constants, helpers

 output_file = constants.TASK_WORKING_DIR + \
     "/" + constants.SPARK_SUBMIT_LOGS_FILE
@ -53,14 +53,14 @@ def get_log_from_storage(blob_client, container_name, application_name, task):
         blob = blob_client.get_blob_to_text(container_name, application_name + '/' + constants.SPARK_SUBMIT_LOGS_FILE)
     except azure.common.AzureMissingResourceHttpError:
         raise error.AztkError("Logs not found in your storage account. They were either deleted or never existed.")

-    return models.ApplicationLog(
+    base_model = base_models.ApplicationLog(
         name=application_name,
         cluster_id=container_name,
         application_state=task.state._value_,
         log=blob.content,
         total_bytes=blob.properties.content_length,
-        exit_code = task.execution_info.exit_code)
+        exit_code=task.execution_info.exit_code)
+    return models.ApplicationLog(base_model)


 def get_log(batch_client, blob_client, cluster_id: str, application_name: str, tail=False, current_bytes: int = 0):
@ -85,19 +85,20 @@ def get_log(batch_client, blob_client, cluster_id: str, application_name: str, t
         stream = batch_client.file.get_from_task(
             job_id, task_id, output_file, batch_models.FileGetFromTaskOptions(ocp_range=ocp_range))
         content = helpers.read_stream_as_string(stream)

-        return models.ApplicationLog(
+        base_model = base_models.ApplicationLog(
             name=application_name,
             cluster_id=cluster_id,
             application_state=task.state._value_,
             log=content,
             total_bytes=target_bytes,
             exit_code=task.execution_info.exit_code)
+        return models.ApplicationLog(base_model)
     else:
-        return models.ApplicationLog(
+        base_model = base_models.ApplicationLog(
             name=application_name,
             cluster_id=cluster_id,
             application_state=task.state._value_,
             log='',
             total_bytes=target_bytes,
             exit_code=task.execution_info.exit_code)
+        return models.ApplicationLog(base_model)

@ -17,10 +17,10 @@ class SparkToolkit(aztk.models.Toolkit):
 class Cluster(aztk.models.Cluster):
-    def __init__(self, pool: batch_models.CloudPool = None, nodes: batch_models.ComputeNodePaged = None):
-        super().__init__(pool, nodes)
+    def __init__(self, cluster: aztk.models.Cluster):
+        super().__init__(cluster.pool, cluster.nodes)
         self.master_node_id = self.__get_master_node_id()
-        self.gpu_enabled = helpers.is_gpu_enabled(pool.vm_size)
+        self.gpu_enabled = helpers.is_gpu_enabled(cluster.pool.vm_size)

     def is_pool_running_spark(self, pool: batch_models.CloudPool):
         if pool.metadata is None:
@ -47,7 +47,9 @@ class Cluster(aztk.models.Cluster):
 class RemoteLogin(aztk.models.RemoteLogin):
-    pass
+    def __init__(self, remote_login: aztk.models.RemoteLogin):
+        super().__init__(remote_login.ip_address, remote_login.port)


 class PortForwardingSpecification(aztk.models.PortForwardingSpecification):
     pass
@ -286,16 +288,16 @@ class Job():
         self.creation_time = cloud_job_schedule.creation_time
         self.applications = [Application(task) for task in (cloud_tasks or [])]
         if pool:
-            self.cluster = Cluster(pool, nodes)
+            self.cluster = Cluster(aztk.models.Cluster(pool, nodes))
         else:
             self.cluster = None


-class ApplicationLog():
-    def __init__(self, name: str, cluster_id: str, log: str, total_bytes: int, application_state: batch_models.TaskState, exit_code: int):
-        self.name = name
-        self.cluster_id = cluster_id    # TODO: change to something cluster/job agnostic
-        self.log = log
-        self.total_bytes = total_bytes
-        self.application_state = application_state
-        self.exit_code = exit_code
+class ApplicationLog(aztk.models.ApplicationLog):
+    def __init__(self, application_log: aztk.models.ApplicationLog):
+        self.name = application_log.name
+        self.cluster_id = application_log.cluster_id    # TODO: change to something cluster/job agnostic
+        self.log = application_log.log
+        self.total_bytes = application_log.total_bytes
+        self.application_state = application_log.application_state
+        self.exit_code = application_log.exit_code
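This hunk is the "inheritance -> composition" change from the changelog: instead of rebuilding state from raw Batch objects, each spark model now receives the fully-constructed base model and copies its fields. Reduced to its essentials (Base and Wrapper are hypothetical stand-ins for the aztk.models / aztk.spark.models pairs):

class Base:
    def __init__(self, ip_address, port):
        self.ip_address = ip_address
        self.port = port

class Wrapper(Base):
    def __init__(self, base: Base):
        # Re-expose the already-built base model's fields instead of
        # recomputing them from Batch objects.
        super().__init__(base.ip_address, base.port)

remote_login = Wrapper(Base("10.0.0.4", "22"))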

@ -0,0 +1,3 @@
from aztk.spark import models
SPARK_VM_IMAGE = models.VmImage(publisher='Canonical', offer='UbuntuServer', sku='16.04')

@ -17,18 +17,18 @@ class MasterInvalidStateError(Exception):
     pass


-def wait_for_master_to_be_ready(client, cluster_id: str):
+def wait_for_master_to_be_ready(core_operations, spark_operations, cluster_id: str):
     master_node_id = None
     start_time = datetime.datetime.now()
     while True:
         if not master_node_id:
-            master_node_id = client.get_cluster(cluster_id).master_node_id
+            master_node_id = spark_operations.get(cluster_id).master_node_id
             if not master_node_id:
                 time.sleep(5)
                 continue

-        master_node = client.batch_client.compute_node.get(cluster_id, master_node_id)
+        master_node = core_operations.batch_client.compute_node.get(cluster_id, master_node_id)

         if master_node.state in [batch_models.ComputeNodeState.idle, batch_models.ComputeNodeState.running]:
             break

@ -30,12 +30,12 @@ def execute(args: typing.NamedTuple):
     if args.ssh_key:
         ssh_key = args.ssh_key
     else:
-        ssh_key = spark_client.secrets_config.ssh_pub_key
+        ssh_key = spark_client.secrets_configuration.ssh_pub_key

-    ssh_key, password = utils.get_ssh_key_or_prompt(ssh_key, args.username, args.password, spark_client.secrets_config)
+    ssh_key, password = utils.get_ssh_key_or_prompt(ssh_key, args.username, args.password, spark_client.secrets_configuration)

-    spark_client.create_user(
-        cluster_id=args.cluster_id,
+    spark_client.cluster.create_user(
+        id=args.cluster_id,
         username=args.username,
         password=password,
         ssh_key=ssh_key

@ -30,7 +30,7 @@ def execute(args: typing.NamedTuple):
     if args.tail:
         utils.stream_logs(client=spark_client, cluster_id=args.cluster_id, application_name=args.app_name)
     else:
-        app_log = spark_client.get_application_log(cluster_id=args.cluster_id, application_name=args.app_name)
+        app_log = spark_client.cluster.get_application_log(id=args.cluster_id, application_name=args.app_name)
     if args.output:
         with utils.Spinner():
             with open(os.path.abspath(os.path.expanduser(args.output)), "w", encoding="UTF-8") as f:

@ -24,8 +24,8 @@ def setup_parser(parser: argparse.ArgumentParser):
 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())
     with utils.Spinner():
-        copy_output = spark_client.cluster_copy(
-            cluster_id=args.cluster_id,
+        copy_output = spark_client.cluster.copy(
+            id=args.cluster_id,
             source_path=args.source_path,
             destination_path=args.dest_path,
             internal=args.internal

@ -66,10 +66,10 @@ def execute(args: typing.NamedTuple):
     user_configuration = cluster_conf.user_configuration

     if user_configuration and user_configuration.username:
-        ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_config.ssh_pub_key,
+        ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_configuration.ssh_pub_key,
                                                         user_configuration.username,
                                                         user_configuration.password,
-                                                        spark_client.secrets_config)
+                                                        spark_client.secrets_configuration)
         cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
             username=user_configuration.username,
             password=password,
@ -82,8 +82,8 @@ def execute(args: typing.NamedTuple):
     utils.print_cluster_conf(cluster_conf, wait)

     with utils.Spinner():
         # create spark cluster
-        cluster = spark_client.create_cluster(
-            cluster_conf,
+        cluster = spark_client.cluster.create(
+            cluster_configuration=cluster_conf,
             wait=wait
         )

Просмотреть файл

@ -22,5 +22,5 @@ def execute(args: typing.NamedTuple):
     if not args.output:
         args.output = os.path.join(os.getcwd(), "debug-{0}-{1}".format(args.cluster_id, timestr))
     with utils.Spinner():
-        spark_client.run_cluster_diagnostics(cluster_id=args.cluster_id, output_directory=args.output)
+        spark_client.cluster.diagnostics(id=args.cluster_id, output_directory=args.output)
     # TODO: analyze results, display some info about status

@ -40,7 +40,7 @@ def execute(args: typing.NamedTuple):
log.error("Confirmation cluster id does not match. Please try again.") log.error("Confirmation cluster id does not match. Please try again.")
return return
if spark_client.delete_cluster(cluster_id, args.keep_logs): if spark_client.cluster.delete(id=cluster_id, keep_logs=args.keep_logs):
log.info("Deleting cluster %s", cluster_id) log.info("Deleting cluster %s", cluster_id)
else: else:
log.error("Cluster with id '%s' doesn't exist or was already deleted.", cluster_id) log.error("Cluster with id '%s' doesn't exist or was already deleted.", cluster_id)

@ -23,10 +23,10 @@ def setup_parser(parser: argparse.ArgumentParser):
 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())
     cluster_id = args.cluster_id
-    cluster = spark_client.get_cluster(cluster_id)
+    cluster = spark_client.cluster.get(cluster_id)
     utils.print_cluster(spark_client, cluster, args.internal)

-    configuration = spark_client.get_cluster_config(cluster_id)
+    configuration = spark_client.cluster.get_cluster_config(cluster_id)
     if configuration and args.show_config:
         log.info("-------------------------------------------")
         log.info("Cluster configuration:")

@ -16,7 +16,7 @@ def setup_parser(parser: argparse.ArgumentParser):
 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())

-    clusters = spark_client.list_clusters()
+    clusters = spark_client.cluster.list()
     if args.quiet:
         utils.print_clusters_quiet(clusters)
     else:

@ -27,8 +27,8 @@ def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())

     with utils.Spinner():
         if args.node_id:
-            results = [spark_client.node_run(args.cluster_id, args.node_id, args.command, args.host, args.internal)]
+            results = [spark_client.cluster.node_run(args.cluster_id, args.node_id, args.command, args.host, args.internal)]
         else:
-            results = spark_client.cluster_run(args.cluster_id, args.command, args.host, args.internal)
+            results = spark_client.cluster.run(args.cluster_id, args.command, args.host, args.internal)

     [utils.log_node_run_output(node_output) for node_output in results]

@ -31,8 +31,8 @@ http_prefix = 'http://localhost:'
 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())
-    cluster = spark_client.get_cluster(args.cluster_id)
-    cluster_config = spark_client.get_cluster_config(args.cluster_id)
+    cluster = spark_client.cluster.get(args.cluster_id)
+    cluster_config = spark_client.cluster.get_cluster_config(args.cluster_id)
     ssh_conf = SshConfig()

     ssh_conf.merge(
@ -93,7 +93,7 @@ def native_python_ssh_into_master(spark_client, cluster, ssh_conf, password):
log.warning("No ssh client found, using pure python connection.") log.warning("No ssh client found, using pure python connection.")
return return
configuration = spark_client.get_cluster_config(cluster.id) configuration = spark_client.cluster.get_cluster_config(cluster.id)
plugin_ports = [] plugin_ports = []
if configuration and configuration.plugins: if configuration and configuration.plugins:
ports = [ ports = [
@ -104,7 +104,7 @@ def native_python_ssh_into_master(spark_client, cluster, ssh_conf, password):
             plugin_ports.extend(ports)

     print("Press ctrl+c to exit...")
-    spark_client.cluster_ssh_into_master(
+    spark_client.cluster.ssh_into_master(
         cluster.id,
         cluster.master_node_id,
         ssh_conf.username,

@ -134,8 +134,8 @@ def execute(args: typing.NamedTuple):
log.info("-------------------------------------------") log.info("-------------------------------------------")
spark_client.submit( spark_client.cluster.submit(
cluster_id=args.cluster_id, id=args.cluster_id,
application = aztk.spark.models.ApplicationConfiguration( application = aztk.spark.models.ApplicationConfiguration(
name=args.name, name=args.name,
application=args.app, application=args.app,
@ -162,8 +162,8 @@ def execute(args: typing.NamedTuple):
         exit_code = utils.stream_logs(client=spark_client, cluster_id=args.cluster_id, application_name=args.name)
     else:
         with utils.Spinner():
-            spark_client.wait_until_application_done(cluster_id=args.cluster_id, task_id=args.name)
-            application_log = spark_client.get_application_log(cluster_id=args.cluster_id, application_name=args.name)
+            spark_client.cluster.wait(id=args.cluster_id, application_name=args.name)    # TODO: replace wait_until_application_done
+            application_log = spark_client.cluster.get_application_log(id=args.cluster_id, application_name=args.name)
             with open(os.path.abspath(os.path.expanduser(args.output)), "w", encoding="UTF-8") as f:
                 f.write(application_log.log)
             exit_code = application_log.exit_code

@ -29,7 +29,7 @@ def execute(args: typing.NamedTuple):
     if not args.force:
         # check if job exists before prompting for confirmation
-        spark_client.get_job(job_id)
+        spark_client.job.get(id=job_id)
         if not args.keep_logs:
             log.warning("All logs persisted for this job will be deleted.")
@ -40,7 +40,7 @@ def execute(args: typing.NamedTuple):
log.error("Confirmation cluster id does not match. Please try again.") log.error("Confirmation cluster id does not match. Please try again.")
return return
if spark_client.delete_job(job_id, args.keep_logs): if spark_client.job.delete(id=job_id, keep_logs=args.keep_logs):
log.info("Deleting Job %s", job_id) log.info("Deleting Job %s", job_id)
else: else:
log.error("Job with id '%s' doesn't exist or was already deleted.", job_id) log.error("Job with id '%s' doesn't exist or was already deleted.", job_id)

@ -16,4 +16,4 @@ def setup_parser(parser: argparse.ArgumentParser):
 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())
-    utils.print_job(spark_client, spark_client.get_job(args.job_id))
+    utils.print_job(spark_client, spark_client.job.get(id=args.job_id))

@ -20,4 +20,4 @@ def setup_parser(parser: argparse.ArgumentParser):
 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())
-    utils.print_application(spark_client.get_application(args.job_id, args.app_name))
+    utils.print_application(spark_client.job.get_application(args.job_id, args.app_name))

@ -22,7 +22,7 @@ def setup_parser(parser: argparse.ArgumentParser):
 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())

-    app_log = spark_client.get_job_application_log(args.job_id, args.app_name)
+    app_log = spark_client.job.get_application_log(args.job_id, args.app_name)

     if args.output:
         with utils.Spinner():
             with open(os.path.abspath(os.path.expanduser(args.output)), "w", encoding="UTF-8") as f:

@ -13,4 +13,4 @@ def setup_parser(_: argparse.ArgumentParser):
 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())
-    utils.print_jobs(spark_client.list_jobs())
+    utils.print_jobs(spark_client.job.list())

@ -14,4 +14,4 @@ def setup_parser(parser: argparse.ArgumentParser):
 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())
-    utils.print_applications(spark_client.list_applications(args.job_id))
+    utils.print_applications(spark_client.job.list_applications(args.job_id))

@ -15,5 +15,5 @@ def setup_parser(parser: argparse.ArgumentParser):
 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())
-    spark_client.stop_job(args.job_id)
+    spark_client.job.stop(args.job_id)
     log.print("Stopped Job {0}".format(args.job_id))

@ -20,7 +20,7 @@ def setup_parser(parser: argparse.ArgumentParser):
 def execute(args: typing.NamedTuple):
     spark_client = aztk.spark.Client(config.load_aztk_secrets())
-    if spark_client.stop_job_app(args.job_id, args.app_name):
+    if spark_client.job.stop_application(args.job_id, args.app_name):
         log.info("Stopped app {0}".format(args.app_name))
     else:
         log.error("App with name {0} does not exist or was already deleted")

@ -48,4 +48,4 @@ def execute(args: typing.NamedTuple):
     )

     #TODO: utils.print_job_conf(job_configuration)
-    spark_client.submit_job(job_configuration)
+    spark_client.job.submit(job_configuration)
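The CLI hunks above are all the same mechanical migration from the flat Client methods to the namespaced operations. Collected from the diffs in this commit:

spark_client.create_cluster(...)        -> spark_client.cluster.create(...)
spark_client.get_cluster(...)           -> spark_client.cluster.get(...)
spark_client.list_clusters()            -> spark_client.cluster.list()
spark_client.delete_cluster(...)        -> spark_client.cluster.delete(...)
spark_client.cluster_run(...)           -> spark_client.cluster.run(...)
spark_client.get_application_log(...)   -> spark_client.cluster.get_application_log(...)
spark_client.get_job(...)               -> spark_client.job.get(...)
spark_client.list_jobs()                -> spark_client.job.list()
spark_client.submit_job(...)            -> spark_client.job.submit(...)
spark_client.stop_job_app(...)          -> spark_client.job.stop_application(...)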

Some files were not shown because too many files changed in this diff.