# batch-shipyard/convoy/fleet.py


# Copyright (c) Microsoft Corporation
#
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# compat imports
from __future__ import (
absolute_import, division, print_function, unicode_literals
)
from builtins import ( # noqa
bytes, dict, int, list, object, range, str, ascii, chr, hex, input,
next, oct, open, pow, round, super, filter, map, zip)
# stdlib imports
import logging
import os
try:
import pathlib2 as pathlib
except ImportError:
import pathlib
import requests
import tempfile
import time
import uuid
# non-stdlib imports
import azure.batch.models as batchmodels
# local imports
from . import autoscale
from . import batch
from . import crypto
from . import data
from . import keyvault
from . import misc
from . import remotefs
from . import resource
from . import settings
from . import storage
from . import util
from .version import __version__
# create logger
logger = logging.getLogger(__name__)
util.setup_logger(logger)
# global defines
_REQUEST_CHUNK_SIZE = 4194304
_ROOT_PATH = pathlib.Path(__file__).resolve().parent.parent
_RESOURCES_PATH = None
_NVIDIA_DRIVER = {
'compute': {
'url': (
'http://us.download.nvidia.com/tesla/'
'384.111/NVIDIA-Linux-x86_64-384.111.run'
),
'sha256': (
'bd8af7654ccb224c37e74c8e81477a42f63fa9f2360b1b1ec6ae00b03ae21054'
),
'target': 'nvidia-driver.run'
},
'visualization': {
'url': 'https://go.microsoft.com/fwlink/?linkid=849941',
'sha256': (
'ca3fd5f5e9156ad3d983b2032bde3c009dca73400f2753f9b475825f4670a854'
),
'target': 'nvidia-driver-grid.run'
},
'license': (
'http://www.nvidia.com/content/DriverDownload-March2009'
'/licence.php?lang=us'
),
}
_CASCADE_FILE = (
'cascade.py',
pathlib.Path(_ROOT_PATH, 'cascade/cascade.py')
)
_PERF_FILE = (
'perf.py',
pathlib.Path(_ROOT_PATH, 'cascade/perf.py')
)
_NODEPREP_FILE = (
'shipyard_nodeprep.sh',
pathlib.Path(_ROOT_PATH, 'scripts/shipyard_nodeprep.sh')
)
_NODEPREP_CUSTOMIMAGE_FILE = (
'shipyard_nodeprep_customimage.sh',
pathlib.Path(_ROOT_PATH, 'scripts/shipyard_nodeprep_customimage.sh')
)
_NODEPREP_NATIVEDOCKER_FILE = (
'shipyard_nodeprep_nativedocker.sh',
pathlib.Path(_ROOT_PATH, 'scripts/shipyard_nodeprep_nativedocker.sh')
)
_NODEPREP_WINDOWS_FILE = (
'shipyard_nodeprep_nativedocker.ps1',
pathlib.Path(
_ROOT_PATH,
'scripts/windows/shipyard_nodeprep_nativedocker.ps1'
)
)
_GLUSTERPREP_FILE = (
'shipyard_glusterfs_on_compute.sh',
pathlib.Path(_ROOT_PATH, 'scripts/shipyard_glusterfs_on_compute.sh')
)
_GLUSTERRESIZE_FILE = (
'shipyard_glusterfs_on_compute_resize.sh',
pathlib.Path(
_ROOT_PATH, 'scripts/shipyard_glusterfs_on_compute_resize.sh')
)
_HPNSSH_FILE = (
'shipyard_hpnssh.sh',
pathlib.Path(_ROOT_PATH, 'scripts/shipyard_hpnssh.sh')
)
_IMAGE_BLOCK_FILE = (
'wait_for_images.sh',
pathlib.Path(_ROOT_PATH, 'scripts/wait_for_images.sh')
)
_REGISTRY_LOGIN_FILE = (
'registry_login.sh',
pathlib.Path(_ROOT_PATH, 'scripts/registry_login.sh')
)
_REGISTRY_LOGIN_WINDOWS_FILE = (
'registry_login.ps1',
pathlib.Path(_ROOT_PATH, 'scripts/windows/registry_login.ps1')
)
_BLOBXFER_FILE = (
'shipyard_blobxfer.sh',
pathlib.Path(_ROOT_PATH, 'scripts/shipyard_blobxfer.sh')
)
_BLOBXFER_WINDOWS_FILE = (
'shipyard_blobxfer.ps1',
pathlib.Path(_ROOT_PATH, 'scripts/windows/shipyard_blobxfer.ps1')
)
_REMOTEFSPREP_FILE = (
'shipyard_remotefs_bootstrap.sh',
pathlib.Path(_ROOT_PATH, 'scripts/shipyard_remotefs_bootstrap.sh')
)
_REMOTEFSADDBRICK_FILE = (
'shipyard_remotefs_addbrick.sh',
pathlib.Path(_ROOT_PATH, 'scripts/shipyard_remotefs_addbrick.sh')
)
_REMOTEFSSTAT_FILE = (
'shipyard_remotefs_stat.sh',
pathlib.Path(_ROOT_PATH, 'scripts/shipyard_remotefs_stat.sh')
)
_ALL_REMOTEFS_FILES = [
_REMOTEFSPREP_FILE, _REMOTEFSADDBRICK_FILE, _REMOTEFSSTAT_FILE,
]
def initialize_globals(verbose):
# type: (bool) -> None
"""Initialize any runtime globals
:param bool verbose: verbose
"""
global _RESOURCES_PATH
if _RESOURCES_PATH is None:
_RESOURCES_PATH = _ROOT_PATH / 'resources'
if not _RESOURCES_PATH.exists():
_RESOURCES_PATH = pathlib.Path(
tempfile.gettempdir()) / 'batch-shipyard-{}-resources'.format(
__version__)
_RESOURCES_PATH.mkdir(parents=True, exist_ok=True)
if verbose:
logger.debug('initialized resources path to: {}'.format(
_RESOURCES_PATH))
def populate_global_settings(config, fs_storage, pool_id=None):
# type: (dict, bool, str) -> None
"""Populate global settings from config
:param dict config: configuration dict
:param bool fs_storage: adjust for fs context
:param str pool_id: pool id override
"""
bs = settings.batch_shipyard_settings(config)
sc = settings.credentials_storage(config, bs.storage_account_settings)
if fs_storage:
# set postfix to empty for now, it will be populated with the
# storage cluster during the actual calls
postfix = ''
if util.is_not_empty(pool_id):
raise ValueError('pool id specified for fs_storage')
else:
bc = settings.credentials_batch(config)
if util.is_none_or_empty(pool_id):
pool_id = settings.pool_id(config, lower=True)
postfix = '-'.join((bc.account.lower(), pool_id))
storage.set_storage_configuration(
bs.storage_entity_prefix,
postfix,
sc.account,
sc.account_key,
sc.endpoint,
bs.generated_sas_expiry_days)
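# Illustrative note (not part of the original module): for the non-fs case the
# storage entity postfix above is '<batch account>-<pool id>', both lowercased.
# For a hypothetical account "mybatchaccount" and pool "MyPool" the postfix is
# 'mybatchaccount-mypool', which storage.set_storage_configuration() combines
# with the configured storage_entity_prefix when naming tables/containers.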
def fetch_credentials_conf_from_keyvault(
keyvault_client, keyvault_uri, keyvault_credentials_secret_id):
# type: (azure.keyvault.KeyVaultClient, str, str) -> dict
"""Fetch a credentials conf from keyvault
:param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
:param str keyvault_uri: keyvault uri
:param str keyvault_credentials_secret_id: keyvault cred secret id
:rtype: dict
:return: credentials conf
"""
if keyvault_uri is None:
raise ValueError('credentials conf was not specified or is invalid')
if keyvault_client is None:
raise ValueError('no Azure KeyVault or AAD credentials specified')
return keyvault.fetch_credentials_conf(
keyvault_client, keyvault_uri, keyvault_credentials_secret_id)
def fetch_secrets_from_keyvault(keyvault_client, config):
# type: (azure.keyvault.KeyVaultClient, dict) -> None
"""Fetch secrets with secret ids in config from keyvault
:param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
:param dict config: configuration dict
"""
if keyvault_client is not None:
keyvault.parse_secret_ids(keyvault_client, config)
def _setup_nvidia_driver_package(blob_client, config, vm_size):
# type: (azure.storage.blob.BlockBlobService, dict, str) -> pathlib.Path
"""Set up the nvidia driver package
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param dict config: configuration dict
:param str vm_size: vm size
:rtype: pathlib.Path
:return: package path
"""
gpu_type = settings.get_gpu_type_from_vm_size(vm_size)
pkg = _RESOURCES_PATH / _NVIDIA_DRIVER[gpu_type]['target']
# check to see if package is downloaded
if (not pkg.exists() or
util.compute_sha256_for_file(pkg, False) !=
_NVIDIA_DRIVER[gpu_type]['sha256']):
# display license link
if not util.confirm_action(
config,
msg=('agreement with License for Customer Use of NVIDIA '
'Software @ {}').format(_NVIDIA_DRIVER['license']),
allow_auto=True):
raise RuntimeError(
'Cannot proceed with deployment due to non-agreement with '
'license for NVIDIA driver')
else:
logger.info('NVIDIA Software License accepted')
# download driver
logger.debug('downloading NVIDIA driver to {}'.format(
_NVIDIA_DRIVER[gpu_type]['target']))
response = requests.get(_NVIDIA_DRIVER[gpu_type]['url'], stream=True)
with pkg.open('wb') as f:
for chunk in response.iter_content(chunk_size=_REQUEST_CHUNK_SIZE):
if chunk:
f.write(chunk)
logger.debug('wrote {} bytes to {}'.format(pkg.stat().st_size, pkg))
# check sha256
if (util.compute_sha256_for_file(pkg, False) !=
_NVIDIA_DRIVER[gpu_type]['sha256']):
raise RuntimeError('sha256 mismatch for {}'.format(pkg))
return pkg
def _generate_azure_mount_script_name(
batch_account_name, pool_id, is_file_share, is_windows):
# type: (str, str, bool, bool) -> pathlib.Path
"""Generate an azure blob/file mount script name
:param str batch_account_name: batch account name
:param str pool_id: pool id
:param bool is_file_share: is file share
:param bool is_windows: is windows
:rtype: pathlib.Path
:return: path to azure mount script
"""
if is_file_share:
prefix = 'azurefile'
else:
prefix = 'azureblob'
return _RESOURCES_PATH / '{}-mount-{}-{}.{}'.format(
prefix, batch_account_name.lower(), pool_id.lower(),
'cmd' if is_windows else 'sh')
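# Illustrative note: with hypothetical inputs batch_account_name='MyBatch',
# pool_id='MyPool', is_file_share=True, is_windows=False, the function above
# yields a path under _RESOURCES_PATH named 'azurefile-mount-mybatch-mypool.sh';
# a Windows pool would instead produce 'azurefile-mount-mybatch-mypool.cmd'.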
def _setup_azureblob_mounts(blob_client, config, bc):
# type: (azure.storage.blob.BlockBlobService, dict,
# settings.BatchCredentials) -> pathlib.Path
"""Set up the Azure Blob container via blobfuse
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param dict config: configuration dict
:param settings.BatchCredentials bc: batch creds
:rtype: pathlib.Path
:return: path to the generated volume creation script
"""
tmpmount = settings.temp_disk_mountpoint(config)
# construct mount commands
cmds = []
sdv = settings.global_resources_shared_data_volumes(config)
for svkey in sdv:
if settings.is_shared_data_volume_azure_blob(sdv, svkey):
sa = settings.credentials_storage(
config,
settings.azure_storage_account_settings(sdv, svkey))
cont = settings.azure_blob_container_name(sdv, svkey)
hmp = settings.azure_blob_host_mount_path(sa.account, cont)
tmpmp = '{}/blobfuse-tmp/{}-{}'.format(tmpmount, sa.account, cont)
cmds.append('mkdir -p {}'.format(hmp))
cmds.append('chmod 0770 {}'.format(hmp))
cmds.append('mkdir -p {}'.format(tmpmp))
cmds.append('chown _azbatch:_azbatchgrp {}'.format(tmpmp))
cmds.append('chmod 0770 {}'.format(tmpmp))
conn = 'azblob-{}-{}.cfg'.format(sa.account, cont)
cmds.append('cat > {} << EOF'.format(conn))
cmds.append('accountName {}'.format(sa.account))
cmds.append('accountKey {}'.format(sa.account_key))
cmds.append('containerName {}'.format(cont))
cmds.append('EOF')
cmd = (
'blobfuse {hmp} --tmp-path={tmpmp} -o attr_timeout=240 '
'-o entry_timeout=240 -o negative_timeout=120 -o allow_other '
'--config-file={conn}'
).format(hmp=hmp, tmpmp=tmpmp, conn=conn)
# add any additional mount options
mo = settings.shared_data_volume_mount_options(sdv, svkey)
if util.is_not_empty(mo):
opts = []
for opt in mo:
if opt.strip() == '-o allow_other':
continue
opts.append(opt)
cmd = '{} {}'.format(cmd, ' '.join(opts))
cmds.append(cmd)
# create file share mount command script
if util.is_none_or_empty(cmds):
raise RuntimeError('Generated Azure blob mount commands are invalid')
volcreate = _generate_azure_mount_script_name(
bc.account, settings.pool_id(config), False, False)
newline = '\n'
with volcreate.open('w', newline=newline) as f:
f.write('#!/usr/bin/env bash')
f.write(newline)
f.write('set -e')
f.write(newline)
f.write('set -o pipefail')
f.write(newline)
for cmd in cmds:
f.write(cmd)
f.write(newline)
return volcreate
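# Illustrative sketch of the blobfuse volume creation script emitted above,
# for a hypothetical storage account "mystorage" and container "mycont"
# (actual paths and extra mount options depend on the configuration):
#
#   #!/usr/bin/env bash
#   set -e
#   set -o pipefail
#   mkdir -p <host mount path>
#   chmod 0770 <host mount path>
#   mkdir -p <temp disk>/blobfuse-tmp/mystorage-mycont
#   chown _azbatch:_azbatchgrp <temp disk>/blobfuse-tmp/mystorage-mycont
#   chmod 0770 <temp disk>/blobfuse-tmp/mystorage-mycont
#   cat > azblob-mystorage-mycont.cfg << EOF
#   accountName mystorage
#   accountKey <key>
#   containerName mycont
#   EOF
#   blobfuse <host mount path> --tmp-path=<temp disk>/blobfuse-tmp/mystorage-mycont \
#       -o attr_timeout=240 -o entry_timeout=240 -o negative_timeout=120 \
#       -o allow_other --config-file=azblob-mystorage-mycont.cfg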
def _setup_azurefile_mounts(blob_client, config, bc, is_windows):
# type: (azure.storage.blob.BlockBlobService, dict,
# settings.BatchCredentials, bool) -> pathlib.Path
"""Set up the Azure File shares
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param dict config: configuration dict
:param settings.BatchCredentials bc: batch creds
:param bool is_windows: is windows pool
:rtype: pathlib.Path
:return: path to the generated volume creation script
"""
# construct mount commands
cmds = []
sdv = settings.global_resources_shared_data_volumes(config)
for svkey in sdv:
if settings.is_shared_data_volume_azure_file(sdv, svkey):
sa = settings.credentials_storage(
config,
settings.azure_storage_account_settings(sdv, svkey))
share = settings.azure_file_share_name(sdv, svkey)
hmp = settings.azure_file_host_mount_path(
sa.account, share, is_windows)
if is_windows:
cmd = (
'net use \\\\{sa}.file.{ep}\\{share} {sakey} '
'/user:Azure\\{sa}'
).format(
sa=sa.account, ep=sa.endpoint, share=share,
sakey=sa.account_key)
cmds.append(cmd)
cmd = 'mklink /d {hmp} \\\\{sa}.file.{ep}\\{share}'.format(
hmp=hmp, sa=sa.account, ep=sa.endpoint, share=share)
else:
cmd = (
'mount -t cifs //{sa}.file.{ep}/{share} {hmp} -o '
'vers=3.0,username={sa},password={sakey},'
'serverino'
).format(
sa=sa.account, ep=sa.endpoint, share=share, hmp=hmp,
sakey=sa.account_key)
# add any additional mount options
mo = settings.shared_data_volume_mount_options(sdv, svkey)
if util.is_not_empty(mo):
opts = []
# retain backward compatibility with filemode/dirmode
# options from the old Azure File Docker volume driver
for opt in mo:
tmp = opt.split('=')
if tmp[0] == 'filemode':
opts.append('file_mode={}'.format(tmp[1]))
elif tmp[0] == 'dirmode':
opts.append('dir_mode={}'.format(tmp[1]))
else:
opts.append(opt)
cmd = '{},{}'.format(cmd, ','.join(opts))
if not is_windows:
cmds.append('mkdir -p {}'.format(hmp))
cmds.append(cmd)
# create file share mount command script
if util.is_none_or_empty(cmds):
raise RuntimeError('Generated Azure file mount commands are invalid')
volcreate = _generate_azure_mount_script_name(
bc.account, settings.pool_id(config), True, is_windows)
newline = '\r\n' if is_windows else '\n'
with volcreate.open('w', newline=newline) as f:
if is_windows:
f.write('@echo off')
f.write(newline)
else:
f.write('#!/usr/bin/env bash')
f.write(newline)
f.write('set -e')
f.write(newline)
f.write('set -o pipefail')
f.write(newline)
for cmd in cmds:
f.write(cmd)
f.write(newline)
return volcreate
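# Illustrative note: on a Linux pool the generated script mounts each share
# with a command of the form (hypothetical account/share names):
#   mount -t cifs //mystorage.file.core.windows.net/myshare <host mount path> \
#       -o vers=3.0,username=mystorage,password=<key>,serverino[,<extra opts>]
# while Windows pools use 'net use' plus 'mklink /d' as constructed above.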
def _create_storage_cluster_mount_args(
compute_client, network_client, batch_mgmt_client, config, sc_id,
bc, subnet_id):
# type: (azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient,
# azure.mgmt.batch.BatchManagementClient, dict, str,
# settings.BatchCredentials, str) -> Tuple[str, str]
"""Create storage cluster mount arguments
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
batch management client
:param dict config: configuration dict
:param str sc_id: storage cluster id
:param settings.BatchCredentials bc: batch creds
:param str subnet_id: subnet id
:rtype: tuple
:return: (fstab mount, storage cluster arg)
"""
fstab_mount = None
sc_arg = None
ba = batch.get_batch_account(batch_mgmt_client, config)
# check for vnet/subnet presence
if util.is_none_or_empty(subnet_id):
raise RuntimeError(
'cannot mount a storage cluster without a valid virtual '
'network or subnet')
# get remotefs settings
rfs = settings.remotefs_settings(config, sc_id)
sc = rfs.storage_cluster
# iterate through shared data volumes and find storage clusters
sdv = settings.global_resources_shared_data_volumes(config)
if (sc_id not in sdv or
not settings.is_shared_data_volume_storage_cluster(
sdv, sc_id)):
raise RuntimeError(
'No storage cluster {} found in configuration'.format(sc_id))
vnet_subid, vnet_rg, _, vnet_name, subnet_name = _explode_arm_subnet_id(
subnet_id)
# check for same vnet name
if vnet_name.lower() != sc.virtual_network.name.lower():
raise RuntimeError(
'cannot link storage cluster {} on virtual '
'network {} with pool virtual network {}'.format(
sc_id, sc.virtual_network.name, vnet_name))
# cross check vnet resource group
if vnet_rg.lower() != sc.virtual_network.resource_group.lower():
raise RuntimeError(
'cannot link storage cluster {} virtual network in resource group '
'{} with pool virtual network in resource group {}'.format(
sc_id, sc.virtual_network.resource_group, vnet_rg))
# cross check vnet subscription id
_ba_tmp = ba.id.lower().split('/')
if vnet_subid.lower() != _ba_tmp[2]:
raise RuntimeError(
'cannot link storage cluster {} virtual network in subscription '
'{} with pool virtual network in subscription {}'.format(
sc_id, vnet_subid, _ba_tmp[2]))
del _ba_tmp
# get vm count
if sc.vm_count < 1:
raise RuntimeError(
'storage cluster {} vm_count {} is invalid'.format(
sc_id, sc.vm_count))
# get fileserver type
if sc.file_server.type == 'nfs':
# query first vm for info
vm_name = settings.generate_virtual_machine_name(sc, 0)
vm = compute_client.virtual_machines.get(
resource_group_name=sc.resource_group,
vm_name=vm_name,
)
nic = resource.get_nic_from_virtual_machine(
network_client, sc.resource_group, vm)
# get private ip of vm
remote_ip = nic.ip_configurations[0].private_ip_address
# construct mount options
mo = '_netdev,auto,nfsvers=4,intr'
amo = settings.shared_data_volume_mount_options(sdv, sc_id)
if util.is_not_empty(amo):
if any(['udp' in x for x in amo]):
raise RuntimeError(
('udp cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
if any([x.startswith('nfsvers=') for x in amo]):
raise RuntimeError(
('nfsvers cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
if any([x.startswith('port=') for x in amo]):
raise RuntimeError(
('port cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
mo = ','.join((mo, ','.join(amo)))
# construct mount string for fstab
fstab_mount = (
'{remoteip}:{srcpath} {hmp}/{scid} '
'{fstype} {mo} 0 2').format(
remoteip=remote_ip,
srcpath=sc.file_server.mountpoint,
hmp=settings.get_host_mounts_path(False),
scid=sc_id,
fstype=sc.file_server.type,
mo=mo)
elif sc.file_server.type == 'glusterfs':
# walk vms and find non-overlapping ud/fds
primary_ip = None
primary_ud = None
primary_fd = None
backup_ip = None
backup_ud = None
backup_fd = None
vms = {}
# first pass, attempt to populate all ip, ud/fd
for i in range(sc.vm_count):
vm_name = settings.generate_virtual_machine_name(sc, i)
vm = compute_client.virtual_machines.get(
resource_group_name=sc.resource_group,
vm_name=vm_name,
expand=compute_client.virtual_machines.models.
InstanceViewTypes.instance_view,
)
nic = resource.get_nic_from_virtual_machine(
network_client, sc.resource_group, vm)
vms[i] = (vm, nic)
# get private ip and ud/fd of vm
remote_ip = nic.ip_configurations[0].private_ip_address
ud = vm.instance_view.platform_update_domain
fd = vm.instance_view.platform_fault_domain
if primary_ip is None:
primary_ip = remote_ip
primary_ud = ud
primary_fd = fd
if backup_ip is None:
if (primary_ip == backup_ip or primary_ud == ud or
primary_fd == fd):
continue
backup_ip = remote_ip
backup_ud = ud
backup_fd = fd
# second pass, fill in with at least non-overlapping update domains
if backup_ip is None:
for i in range(sc.vm_count):
vm, nic = vms[i]
remote_ip = nic.ip_configurations[0].private_ip_address
ud = vm.instance_view.platform_update_domain
fd = vm.instance_view.platform_fault_domain
if primary_ud != ud:
backup_ip = remote_ip
backup_ud = ud
backup_fd = fd
break
if primary_ip is None or backup_ip is None:
raise RuntimeError(
'Could not find either a primary ip {} or backup ip {} for '
'glusterfs client mount'.format(primary_ip, backup_ip))
logger.debug('primary ip/ud/fd={} backup ip/ud/fd={}'.format(
(primary_ip, primary_ud, primary_fd),
(backup_ip, backup_ud, backup_fd)))
# construct mount options
mo = '_netdev,auto,transport=tcp,backupvolfile-server={}'.format(
backup_ip)
amo = settings.shared_data_volume_mount_options(sdv, sc_id)
if util.is_not_empty(amo):
if any([x.startswith('backupvolfile-server=') for x in amo]):
raise RuntimeError(
('backupvolfile-server cannot be specified as a mount '
'option for storage cluster {}').format(sc_id))
if any([x.startswith('transport=') for x in amo]):
raise RuntimeError(
('transport cannot be specified as a mount option for '
'storage cluster {}').format(sc_id))
mo = ','.join((mo, ','.join(amo)))
# construct mount string for fstab, srcpath is the gluster volume
fstab_mount = (
'{remoteip}:/{srcpath} {hmp}/{scid} '
'{fstype} {mo} 0 2').format(
remoteip=primary_ip,
srcpath=settings.get_file_server_glusterfs_volume_name(sc),
hmp=settings.get_host_mounts_path(False),
scid=sc_id,
fstype=sc.file_server.type,
mo=mo)
else:
raise NotImplementedError(
('cannot handle file_server type {} for storage '
'cluster {}').format(sc.file_server.type, sc_id))
if util.is_none_or_empty(fstab_mount):
raise RuntimeError(
('Could not construct an fstab mount entry for storage '
'cluster {}').format(sc_id))
# construct sc_arg
sc_arg = '{}:{}'.format(sc.file_server.type, sc_id)
# log fstab mount if verbose
if settings.verbose(config):
logger.debug('storage cluster {} fstab mount: {}'.format(
sc_id, fstab_mount))
return (fstab_mount, sc_arg)
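# Illustrative note: the fstab entries constructed above take the form
# (hypothetical private IPs, mountpoint and storage cluster id "mycluster"):
#   nfs:       10.0.0.4:/srv/data <host mounts path>/mycluster nfs \
#              _netdev,auto,nfsvers=4,intr 0 2
#   glusterfs: 10.0.0.4:/<gluster volume> <host mounts path>/mycluster \
#              glusterfs _netdev,auto,transport=tcp,\
#              backupvolfile-server=10.0.0.5 0 2
# and the corresponding sc_arg is '<file server type>:mycluster'.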
def _create_custom_linux_mount_args(config, mount_name):
# type: (dict, str) -> str
"""Create a custom linux mount fstab entry
:param dict config: configuration dict
:param str mount_name: mount name
:rtype: str
:return: fstab entry
"""
sdv = settings.global_resources_shared_data_volumes(config)
fstab = settings.custom_linux_mount_fstab_options(sdv, mount_name)
fstab_mount = (
'{fs_spec} {hmp}/{name} {fs_vfstype} {fs_mntops} {fs_freq} '
'{fs_passno}').format(
fs_spec=fstab.fs_spec,
hmp=settings.get_host_mounts_path(False),
name=mount_name,
fs_vfstype=fstab.fs_vfstype,
fs_mntops=fstab.fs_mntops,
fs_freq=fstab.fs_freq,
fs_passno=fstab.fs_passno)
return fstab_mount
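# Illustrative note: for a hypothetical custom mount named "mynfs" with
# fs_spec '10.1.0.4:/exports/data', fs_vfstype 'nfs', fs_mntops 'defaults',
# fs_freq 0 and fs_passno 2, the entry above becomes:
#   10.1.0.4:/exports/data <host mounts path>/mynfs nfs defaults 0 2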
def _pick_node_agent_for_vm(batch_client, pool_settings):
# type: (azure.batch.batch_service_client.BatchServiceClient,
# settings.PoolSettings) -> (str, str)
"""Pick a node agent id for the vm
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param settings.PoolSettings pool_settings: pool settings
:rtype: tuple
:return: image reference to use, node agent id to use
"""
publisher = pool_settings.vm_configuration.publisher
offer = pool_settings.vm_configuration.offer
sku = pool_settings.vm_configuration.sku
# TODO special exception for CentOS HPC 7.1
if publisher == 'openlogic' and offer == 'centos-hpc' and sku == '7.1':
return ({
'publisher': publisher,
'offer': offer,
'sku': sku,
'version': pool_settings.vm_configuration.version,
}, 'batch.node.centos 7')
# pick latest sku
node_agent_skus = batch_client.account.list_node_agent_skus()
skus_to_use = [
(nas, image_ref) for nas in node_agent_skus
for image_ref in sorted(
nas.verified_image_references,
key=lambda item: item.sku
)
if image_ref.publisher.lower() == publisher and
image_ref.offer.lower() == offer and
image_ref.sku.lower() == sku
]
try:
sku_to_use, image_ref_to_use = skus_to_use[-1]
except IndexError:
raise RuntimeError(
('Could not find an Azure Batch Node Agent Sku for this '
'offer={} publisher={} sku={}. You can list the valid and '
'available Marketplace images with the command: pool '
'listskus').format(
pool_settings.vm_configuration.offer,
pool_settings.vm_configuration.publisher,
pool_settings.vm_configuration.sku))
# set image version to use
image_ref_to_use.version = pool_settings.vm_configuration.version
logger.info('deploying vm config: {}'.format(image_ref_to_use))
return (image_ref_to_use, sku_to_use.id)
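# Illustrative note: for a hypothetical platform image with
# publisher='canonical', offer='ubuntuserver', sku='16.04-lts', the selection
# above returns the latest matching verified image reference together with a
# node agent sku id of the form 'batch.node.ubuntu 16.04' (the exact id comes
# from the Batch service's list_node_agent_skus response).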
def _explode_arm_subnet_id(arm_subnet_id):
# type: (str) -> Tuple[str, str, str, str, str]
"""Parses components from ARM subnet id
:param str arm_subnet_id: ARM subnet id
:rtype: tuple
:return: subid, rg, provider, vnet, subnet
"""
tmp = arm_subnet_id.split('/')
subid = tmp[2]
rg = tmp[4]
provider = tmp[6]
vnet = tmp[8]
subnet = tmp[10]
return subid, rg, provider, vnet, subnet
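# Illustrative example of the parsing above (hypothetical resource id):
#   arm_subnet_id = (
#       '/subscriptions/<sub-id>/resourceGroups/<rg>/providers/'
#       'Microsoft.Network/virtualNetworks/<vnet>/subnets/<subnet>'
#   )
#   _explode_arm_subnet_id(arm_subnet_id)
#   # -> ('<sub-id>', '<rg>', 'Microsoft.Network', '<vnet>', '<subnet>')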
def _pool_virtual_network_subnet_address_space_check(
resource_client, network_client, config, pool_settings, bc):
# type: (azure.mgmt.resource.resources.ResourceManagementClient,
# azure.mgmt.network.NetworkManagementClient, dict,
# settings.PoolSettings, settings.BatchCredentialsSettings) -> str
"""Pool Virtual Network and subnet address space check and create if
specified
:param azure.mgmt.resource.resources.ResourceManagementClient
resource_client: resource client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param dict config: configuration dict
:param settings.PoolSettings pool_settings: pool settings
:param settings.BatchCredentialsSettings bc: batch cred settings
:rtype: str
:return: subnet id
"""
if (util.is_none_or_empty(pool_settings.virtual_network.arm_subnet_id) and
util.is_none_or_empty(pool_settings.virtual_network.name)):
logger.debug('no virtual network settings specified')
return None
# check if AAD is enabled
if util.is_not_empty(bc.account_key):
raise RuntimeError(
'cannot allocate a pool with a virtual network without AAD '
'credentials')
# get subnet object
subnet_id = None
if util.is_not_empty(pool_settings.virtual_network.arm_subnet_id):
subnet_components = _explode_arm_subnet_id(
pool_settings.virtual_network.arm_subnet_id)
logger.debug(
('arm subnet id breakdown: subid={} rg={} provider={} vnet={} '
'subnet={}').format(
subnet_components[0], subnet_components[1],
subnet_components[2], subnet_components[3],
subnet_components[4]))
subnet_id = pool_settings.virtual_network.arm_subnet_id
if network_client is None:
logger.info('using virtual network subnet id: {}'.format(
subnet_id))
logger.warning(
'cannot perform IP space validation without a valid '
'network_client, please specify management AAD credentials '
'to allow pre-validation')
return subnet_id
# retrieve address prefix for subnet
_subnet = network_client.subnets.get(
subnet_components[1], subnet_components[3], subnet_components[4])
else:
if util.is_not_empty(pool_settings.virtual_network.resource_group):
_vnet_rg = pool_settings.virtual_network.resource_group
else:
_vnet_rg = bc.resource_group
# create virtual network and subnet if specified
_, _subnet = resource.create_virtual_network_and_subnet(
resource_client, network_client, _vnet_rg, bc.location,
pool_settings.virtual_network)
del _vnet_rg
subnet_id = _subnet.id
# ensure address prefix for subnet is valid
tmp = _subnet.address_prefix.split('/')
if len(tmp) <= 1:
raise RuntimeError(
'subnet address_prefix is invalid for Batch pools: {}'.format(
_subnet.address_prefix))
mask = int(tmp[-1])
# subtract 5 for the addresses Azure reserves in each subnet
allowable_addresses = (1 << (32 - mask)) - 5
logger.debug('subnet {} mask is {} and allows {} addresses'.format(
_subnet.name, mask, allowable_addresses))
pool_total_vm_count = (
pool_settings.vm_count.dedicated +
pool_settings.vm_count.low_priority
)
if allowable_addresses < pool_total_vm_count:
raise RuntimeError(
('subnet {} mask is {} and allows {} addresses but desired '
'pool vm_count is {}').format(
_subnet.name, mask, allowable_addresses, pool_total_vm_count))
elif int(allowable_addresses * 0.9) <= pool_total_vm_count:
# if within 90% tolerance, warn user due to potential
# address shortage if other compute resources are in this subnet
if not util.confirm_action(
config,
msg=('subnet {} mask is {} and allows {} addresses '
'but desired pool vm_count is {}, proceed?').format(
_subnet.name, mask, allowable_addresses,
pool_total_vm_count)):
raise RuntimeError('Pool deployment rejected by user')
logger.info('using virtual network subnet id: {}'.format(subnet_id))
return subnet_id
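# Illustrative note on the address space check above: a /24 subnet yields
# (1 << 8) - 5 = 251 allowable addresses, so a total pool vm_count of up to
# 224 proceeds silently, 225-251 triggers the 90% tolerance confirmation
# prompt, and anything above 251 is rejected outright.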
def _construct_pool_object(
resource_client, compute_client, network_client, batch_mgmt_client,
batch_client, blob_client, config):
# type: (azure.mgmt.resource.resources.ResourceManagementClient,
# azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient,
# azure.mgmt.batch.BatchManagementClient,
# azure.batch.batch_service_client.BatchServiceClient,
# azureblob.BlockBlobService, dict) -> tuple
"""Construct a pool add parameter object for create pool along with
uploading resource files
:param azure.mgmt.resource.resources.ResourceManagementClient
resource_client: resource client
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
batch management client
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param dict config: configuration dict
:rtype: tuple
:return: (pool settings, gluster on compute flag, pool add parameter)
"""
# check shared data volume mounts before proceeding to allocate
azureblob_vd = False
azurefile_vd = False
gluster_on_compute = False
storage_cluster_mounts = []
custom_linux_mounts = []
try:
sdv = settings.global_resources_shared_data_volumes(config)
for sdvkey in sdv:
if settings.is_shared_data_volume_azure_file(sdv, sdvkey):
azurefile_vd = True
elif settings.is_shared_data_volume_azure_blob(sdv, sdvkey):
azureblob_vd = True
elif settings.is_shared_data_volume_gluster_on_compute(
sdv, sdvkey):
if gluster_on_compute:
raise ValueError(
'only one glusterfs on compute can be created')
gluster_on_compute = True
elif settings.is_shared_data_volume_storage_cluster(
sdv, sdvkey):
storage_cluster_mounts.append(sdvkey)
elif settings.is_shared_data_volume_custom_linux_mount(
sdv, sdvkey):
custom_linux_mounts.append(sdvkey)
else:
raise ValueError('Unknown shared data volume: {}'.format(
settings.shared_data_volume_driver(sdv, sdvkey)))
except KeyError:
pass
# retrieve settings
pool_settings = settings.pool_settings(config)
native = settings.is_native_docker_pool(
config, vm_config=pool_settings.vm_configuration)
is_windows = settings.is_windows_pool(
config, vm_config=pool_settings.vm_configuration)
# get autoscale settings
if settings.is_pool_autoscale_enabled(config, pas=pool_settings.autoscale):
asenable = True
asformula = autoscale.get_formula(pool_settings)
asei = pool_settings.autoscale.evaluation_interval
if pool_settings.resize_timeout is not None:
logger.warning(
'ignoring resize timeout for autoscale-enabled pool')
else:
asenable = False
asformula = None
asei = None
logger.debug('autoscale enabled: {}'.format(asenable))
# task scheduling policy settings
if util.is_not_empty(pool_settings.node_fill_type):
task_scheduling_policy = batchmodels.TaskSchedulingPolicy(
node_fill_type=batchmodels.ComputeNodeFillType(
pool_settings.node_fill_type),
)
else:
task_scheduling_policy = None
# custom image settings
custom_image_na = settings.pool_custom_image_node_agent(config)
# check for virtual network settings
bc = settings.credentials_batch(config)
subnet_id = _pool_virtual_network_subnet_address_space_check(
resource_client, network_client, config, pool_settings, bc)
# construct fstab mounts for storage clusters
sc_fstab_mounts = []
sc_args = []
if util.is_not_empty(storage_cluster_mounts):
for sc_id in storage_cluster_mounts:
fm, sca = _create_storage_cluster_mount_args(
compute_client, network_client, batch_mgmt_client, config,
sc_id, bc, subnet_id)
sc_fstab_mounts.append(fm)
sc_args.append(sca)
if settings.verbose(config):
logger.debug('storage cluster args: {}'.format(sc_args))
del storage_cluster_mounts
# construct fstab mounts for custom mounts
custom_linux_fstab_mounts = []
if util.is_not_empty(custom_linux_mounts):
for mount_name in custom_linux_mounts:
custom_linux_fstab_mounts.append(
_create_custom_linux_mount_args(config, mount_name))
del custom_linux_mounts
# add encryption cert to account if specified
encrypt = settings.batch_shipyard_encryption_enabled(config)
if encrypt:
pfx = crypto.get_encryption_pfx_settings(config)
batch.add_certificate_to_account(batch_client, config)
# construct block list
block_for_gr = None
if pool_settings.block_until_all_global_resources_loaded:
block_for_gr_docker = ''
block_for_gr_singularity = ''
docker_images = settings.global_resources_docker_images(config)
if len(docker_images) > 0:
block_for_gr_docker = ','.join([x for x in docker_images])
singularity_images = settings.global_resources_singularity_images(
config)
if len(singularity_images) > 0:
block_for_gr_singularity = ','.join(
[util.singularity_image_name_on_disk(x)
for x in singularity_images])
if (util.is_none_or_empty(block_for_gr_docker) and
util.is_none_or_empty(block_for_gr_singularity)):
logger.warning(
'no Docker and Singularity images specified in global '
'resources')
if native:
# native pools will auto preload
block_for_gr_docker = ''
block_for_gr = '{}#{}'.format(
block_for_gr_docker, block_for_gr_singularity)
# shipyard settings
bs = settings.batch_shipyard_settings(config)
# data replication and peer-to-peer settings
dr = settings.data_replication_settings(config)
# create torrent flags
torrentflags = '{}:{}:{}:{}'.format(
dr.peer_to_peer.enabled, dr.concurrent_source_downloads,
dr.peer_to_peer.direct_download_seed_bias,
dr.peer_to_peer.compression)
# create resource files list
if is_windows:
_rflist = [_REGISTRY_LOGIN_WINDOWS_FILE, _BLOBXFER_WINDOWS_FILE]
else:
_rflist = [_REGISTRY_LOGIN_FILE, _BLOBXFER_FILE]
if not native and not is_windows:
_rflist.append(_IMAGE_BLOCK_FILE)
if not bs.use_shipyard_docker_image:
_rflist.append(_CASCADE_FILE)
if bs.store_timing_metrics:
_rflist.append(_PERF_FILE)
if pool_settings.ssh.hpn_server_swap:
_rflist.append(_HPNSSH_FILE)
# handle azure mounts
if azureblob_vd:
abms = _setup_azureblob_mounts(blob_client, config, bc)
_rflist.append(('azureblob-mount.sh', abms))
if azurefile_vd:
afms = _setup_azurefile_mounts(blob_client, config, bc, is_windows)
_rflist.append(
('azurefile-mount.{}'.format('cmd' if is_windows else 'sh'), afms)
)
# gpu settings
if (not native and settings.is_gpu_pool(pool_settings.vm_size) and
util.is_none_or_empty(custom_image_na)):
if pool_settings.gpu_driver is None:
gpu_driver = _setup_nvidia_driver_package(
blob_client, config, pool_settings.vm_size)
_rflist.append((gpu_driver.name, gpu_driver))
else:
gpu_type = settings.get_gpu_type_from_vm_size(
pool_settings.vm_size)
gpu_driver = pathlib.Path(_NVIDIA_DRIVER[gpu_type]['target'])
gpu_env = '{}:{}'.format(
settings.is_gpu_visualization_pool(pool_settings.vm_size),
gpu_driver.name)
else:
gpu_env = None
# get container registries
docker_registries = settings.docker_registries(config)
# set additional start task commands (pre version)
start_task = pool_settings.additional_node_prep_commands_pre
# set vm configuration
if native:
if util.is_not_empty(custom_image_na):
# check if AAD is enabled
if util.is_not_empty(bc.account_key):
raise RuntimeError(
'cannot allocate a pool with a custom image without AAD '
'credentials')
vmconfig = batchmodels.VirtualMachineConfiguration(
image_reference=batchmodels.ImageReference(
virtual_machine_image_id=pool_settings.
vm_configuration.arm_image_id,
),
node_agent_sku_id=pool_settings.vm_configuration.node_agent,
)
logger.debug(
('deploying custom image to pool in native mode: {} '
'node agent: {}').format(
vmconfig.image_reference.virtual_machine_image_id,
vmconfig.node_agent_sku_id))
else:
image_ref, na_ref = _pick_node_agent_for_vm(
batch_client, pool_settings)
vmconfig = batchmodels.VirtualMachineConfiguration(
image_reference=image_ref,
node_agent_sku_id=na_ref,
)
logger.debug('deploying pool in native mode')
# attach container config
vmconfig.container_configuration = batchmodels.ContainerConfiguration(
container_image_names=settings.global_resources_docker_images(
config),
container_registries=docker_registries,
)
if is_windows:
if util.is_not_empty(custom_image_na):
raise RuntimeError(
'Native mode and Windows custom images is not supported')
_rflist.append(_NODEPREP_WINDOWS_FILE)
start_task.append(
('powershell -ExecutionPolicy Unrestricted -command '
'{npf}{a}{e}{v}{x}').format(
npf=_NODEPREP_WINDOWS_FILE[0],
a=' -a' if azurefile_vd else '',
e=' -e {}'.format(pfx.sha1) if encrypt else '',
v=' -v {}'.format(__version__),
x=' -x {}'.format(data._BLOBXFER_VERSION))
)
else:
_rflist.append(_NODEPREP_NATIVEDOCKER_FILE)
start_task.append(
'{npf}{a}{c}{e}{f}{m}{n}{v}{x}'.format(
npf=_NODEPREP_NATIVEDOCKER_FILE[0],
a=' -a' if azurefile_vd else '',
c=' -c' if azureblob_vd else '',
e=' -e {}'.format(pfx.sha1) if encrypt else '',
f=' -f' if gluster_on_compute else '',
m=' -m {}'.format(','.join(sc_args)) if util.is_not_empty(
sc_args) else '',
n=' -n' if settings.can_tune_tcp(
pool_settings.vm_size) else '',
v=' -v {}'.format(__version__),
x=' -x {}'.format(data._BLOBXFER_VERSION),
)
)
elif util.is_not_empty(custom_image_na):
# check if AAD is enabled
if util.is_not_empty(bc.account_key):
raise RuntimeError(
'cannot allocate a pool with a custom image without AAD '
'credentials')
_rflist.append(_NODEPREP_CUSTOMIMAGE_FILE)
vmconfig = batchmodels.VirtualMachineConfiguration(
image_reference=batchmodels.ImageReference(
virtual_machine_image_id=pool_settings.
vm_configuration.arm_image_id,
),
node_agent_sku_id=pool_settings.vm_configuration.node_agent,
)
logger.debug('deploying custom image: {} node agent: {}'.format(
vmconfig.image_reference.virtual_machine_image_id,
vmconfig.node_agent_sku_id))
start_task.append(
'{npf}{a}{b}{c}{e}{f}{m}{n}{p}{t}{v}{x}'.format(
npf=_NODEPREP_CUSTOMIMAGE_FILE[0],
a=' -a' if azurefile_vd else '',
b=' -b' if util.is_not_empty(block_for_gr) else '',
c=' -c' if azureblob_vd else '',
e=' -e {}'.format(pfx.sha1) if encrypt else '',
f=' -f' if gluster_on_compute else '',
m=' -m {}'.format(','.join(sc_args)) if util.is_not_empty(
sc_args) else '',
n=' -n' if settings.can_tune_tcp(
pool_settings.vm_size) else '',
p=' -p {}'.format(bs.storage_entity_prefix)
if bs.storage_entity_prefix else '',
t=' -t {}'.format(torrentflags),
v=' -v {}'.format(__version__),
x=' -x {}'.format(data._BLOBXFER_VERSION),
)
)
else:
_rflist.append(_NODEPREP_FILE)
image_ref, na_ref = _pick_node_agent_for_vm(
batch_client, pool_settings)
vmconfig = batchmodels.VirtualMachineConfiguration(
image_reference=image_ref,
node_agent_sku_id=na_ref,
)
# create start task commandline
start_task.append(
'{npf}{a}{b}{c}{d}{e}{f}{g}{m}{n}{o}{p}{s}{t}{v}{w}{x}'.format(
npf=_NODEPREP_FILE[0],
a=' -a' if azurefile_vd else '',
b=' -b' if util.is_not_empty(block_for_gr) else '',
c=' -c' if azureblob_vd else '',
d=' -d' if bs.use_shipyard_docker_image else '',
e=' -e {}'.format(pfx.sha1) if encrypt else '',
f=' -f' if gluster_on_compute else '',
g=' -g {}'.format(gpu_env) if gpu_env is not None else '',
m=' -m {}'.format(','.join(sc_args)) if util.is_not_empty(
sc_args) else '',
n=' -n' if settings.can_tune_tcp(
pool_settings.vm_size) else '',
o=' -o {}'.format(pool_settings.vm_configuration.offer),
p=' -p {}'.format(bs.storage_entity_prefix)
if bs.storage_entity_prefix else '',
s=' -s {}'.format(pool_settings.vm_configuration.sku),
t=' -t {}'.format(torrentflags),
v=' -v {}'.format(__version__),
w=' -w' if pool_settings.ssh.hpn_server_swap else '',
x=' -x {}'.format(data._BLOBXFER_VERSION),
)
)
# upload resource files
sas_urls = storage.upload_resource_files(blob_client, config, _rflist)
del _rflist
# remove temporary az mount files created
if azureblob_vd:
try:
abms.unlink()
except OSError:
pass
if azurefile_vd:
try:
afms.unlink()
except OSError:
pass
# digest any input data
addlcmds = data.process_input_data(
config, _BLOBXFER_WINDOWS_FILE if is_windows else _BLOBXFER_FILE,
settings.pool_specification(config))
if addlcmds is not None:
start_task.append(addlcmds)
del addlcmds
# add additional start task commands (post version)
start_task.extend(pool_settings.additional_node_prep_commands_post)
# create pool param
pool = batchmodels.PoolAddParameter(
id=pool_settings.id,
virtual_machine_configuration=vmconfig,
vm_size=pool_settings.vm_size,
target_dedicated_nodes=(
pool_settings.vm_count.dedicated if not asenable else None
),
target_low_priority_nodes=(
pool_settings.vm_count.low_priority if not asenable else None
),
resize_timeout=pool_settings.resize_timeout if not asenable else None,
max_tasks_per_node=pool_settings.max_tasks_per_node,
enable_inter_node_communication=pool_settings.
inter_node_communication_enabled,
start_task=batchmodels.StartTask(
command_line=util.wrap_commands_in_shell(
start_task, windows=is_windows, wait=False),
user_identity=batch._RUN_ELEVATED,
wait_for_success=True,
environment_settings=[
batchmodels.EnvironmentSetting('LC_ALL', 'en_US.UTF-8'),
],
resource_files=[],
),
enable_auto_scale=asenable,
auto_scale_formula=asformula,
auto_scale_evaluation_interval=asei,
metadata=[
batchmodels.MetadataItem(
name=settings.get_metadata_version_name(),
value=__version__,
),
],
task_scheduling_policy=task_scheduling_policy,
)
if encrypt:
if is_windows:
pool.certificate_references = [
batchmodels.CertificateReference(
pfx.sha1, 'sha1',
visibility=[
batchmodels.CertificateVisibility.start_task,
batchmodels.CertificateVisibility.task,
]
)
]
else:
pool.certificate_references = [
batchmodels.CertificateReference(
pfx.sha1, 'sha1',
visibility=[batchmodels.CertificateVisibility.start_task]
)
]
for rf in sas_urls:
pool.start_task.resource_files.append(
batchmodels.ResourceFile(
file_path=rf,
blob_source=sas_urls[rf])
)
if not native:
pool.start_task.environment_settings.append(
batchmodels.EnvironmentSetting(
'SHIPYARD_STORAGE_ENV',
crypto.encrypt_string(
encrypt, '{}:{}:{}'.format(
storage.get_storageaccount(),
storage.get_storageaccount_endpoint(),
storage.get_storageaccount_key()),
config)
)
)
if pool_settings.gpu_driver and util.is_none_or_empty(custom_image_na):
pool.start_task.resource_files.append(
batchmodels.ResourceFile(
file_path=gpu_driver.name,
blob_source=pool_settings.gpu_driver,
file_mode='0755')
)
# add any additional specified resource files
if util.is_not_empty(pool_settings.resource_files):
for rf in pool_settings.resource_files:
pool.start_task.resource_files.append(
batchmodels.ResourceFile(
file_path=rf.file_path,
blob_source=rf.blob_source,
file_mode=rf.file_mode,
)
)
# virtual network settings
if subnet_id is not None:
pool.network_configuration = batchmodels.NetworkConfiguration(
subnet_id=subnet_id,
)
# storage cluster settings
if util.is_not_empty(sc_fstab_mounts):
pool.start_task.environment_settings.append(
batchmodels.EnvironmentSetting(
'SHIPYARD_STORAGE_CLUSTER_FSTAB',
'#'.join(sc_fstab_mounts)
)
)
del sc_args
del sc_fstab_mounts
# custom linux mount settings
if util.is_not_empty(custom_linux_fstab_mounts):
pool.start_task.environment_settings.append(
batchmodels.EnvironmentSetting(
'SHIPYARD_CUSTOM_MOUNTS_FSTAB',
'#'.join(custom_linux_fstab_mounts)
)
)
del custom_linux_fstab_mounts
# add optional environment variables
if not native and bs.store_timing_metrics:
pool.start_task.environment_settings.append(
batchmodels.EnvironmentSetting('SHIPYARD_TIMING', '1')
)
# add docker login settings
pool.start_task.environment_settings.extend(
batch.generate_docker_login_settings(config)[0])
# image preload setting
if util.is_not_empty(block_for_gr):
pool.start_task.environment_settings.append(
batchmodels.EnvironmentSetting(
'SHIPYARD_CONTAINER_IMAGES_PRELOAD',
block_for_gr,
)
)
# singularity env vars
if not is_windows:
pool.start_task.environment_settings.append(
batchmodels.EnvironmentSetting(
'SINGULARITY_TMPDIR',
settings.get_singularity_tmpdir(config)
)
)
pool.start_task.environment_settings.append(
batchmodels.EnvironmentSetting(
'SINGULARITY_CACHEDIR',
settings.get_singularity_cachedir(config)
)
)
return (pool_settings, gluster_on_compute, pool)
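# Illustrative note: for a non-native, non-custom-image Linux pool the start
# task command line assembled above is the node prep script plus its flags,
# e.g. (hypothetical values):
#   shipyard_nodeprep.sh -a -b -o UbuntuServer -p shipyard -s 16.04-LTS \
#       -t <torrentflags> -v <version> -x <blobxfer version>
# wrapped by util.wrap_commands_in_shell() together with any pre/post
# additional node prep commands and generated input data commands.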
def _construct_auto_pool_specification(
resource_client, compute_client, network_client, batch_mgmt_client,
batch_client, blob_client, config):
# type: (azure.mgmt.resource.resources.ResourceManagementClient,
# azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient,
# azure.mgmt.batch.BatchManagementClient,
# azure.batch.batch_service_client.BatchServiceClient,
# azureblob.BlockBlobService, dict) -> batchmodels.PoolSpecification
"""Construct an auto pool specification
:param azure.mgmt.resource.resources.ResourceManagementClient
resource_client: resource client
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
batch management client
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param dict config: configuration dict
:rtype: batchmodels.PoolSpecification
:return: auto pool specification
"""
# upload resource files and construct pool add parameter object
pool_settings, gluster_on_compute, pool = _construct_pool_object(
resource_client, compute_client, network_client, batch_mgmt_client,
batch_client, blob_client, config)
# convert pool add parameter object to a pool specification object
poolspec = batchmodels.PoolSpecification(
vm_size=pool.vm_size,
virtual_machine_configuration=pool.virtual_machine_configuration,
max_tasks_per_node=pool.max_tasks_per_node,
task_scheduling_policy=pool.task_scheduling_policy,
resize_timeout=pool.resize_timeout,
target_dedicated_nodes=pool.target_dedicated_nodes,
target_low_priority_nodes=pool.target_low_priority_nodes,
enable_auto_scale=pool.enable_auto_scale,
auto_scale_formula=pool.auto_scale_formula,
auto_scale_evaluation_interval=pool.auto_scale_evaluation_interval,
enable_inter_node_communication=pool.enable_inter_node_communication,
network_configuration=pool.network_configuration,
start_task=pool.start_task,
certificate_references=pool.certificate_references,
metadata=pool.metadata,
)
# add auto pool env var for cascade
poolspec.start_task.environment_settings.append(
batchmodels.EnvironmentSetting('SHIPYARD_AUTOPOOL', '1')
)
return poolspec
def _add_pool(
resource_client, compute_client, network_client, batch_mgmt_client,
batch_client, blob_client, config):
# type: (azure.mgmt.resource.resources.ResourceManagementClient,
# azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient,
# azure.mgmt.batch.BatchManagementClient,
# azure.batch.batch_service_client.BatchServiceClient,
# azureblob.BlockBlobService, dict) -> None
"""Add a Batch pool to account
:param azure.mgmt.resource.resources.ResourceManagementClient
resource_client: resource client
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
batch management client
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param dict config: configuration dict
"""
# upload resource files and construct pool add parameter object
pool_settings, gluster_on_compute, pool = _construct_pool_object(
resource_client, compute_client, network_client, batch_mgmt_client,
batch_client, blob_client, config)
# ingress data to Azure Blob Storage if specified
storage_threads = []
if pool_settings.transfer_files_on_pool_creation:
storage_threads = data.ingress_data(
batch_client, compute_client, network_client, config, rls=None,
kind='storage')
# create pool
nodes = batch.create_pool(batch_client, config, pool)
_pool = batch_client.pool.get(pool.id)
pool_current_vm_count = (
_pool.current_dedicated_nodes + _pool.current_low_priority_nodes
)
pool_target_vm_count = (
_pool.target_dedicated_nodes + _pool.target_low_priority_nodes
)
if util.is_none_or_empty(nodes) and pool_target_vm_count > 0:
raise RuntimeError(
('No nodes could be allocated for pool: {}. If the pool is '
'comprised entirely of low priority nodes, then there may not '
'have been enough available capacity in the region to satisfy '
'your request. Please inspect the pool for resize errors and '
'issue pool resize to try again.').format(pool.id))
# set up gluster on compute if specified
if gluster_on_compute and pool_current_vm_count > 0:
_setup_glusterfs(
batch_client, blob_client, config, nodes, _GLUSTERPREP_FILE,
cmdline=None)
# create admin user on each node if requested
if pool_current_vm_count > 0:
try:
batch.add_rdp_user(batch_client, config, nodes)
except Exception as e:
logger.exception(e)
try:
batch.add_ssh_user(batch_client, config, nodes)
except Exception as e:
logger.exception(e)
logger.error(
'Could not add SSH users to nodes. Please ensure ssh-keygen '
'is available in your PATH or cwd. Skipping data ingress if '
'specified.')
else:
rls = None
# ingress data to shared fs if specified
if pool_settings.transfer_files_on_pool_creation:
if rls is None:
rls = batch.get_remote_login_settings(
batch_client, config, nodes)
data.ingress_data(
batch_client, compute_client, network_client, config,
rls=rls, kind='shared',
total_vm_count=pool_current_vm_count)
# log remote login settings
if rls is None:
if pool_current_vm_count <= 16:
batch.get_remote_login_settings(
batch_client, config, nodes)
else:
logger.info(
'Not listing remote login settings due to VM count. '
'If you need a list of remote login settings for all '
'nodes in the pool, issue the "pool nodes grls" '
'command.')
# wait for storage ingress processes
data.wait_for_storage_threads(storage_threads)
def _setup_glusterfs(
batch_client, blob_client, config, nodes, shell_script, cmdline=None):
# type: (batchsc.BatchServiceClient, azureblob.BlockBlobService, dict,
# List[batchmodels.ComputeNode], str, str) -> None
"""Setup glusterfs via multi-instance task
:param batch_client: The batch client to use.
:type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param dict config: configuration dict
:param list nodes: list of nodes
:param str shell_script: glusterfs setup script to use
:param str cmdline: coordination cmdline
"""
# get volume type/options
voltype = None
volopts = None
sdv = settings.global_resources_shared_data_volumes(config)
for sdvkey in sdv:
try:
if settings.is_shared_data_volume_gluster_on_compute(sdv, sdvkey):
voltype = settings.gluster_volume_type(sdv, sdvkey)
volopts = settings.gluster_volume_options(sdv, sdvkey)
break
except KeyError:
pass
if voltype is None:
raise RuntimeError('glusterfs volume not defined')
pool_id = settings.pool_id(config)
job_id = 'shipyard-glusterfs-{}'.format(uuid.uuid4())
job = batchmodels.JobAddParameter(
id=job_id,
pool_info=batchmodels.PoolInformation(pool_id=pool_id),
)
# create coordination command line
if cmdline is None:
tempdisk = settings.temp_disk_mountpoint(config)
cmdline = util.wrap_commands_in_shell([
'$AZ_BATCH_TASK_DIR/{} {} {}'.format(
shell_script[0], voltype.lower(), tempdisk)])
# create application command line
appcmd = [
'[[ -f $AZ_BATCH_TASK_WORKING_DIR/.glusterfs_success ]] || exit 1',
]
if volopts is not None:
for vo in volopts:
appcmd.append('gluster volume set {} {}'.format(
settings.get_gluster_default_volume_name(), vo))
# upload script
sas_urls = storage.upload_resource_files(
blob_client, config, [shell_script])
# get pool current dedicated
pool = batch_client.pool.get(pool_id)
batchtask = batchmodels.TaskAddParameter(
id='gluster-setup',
multi_instance_settings=batchmodels.MultiInstanceSettings(
number_of_instances=pool.current_dedicated_nodes,
coordination_command_line=cmdline,
common_resource_files=[
batchmodels.ResourceFile(
file_path=shell_script[0],
blob_source=sas_urls[shell_script[0]],
file_mode='0755'),
],
),
command_line=util.wrap_commands_in_shell(appcmd),
user_identity=batch._RUN_ELEVATED,
)
# add job and task
batch_client.job.add(job)
batch_client.task.add(job_id=job_id, task=batchtask)
logger.debug(
'waiting for glusterfs setup task {} in job {} to complete'.format(
batchtask.id, job_id))
# wait for gluster fs setup task to complete
while True:
batchtask = batch_client.task.get(job_id, batchtask.id)
if batchtask.state == batchmodels.TaskState.completed:
break
time.sleep(1)
# ensure all nodes have glusterfs success file
if nodes is None:
nodes = batch_client.compute_node.list(pool_id)
success = True
for node in nodes:
try:
batch_client.file.get_properties_from_compute_node(
pool_id, node.id,
('workitems/{}/job-1/gluster-setup/wd/'
'.glusterfs_success').format(job_id))
except batchmodels.BatchErrorException:
logger.error('gluster success file absent on node {}'.format(
node.id))
success = False
break
# delete job
batch_client.job.delete(job_id)
if not success:
raise RuntimeError('glusterfs setup failed')
logger.info(
'glusterfs setup task {} in job {} completed'.format(
batchtask.id, job_id))
def _update_container_images_over_ssh(batch_client, config, pool, cmd):
# type: (batchsc.BatchServiceClient, dict, batchmodels.CloudPool,
# list) -> None
"""Update docker images in pool over ssh
:param batch_client: The batch client to use.
:type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
:param dict config: configuration dict
:param batchmodels.CloudPool pool: cloud pool
:param list cmd: command
"""
_pool = settings.pool_settings(config)
# get ssh settings
username = _pool.ssh.username
if util.is_none_or_empty(username):
raise ValueError(
'cannot update container images without an SSH username')
ssh_private_key = _pool.ssh.ssh_private_key
if ssh_private_key is None:
ssh_private_key = pathlib.Path(
_pool.ssh.generated_file_export_path, crypto.get_ssh_key_prefix())
if not ssh_private_key.exists():
raise RuntimeError('SSH private key file not found at: {}'.format(
ssh_private_key))
command = ['sudo', '/bin/bash -c "{}"'.format(' && '.join(cmd))]
if settings.verbose(config):
logger.debug('executing command: {}'.format(command))
# iterate through all nodes
nodes = batch_client.compute_node.list(pool.id)
procs = []
failures = False
for node in nodes:
rls = batch_client.compute_node.get_remote_login_settings(
pool.id, node.id)
procs.append(crypto.connect_or_exec_ssh_command(
rls.remote_login_ip_address, rls.remote_login_port,
ssh_private_key, username, sync=False, command=command))
if len(procs) >= 40:
logger.debug('waiting for {} update processes to complete'.format(
len(procs)))
rcs = util.subprocess_wait_all(procs, poll=False)
if any([x != 0 for x in rcs]):
failures = True
procs = []
del rcs
if len(procs) > 0:
logger.debug('waiting for {} update processes to complete'.format(
len(procs)))
rcs = util.subprocess_wait_all(procs, poll=False)
if any([x != 0 for x in rcs]):
failures = True
procs = []
del rcs
if failures:
raise RuntimeError(
'failures detected updating container image on pool: {}'.format(
pool.id))
else:
logger.info('container image update completed for pool: {}'.format(
pool.id))
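# Illustrative note: the per-node invocation above runs the update commands
# through a single elevated shell on each node, conceptually equivalent to
# (hypothetical command list):
#   ssh <username>@<node ip> sudo /bin/bash -c \
#       "docker pull alpine:latest && docker images ..."
# with at most 40 concurrent subprocesses in flight before waiting on them.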
def _update_container_images(
batch_client, config, docker_image=None, docker_image_digest=None,
singularity_image=None, force_ssh=False):
# type: (batchsc.BatchServiceClient, dict, str, str, str, bool) -> None
"""Update container images in pool
:param batch_client: The batch client to use.
:type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
:param dict config: configuration dict
:param str docker_image: docker image to update
:param str docker_image_digest: digest to update to
:param str singularity_image: singularity image to update
:param bool force_ssh: force update over SSH
"""
# first check that peer-to-peer is disabled for pool
pool_id = settings.pool_id(config)
try:
if settings.data_replication_settings(config).peer_to_peer.enabled:
raise RuntimeError(
'cannot update container images for a pool with peer-to-peer '
'image distribution')
except KeyError:
pass
native = settings.is_native_docker_pool(config)
if native and not force_ssh:
logger.debug('forcing update via SSH due to native mode')
force_ssh = True
# if image is not specified use images from global config
singularity_images = None
if util.is_none_or_empty(docker_image):
docker_images = settings.global_resources_docker_images(config)
else:
# log warning if it doesn't exist in global resources
if docker_image not in settings.global_resources_docker_images(config):
logger.warning(
('docker image {} is not specified as a global resource '
'for pool {}').format(docker_image, pool_id))
if docker_image_digest is None:
docker_images = [docker_image]
else:
docker_images = ['{}@{}'.format(docker_image, docker_image_digest)]
if util.is_none_or_empty(singularity_image):
singularity_images = settings.global_resources_singularity_images(
config)
else:
# log warning if it doesn't exist in global resources
if (singularity_image not in
settings.global_resources_singularity_images(config)):
logger.warning(
('singularity image {} is not specified as a global resource '
'for pool {}').format(singularity_image, pool_id))
singularity_images = [singularity_image]
if (util.is_none_or_empty(docker_images) and
util.is_none_or_empty(singularity_images)):
logger.error('no images detected or specified to update')
return
# get pool current dedicated
pool = batch_client.pool.get(pool_id)
# check that the pool's current vm count is > 0. There is no reason to run
# updateimages if the pool has no nodes in it. When the pool is resized up,
# the nodes will always fetch either :latest if untagged or the latest :tag
# if updated in the upstream registry
if (pool.current_dedicated_nodes == 0 and
pool.current_low_priority_nodes == 0):
logger.warning(
('not executing updateimages command as the current number of '
'compute nodes is zero for pool {}').format(pool_id))
return
# force ssh on some paths
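    # the non-SSH path below uses a multi-instance task, which requires
    # inter-node communication and only targets dedicated nodes, so fall
    # back to SSH in the other cases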
if not force_ssh:
if pool.current_low_priority_nodes > 0:
logger.debug('forcing update via SSH due to low priority nodes')
force_ssh = True
if (pool.current_dedicated_nodes > 1 and
not pool.enable_inter_node_communication):
logger.debug(
                'forcing update via SSH as inter-node communication is '
                'not enabled on the pool')
force_ssh = True
# check pool metadata version
if util.is_none_or_empty(pool.metadata):
logger.warning('pool version metadata not present')
else:
for md in pool.metadata:
if (md.name == settings.get_metadata_version_name() and
md.value != __version__):
logger.warning(
'pool version metadata mismatch: pool={} cli={}'.format(
md.value, __version__))
break
# perform windows compat checks
is_windows = settings.is_windows_pool(config)
if is_windows:
if force_ssh:
raise RuntimeError('cannot update images via SSH on windows')
if util.is_not_empty(singularity_images):
raise RuntimeError(
'invalid configuration: windows pool with singularity images')
# create coordination command line
# 1. log in again in case of cred expiry
# 2. pull images with respect to registry
# 3. tag images that are in a private registry
# 4. prune docker images with no tag
taskenv, coordcmd = batch.generate_docker_login_settings(config, force_ssh)
if util.is_not_empty(docker_images):
coordcmd.extend(['docker pull {}'.format(x) for x in docker_images])
coordcmd.append(
'docker images --filter dangling=true -q --no-trunc | '
'xargs --no-run-if-empty docker rmi')
if util.is_not_empty(singularity_images):
coordcmd.extend([
'export SINGULARITY_TMPDIR={}'.format(
settings.get_singularity_tmpdir(config)),
'export SINGULARITY_CACHEDIR={}'.format(
settings.get_singularity_cachedir(config)),
])
coordcmd.extend(
['singularity pull -F {}'.format(x) for x in singularity_images]
)
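        # the elevated pull populates the cache as root; chown it back so
        # tasks running as the pool user can read the cached images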
coordcmd.append('chown -R _azbatch:_azbatchgrp {}'.format(
settings.get_singularity_cachedir(config)))
if force_ssh:
_update_container_images_over_ssh(batch_client, config, pool, coordcmd)
return
if is_windows:
coordcmd.append('copy /y nul .update_images_success')
else:
coordcmd.append('touch .update_images_success')
# update taskenv for Singularity
taskenv.append(
batchmodels.EnvironmentSetting(
'SINGULARITY_TMPDIR',
settings.get_singularity_tmpdir(config)
)
)
taskenv.append(
batchmodels.EnvironmentSetting(
'SINGULARITY_CACHEDIR',
settings.get_singularity_cachedir(config)
)
)
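    # collapse the command list into a single shell command line
    # appropriate for the pool OS (quoting is handled by util)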
coordcmd = util.wrap_commands_in_shell(coordcmd, windows=is_windows)
# create job for update
job_id = 'shipyard-updateimages-{}'.format(uuid.uuid4())
job = batchmodels.JobAddParameter(
id=job_id,
pool_info=batchmodels.PoolInformation(pool_id=pool_id),
)
# create task
batchtask = batchmodels.TaskAddParameter(
id='update-container-images',
command_line=coordcmd,
environment_settings=taskenv,
user_identity=batch._RUN_ELEVATED,
)
# create multi-instance task for pools with more than 1 node
if pool.current_dedicated_nodes > 1:
batchtask.multi_instance_settings = batchmodels.MultiInstanceSettings(
number_of_instances=pool.current_dedicated_nodes,
coordination_command_line=coordcmd,
)
# create application command line
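    # the application command only verifies the success marker written by
    # the coordination command; for multi-instance tasks the coordination
    # command runs on every node while this check runs on the primary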
if is_windows:
appcmd = util.wrap_commands_in_shell([
'if not exist %AZ_BATCH_TASK_WORKING_DIR%\\'
'.update_images_success exit 1'
], windows=is_windows)
else:
appcmd = util.wrap_commands_in_shell([
'[[ -f $AZ_BATCH_TASK_WORKING_DIR/.update_images_success ]] '
'|| exit 1'
], windows=is_windows)
batchtask.command_line = appcmd
# add job and task
batch_client.job.add(job)
batch_client.task.add(job_id=job_id, task=batchtask)
logger.debug(
('waiting for update container images task {} in job {} '
'to complete').format(batchtask.id, job_id))
# wait for task to complete
while True:
batchtask = batch_client.task.get(job_id, batchtask.id)
if batchtask.state == batchmodels.TaskState.completed:
break
time.sleep(1)
# ensure all nodes have success file if multi-instance
success = True
if pool.current_dedicated_nodes > 1:
if is_windows:
sep = '\\'
else:
sep = '/'
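        # node-relative path to the success marker, e.g.
        # workitems/shipyard-updateimages-<uuid>/job-1/
        #   update-container-images/wd/.update_images_success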
uis_file = sep.join(
('workitems', job_id, 'job-1', batchtask.id, 'wd',
'.update_images_success')
)
nodes = batch_client.compute_node.list(pool_id)
for node in nodes:
try:
batch_client.file.get_properties_from_compute_node(
pool_id, node.id, uis_file)
except batchmodels.BatchErrorException:
logger.error(
'update images success file absent on node {}'.format(
node.id))
success = False
break
else:
task = batch_client.task.get(job_id, batchtask.id)
if task.execution_info is None or task.execution_info.exit_code != 0:
success = False
# stream stderr to console
batch.stream_file_and_wait_for_task(
batch_client, config,
'{},{},stderr.txt'.format(batchtask.id, job_id))
# delete job
batch_client.job.delete(job_id)
if not success:
raise RuntimeError('update container images job failed')
logger.info(
'update container images task {} in job {} completed'.format(
batchtask.id, job_id))
def _list_docker_images(batch_client, config):
# type: (batchsc.BatchServiceClient, dict) -> None
"""List Docker images in pool over ssh
:param batch_client: The batch client to use.
:type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
:param dict config: configuration dict
"""
_pool = settings.pool_settings(config)
pool = batch_client.pool.get(_pool.id)
if (pool.current_dedicated_nodes == 0 and
pool.current_low_priority_nodes == 0):
logger.warning('pool {} has no compute nodes'.format(pool.id))
return
is_windows = settings.is_windows_pool(config)
# TODO temporarily disable listimages with windows pools
if is_windows:
raise RuntimeError(
'listing images is currently not supported for windows pools')
# get ssh settings
username = _pool.ssh.username
if util.is_none_or_empty(username):
raise ValueError('cannot list docker images without an SSH username')
ssh_private_key = _pool.ssh.ssh_private_key
if ssh_private_key is None:
ssh_private_key = pathlib.Path(
_pool.ssh.generated_file_export_path, crypto.get_ssh_key_prefix())
if not ssh_private_key.exists():
raise RuntimeError('SSH private key file not found at: {}'.format(
ssh_private_key))
# iterate through all nodes
nodes = batch_client.compute_node.list(pool.id)
procs = {}
stdout = {}
failures = False
for node in nodes:
rls = batch_client.compute_node.get_remote_login_settings(
pool.id, node.id)
procs[node.id] = crypto.connect_or_exec_ssh_command(
rls.remote_login_ip_address, rls.remote_login_port,
ssh_private_key, username, sync=False,
command=[
'sudo', 'docker', 'images', '--format',
'"{{.ID}} {{.Repository}}:{{.Tag}}"'
])
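        # throttle to 40 concurrent SSH sessions; capture each node's
        # stdout before waiting on the batch of processes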
if len(procs) >= 40:
logger.debug('waiting for {} processes to complete'.format(
len(procs)))
for key in procs:
stdout[key] = procs[key].communicate()[0].decode(
'utf8').split('\n')
rcs = util.subprocess_wait_all(list(procs.values()))
if any([x != 0 for x in rcs]):
failures = True
procs.clear()
del rcs
if len(procs) > 0:
logger.debug('waiting for {} processes to complete'.format(
len(procs)))
for key in procs:
stdout[key] = procs[key].communicate()[0].decode(
'utf8').split('\n')
rcs = util.subprocess_wait_all(list(procs.values()))
if any([x != 0 for x in rcs]):
failures = True
procs.clear()
del rcs
if failures:
raise RuntimeError(
'failures retrieving docker images on pool: {}'.format(
pool.id))
# process stdout
node_images = {}
all_images = {}
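    # each stdout line is '<image id> <repository>:<tag>' per the --format
    # string above; batch-shipyard infrastructure images are excluded from
    # the comparison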
for key in stdout:
node_images[key] = set()
for out in stdout[key]:
if util.is_not_empty(out):
dec = out.split()
if (not dec[1].startswith('alfpark/batch-shipyard') and
not dec[1].startswith('alfpark/blobxfer')):
node_images[key].add(dec[0])
if dec[0] not in all_images:
all_images[dec[0]] = dec[1]
# find set intersection among all nodes
intersecting_images = set.intersection(*list(node_images.values()))
logger.info('Common Docker images across all nodes in pool {}:{}{}'.format(
pool.id,
os.linesep,
os.linesep.join(
['{} {}'.format(key, all_images[key])
for key in intersecting_images])
))
# find mismatched images on nodes
for node in node_images:
images = set(node_images[node])
diff = images.difference(intersecting_images)
if len(diff) > 0:
logger.warning('Docker images present only on node {}:{}{}'.format(
node, os.linesep,
os.linesep.join(
['{} {}'.format(key, all_images[key])
for key in diff])
))
def _adjust_settings_for_pool_creation(config):
# type: (dict) -> None
"""Adjust settings for pool creation
:param dict config: configuration dict
"""
# get settings
pool = settings.pool_settings(config)
publisher = settings.pool_publisher(config, lower=True)
offer = settings.pool_offer(config, lower=True)
sku = settings.pool_sku(config, lower=True)
node_agent = settings.pool_custom_image_node_agent(config)
if util.is_not_empty(node_agent) and util.is_not_empty(sku):
raise ValueError(
'cannot specify both a platform_image and a custom_image in the '
'pool specification')
is_windows = settings.is_windows_pool(config)
bs = settings.batch_shipyard_settings(config)
# enforce publisher/offer/sku restrictions
allowed = False
shipyard_container_required = True
# oracle linux is not supported due to UEKR4 requirement
if publisher == 'canonical':
if offer == 'ubuntuserver':
if sku.startswith('14.04'):
allowed = True
elif sku.startswith('16.04'):
allowed = True
shipyard_container_required = False
elif publisher == 'credativ':
if offer == 'debian':
if sku >= '8':
allowed = True
elif publisher == 'openlogic':
if offer.startswith('centos'):
if sku >= '7':
allowed = True
elif publisher == 'redhat':
if offer == 'rhel':
if sku >= '7':
allowed = True
elif publisher == 'suse':
if offer.startswith('sles'):
if sku >= '12-sp1':
allowed = True
elif offer == 'opensuse-leap':
if sku >= '42':
allowed = True
elif publisher == 'microsoftwindowsserver':
if offer == 'windowsserver':
if sku == '2016-datacenter-with-containers':
allowed = True
# check if allowed for gpu (if gpu vm size)
if allowed:
allowed = settings.gpu_configuration_check(
config, vm_size=pool.vm_size)
if not allowed and util.is_none_or_empty(node_agent):
raise ValueError(
('unsupported Docker (and/or GPU) Host VM Config, publisher={} '
'offer={} sku={} vm_size={}').format(
publisher, offer, sku, pool.vm_size))
# ensure HPC offers are matched with RDMA sizes
if (not is_windows and (
(offer == 'centos-hpc' or offer == 'sles-hpc') and
not settings.is_rdma_pool(pool.vm_size))):
raise ValueError(
('cannot allocate an HPC VM config of publisher={} offer={} '
'sku={} with a non-RDMA vm_size={}').format(
publisher, offer, sku, pool.vm_size))
# compute total vm count
pool_total_vm_count = pool.vm_count.dedicated + pool.vm_count.low_priority
# adjust for shipyard container requirement
if (not bs.use_shipyard_docker_image and
(shipyard_container_required or util.is_not_empty(node_agent))):
settings.set_use_shipyard_docker_image(config, True)
logger.debug(
('forcing shipyard docker image to be used due to '
'VM config, publisher={} offer={} sku={}').format(
publisher, offer, sku))
# re-read pool and data replication settings
pool = settings.pool_settings(config)
dr = settings.data_replication_settings(config)
native = settings.is_native_docker_pool(
config, vm_config=pool.vm_configuration)
# ensure singularity images are not specified for native pools
if native:
images = settings.global_resources_singularity_images(config)
if util.is_not_empty(images):
raise ValueError(
'cannot specify a native container pool with Singularity '
'images as global resources')
# ensure settings p2p/as/internode settings are compatible
if dr.peer_to_peer.enabled:
if native:
raise ValueError(
'cannot enable peer-to-peer and native container pools')
if settings.is_pool_autoscale_enabled(config, pas=pool.autoscale):
raise ValueError('cannot enable peer-to-peer and autoscale')
        if not pool.inter_node_communication_enabled:
logger.warning(
'force enabling inter-node communication due to peer-to-peer '
'transfer')
settings.set_inter_node_communication_enabled(config, True)
# hpn-ssh can only be used for Ubuntu currently
try:
if (pool.ssh.hpn_server_swap and
                ((publisher != 'canonical' or offer != 'ubuntuserver') or
util.is_not_empty(node_agent))):
logger.warning('cannot enable HPN SSH swap on {} {} {}'.format(
publisher, offer, sku))
settings.set_hpn_server_swap(config, False)
except KeyError:
pass
# force disable block for global resources if ingressing data
if (pool.transfer_files_on_pool_creation and
pool.block_until_all_global_resources_loaded):
logger.warning(
'disabling block until all global resources loaded with '
'transfer files on pool creation enabled')
settings.set_block_until_all_global_resources_loaded(config, False)
# re-read pool settings
pool = settings.pool_settings(config)
# ensure internode is not enabled for mix node pools
if (pool.inter_node_communication_enabled and
pool.vm_count.dedicated > 0 and pool.vm_count.low_priority > 0):
raise ValueError(
'inter node communication cannot be enabled with both '
'dedicated and low priority nodes')
# check shared data volume settings
try:
num_gluster = 0
sdv = settings.global_resources_shared_data_volumes(config)
for sdvkey in sdv:
if settings.is_shared_data_volume_gluster_on_compute(sdv, sdvkey):
if is_windows:
raise ValueError(
'glusterfs on compute is not supported on windows')
if settings.is_pool_autoscale_enabled(
config, pas=pool.autoscale):
raise ValueError(
'glusterfs on compute cannot be installed on an '
'autoscale-enabled pool')
if not pool.inter_node_communication_enabled:
                    # do not modify the value and proceed since this
                    # interplays with p2p settings; simply raise an
                    # exception and force the user to reconfigure
raise ValueError(
'inter node communication in pool configuration '
'must be enabled for glusterfs on compute')
if pool.vm_count.low_priority > 0:
raise ValueError(
'glusterfs on compute cannot be installed on pools '
'with low priority nodes')
if pool.vm_count.dedicated <= 1:
raise ValueError(
'vm_count dedicated should exceed 1 for glusterfs '
'on compute')
if pool.max_tasks_per_node > 1:
raise ValueError(
'max_tasks_per_node cannot exceed 1 for glusterfs '
'on compute')
num_gluster += 1
try:
if settings.gluster_volume_type(sdv, sdvkey) != 'replica':
raise ValueError(
'only replicated GlusterFS volumes are '
'currently supported')
except KeyError:
pass
elif settings.is_shared_data_volume_storage_cluster(sdv, sdvkey):
if is_windows:
raise ValueError(
'storage cluster mounting is not supported on windows')
elif settings.is_shared_data_volume_azure_blob(sdv, sdvkey):
if is_windows:
raise ValueError(
'azure blob mounting is not supported on windows')
if native:
raise ValueError(
'azure blob mounting is not supported on native '
'container pools')
if offer == 'ubuntuserver':
if sku < '16.04-lts':
raise ValueError(
('azure blob mounting is not supported '
'on publisher={} offer={} sku={}').format(
publisher, offer, sku))
elif not offer.startswith('centos'):
raise ValueError(
('azure blob mounting is not supported '
'on publisher={} offer={} sku={}').format(
publisher, offer, sku))
elif settings.is_shared_data_volume_custom_linux_mount(
sdv, sdvkey):
if is_windows:
raise ValueError(
'custom linux mounting is not supported on windows')
if num_gluster > 1:
raise ValueError(
'cannot create more than one GlusterFS on compute volume '
'per pool')
except KeyError:
pass
# check data ingress on pool creation on windows
if is_windows and pool.transfer_files_on_pool_creation:
raise ValueError(
'cannot transfer files on pool creation to windows compute nodes')
# check singularity images are not present for windows
if (is_windows and util.is_not_empty(
settings.global_resources_singularity_images(config))):
raise ValueError('cannot deploy Singularity images on windows pools')
# check pool count of 0 and remote login
if pool_total_vm_count == 0:
if is_windows:
# TODO RDP check
pass
else:
if util.is_not_empty(pool.ssh.username):
logger.warning('cannot add SSH user with zero target nodes')
# ensure unusable recovery is not enabled for custom image
if (pool.attempt_recovery_on_unusable and
not settings.is_platform_image(
config, vm_config=pool.vm_configuration)):
logger.warning(
            'disabling attempt recovery on unusable due to custom image')
settings.set_attempt_recovery_on_unusable(config, False)
# TODO temporarily disable credential encryption with windows
if is_windows and settings.batch_shipyard_encryption_enabled(config):
raise ValueError(
'cannot enable credential encryption with windows pools')
def _check_settings_for_auto_pool(config):
# type: (dict) -> None
"""Check settings for autopool
:param dict config: configuration dict
"""
# check glusterfs on compute
try:
sdv = settings.global_resources_shared_data_volumes(config)
for sdvkey in sdv:
if settings.is_shared_data_volume_gluster_on_compute(sdv, sdvkey):
raise ValueError(
'GlusterFS on compute is not possible with autopool')
break
except KeyError:
pass
# get settings
pool = settings.pool_settings(config)
# check local data movement to pool
if pool.transfer_files_on_pool_creation:
raise ValueError('Cannot ingress data on pool creation with autopool')
# check ssh
if util.is_not_empty(pool.ssh.username):
logger.warning('cannot add SSH user with autopool')
def _check_resource_client(resource_client):
# type: (azure.mgmt.resource.resources.ResourceManagementClient) -> None
"""Check resource client validity"""
if resource_client is None:
raise RuntimeError(
'resource management client is invalid, ensure you have '
'specified proper "management" credentials')
def _check_compute_client(compute_client):
# type: (azure.mgmt.resource.compute.ComputeManagementClient) -> None
"""Check compute client validity"""
if compute_client is None:
raise RuntimeError(
'compute management client is invalid, ensure you have '
'specified proper "management" credentials')
def _check_network_client(network_client):
# type: (azure.mgmt.resource.network.NetworkManagementClient) -> None
"""Check network client validity"""
if network_client is None:
raise RuntimeError(
'network management client is invalid, ensure you have '
'specified proper "management" credentials')
def _check_keyvault_client(keyvault_client):
# type: (azure.keyvault.KeyVaultClient) -> None
"""Check keyvault client validity"""
if keyvault_client is None:
raise RuntimeError(
'keyvault client is invalid, ensure you have specified '
'proper "keyvault" credentials')
def _check_batch_client(batch_client):
# type: (batchsc.BatchServiceClient) -> None
"""Check batch client validity"""
if batch_client is None:
raise RuntimeError(
'batch client is invalid, ensure you have specified '
'proper "batch" credentials')
def action_fs_disks_add(resource_client, compute_client, config):
# type: (azure.mgmt.resource.resources.ResourceManagementClient,
# azure.mgmt.compute.ComputeManagementClient, dict) -> None
"""Action: Fs Disks Add
:param azure.mgmt.resource.resources.ResourceManagementClient
resource_client: resource client
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param dict config: configuration dict
"""
_check_resource_client(resource_client)
_check_compute_client(compute_client)
remotefs.create_managed_disks(resource_client, compute_client, config)
def action_fs_disks_del(
compute_client, config, name, resource_group, all, wait):
# type: (azure.mgmt.compute.ComputeManagementClient, dict, str,
# str, bool, bool) -> None
"""Action: Fs Disks Del
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param dict config: configuration dict
:param str name: disk name
:param str resource_group: resource group
:param bool all: delete all in resource group
:param bool wait: wait for operation to complete
"""
_check_compute_client(compute_client)
remotefs.delete_managed_disks(
compute_client, config, name, resource_group, all, wait,
confirm_override=False)
def action_fs_disks_list(
compute_client, config, resource_group, restrict_scope):
# type: (azure.mgmt.compute.ComputeManagementClient, dict, str,
# bool) -> None
"""Action: Fs Disks List
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param dict config: configuration dict
:param str resource_group: resource group
:param bool restrict_scope: restrict scope to config
"""
_check_compute_client(compute_client)
remotefs.list_disks(compute_client, config, resource_group, restrict_scope)
def action_fs_cluster_add(
resource_client, compute_client, network_client, blob_client,
config, storage_cluster_id):
# type: (azure.mgmt.resource.resources.ResourceManagementClient,
# azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient,
# azure.storage.blob.BlockBlobService, dict, str) -> None
"""Action: Fs Cluster Add
:param azure.mgmt.resource.resources.ResourceManagementClient
resource_client: resource client
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param dict config: configuration dict
:param str storage_cluster_id: storage cluster id
"""
_check_resource_client(resource_client)
_check_compute_client(compute_client)
_check_network_client(network_client)
storage.set_storage_remotefs_container(storage_cluster_id)
remotefs.create_storage_cluster(
resource_client, compute_client, network_client, blob_client, config,
storage_cluster_id, _REMOTEFSPREP_FILE[0], _ALL_REMOTEFS_FILES)
def action_fs_cluster_resize(
compute_client, network_client, blob_client, config,
storage_cluster_id):
# type: (azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient,
# azure.storage.blob.BlockBlobService, dict, str) -> None
"""Action: Fs Cluster Resize
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param dict config: configuration dict
:param str storage_cluster_id: storage cluster id
"""
_check_compute_client(compute_client)
_check_network_client(network_client)
remotefs.resize_storage_cluster(
compute_client, network_client, blob_client, config,
storage_cluster_id, _REMOTEFSPREP_FILE[0], _REMOTEFSADDBRICK_FILE[0],
_ALL_REMOTEFS_FILES)
def action_fs_cluster_del(
resource_client, compute_client, network_client, blob_client, config,
storage_cluster_id, delete_all_resources, delete_data_disks,
delete_virtual_network, generate_from_prefix, wait):
# type: (azure.mgmt.resource.resources.ResourceManagementClient,
# azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient,
# azure.storage.blob.BlockBlobService, dict, str, bool, bool,
# bool, bool, bool) -> None
"""Action: Fs Cluster Add
:param azure.mgmt.resource.resources.ResourceManagementClient
resource_client: resource client
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param dict config: configuration dict
:param str storage_cluster_id: storage cluster id
:param bool delete_all_resources: delete all resources
:param bool delete_data_disks: delete data disks
:param bool delete_virtual_network: delete virtual network
:param bool generate_from_prefix: generate resources from hostname prefix
:param bool wait: wait for deletion to complete
"""
_check_resource_client(resource_client)
_check_compute_client(compute_client)
_check_network_client(network_client)
if (generate_from_prefix and
(delete_all_resources or delete_data_disks or
delete_virtual_network)):
raise ValueError(
'Cannot specify generate_from_prefix and a delete_* option')
storage.set_storage_remotefs_container(storage_cluster_id)
remotefs.delete_storage_cluster(
resource_client, compute_client, network_client, blob_client, config,
storage_cluster_id, delete_data_disks=delete_data_disks,
delete_virtual_network=delete_virtual_network,
delete_resource_group=delete_all_resources,
generate_from_prefix=generate_from_prefix, wait=wait)
def action_fs_cluster_expand(
compute_client, network_client, config, storage_cluster_id, rebalance):
# type: (azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient, dict, str,
# bool) -> None
"""Action: Fs Cluster Expand
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param dict config: configuration dict
:param str storage_cluster_id: storage cluster id
:param bool rebalance: rebalance filesystem
"""
_check_compute_client(compute_client)
_check_network_client(network_client)
if remotefs.expand_storage_cluster(
compute_client, network_client, config, storage_cluster_id,
_REMOTEFSPREP_FILE[0], rebalance):
action_fs_cluster_status(
compute_client, network_client, config, storage_cluster_id,
detail=True, hosts=False)
def action_fs_cluster_suspend(
compute_client, config, storage_cluster_id, wait):
# type: (azure.mgmt.compute.ComputeManagementClient, dict, str,
# bool) -> None
"""Action: Fs Cluster Suspend
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param dict config: configuration dict
:param str storage_cluster_id: storage cluster id
:param bool wait: wait for suspension to complete
"""
_check_compute_client(compute_client)
remotefs.suspend_storage_cluster(
compute_client, config, storage_cluster_id, wait)
def action_fs_cluster_start(
compute_client, network_client, config, storage_cluster_id, wait):
# type: (azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient, dict, str,
# bool) -> None
"""Action: Fs Cluster Start
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param dict config: configuration dict
:param str storage_cluster_id: storage cluster id
:param bool wait: wait for restart to complete
"""
_check_compute_client(compute_client)
_check_network_client(network_client)
remotefs.start_storage_cluster(
compute_client, config, storage_cluster_id, wait)
if wait:
action_fs_cluster_status(
compute_client, network_client, config, storage_cluster_id,
detail=True, hosts=False)
def action_fs_cluster_status(
compute_client, network_client, config, storage_cluster_id,
detail, hosts):
# type: (azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient, dict, str, bool,
# bool) -> None
"""Action: Fs Cluster Status
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param dict config: configuration dict
:param str storage_cluster_id: storage cluster id
:param bool detail: detailed status
:param bool hosts: dump info for /etc/hosts
"""
_check_compute_client(compute_client)
_check_network_client(network_client)
remotefs.stat_storage_cluster(
compute_client, network_client, config, storage_cluster_id,
_REMOTEFSSTAT_FILE[0], detail, hosts)
def action_fs_cluster_ssh(
compute_client, network_client, config, storage_cluster_id,
cardinal, hostname, tty, command):
# type: (azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient, dict, str, int,
# str, bool, tuple) -> None
"""Action: Fs Cluster Ssh
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param dict config: configuration dict
:param str storage_cluster_id: storage cluster id
:param int cardinal: cardinal number
:param str hostname: hostname
:param bool tty: allocate pseudo-tty
:param tuple command: command
"""
_check_compute_client(compute_client)
_check_network_client(network_client)
if cardinal is not None and hostname is not None:
raise ValueError('cannot specify both cardinal and hostname options')
if cardinal is None and hostname is None:
logger.warning(
'assuming node cardinal of 0 as no cardinal or hostname option '
'was specified')
cardinal = 0
if cardinal is not None and cardinal < 0:
raise ValueError('invalid cardinal option value')
remotefs.ssh_storage_cluster(
compute_client, network_client, config, storage_cluster_id,
cardinal, hostname, tty, command)
def action_keyvault_add(keyvault_client, config, keyvault_uri, name):
# type: (azure.keyvault.KeyVaultClient, dict, str, str) -> None
"""Action: Keyvault Add
:param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
:param dict config: configuration dict
:param str keyvault_uri: keyvault uri
:param str name: secret name
"""
_check_keyvault_client(keyvault_client)
keyvault.store_credentials_conf(
keyvault_client, config, keyvault_uri, name)
def action_keyvault_del(keyvault_client, keyvault_uri, name):
# type: (azure.keyvault.KeyVaultClient, str, str) -> None
"""Action: Keyvault Del
:param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
:param str keyvault_uri: keyvault uri
:param str name: secret name
"""
_check_keyvault_client(keyvault_client)
keyvault.delete_secret(keyvault_client, keyvault_uri, name)
def action_keyvault_list(keyvault_client, keyvault_uri):
# type: (azure.keyvault.KeyVaultClient, str) -> None
"""Action: Keyvault List
:param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
:param str keyvault_uri: keyvault uri
"""
_check_keyvault_client(keyvault_client)
keyvault.list_secrets(keyvault_client, keyvault_uri)
def action_cert_create(config):
# type: (dict) -> None
"""Action: Cert Create
:param dict config: configuration dict
"""
sha1tp = crypto.generate_pem_pfx_certificates(config)
logger.info('SHA1 Thumbprint: {}'.format(sha1tp))
def action_cert_add(batch_client, config):
# type: (batchsc.BatchServiceClient, dict) -> None
"""Action: Cert Add
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
:param dict config: configuration dict
"""
_check_batch_client(batch_client)
batch.add_certificate_to_account(batch_client, config, False)
def action_cert_list(batch_client):
# type: (batchsc.BatchServiceClient) -> None
"""Action: Cert List
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
"""
_check_batch_client(batch_client)
batch.list_certificates_in_account(batch_client)
def action_cert_del(batch_client, config):
# type: (batchsc.BatchServiceClient, dict) -> None
"""Action: Cert Del
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
:param dict config: configuration dict
"""
_check_batch_client(batch_client)
batch.del_certificate_from_account(batch_client, config)
def action_pool_listskus(batch_client):
# type: (batchsc.BatchServiceClient) -> None
"""Action: Pool Listskus
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
"""
_check_batch_client(batch_client)
batch.list_node_agent_skus(batch_client)
def action_pool_add(
resource_client, compute_client, network_client, batch_mgmt_client,
batch_client, blob_client, table_client, config):
# type: (azure.mgmt.resource.resources.ResourceManagementClient,
# azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient,
# azure.mgmt.batch.BatchManagementClient,
# azure.batch.batch_service_client.BatchServiceClient,
# azureblob.BlockBlobService, azuretable.TableService,
# dict) -> None
"""Action: Pool Add
:param azure.mgmt.resource.resources.ResourceManagementClient
resource_client: resource client
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
    :param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
        batch management client
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param azure.cosmosdb.table.TableService table_client: table client
:param dict config: configuration dict
"""
_check_batch_client(batch_client)
    # first check if pool exists to prevent accidental metadata clear
if batch_client.pool.exists(settings.pool_id(config)):
raise RuntimeError(
'attempting to create a pool that already exists: {}'.format(
settings.pool_id(config)))
_adjust_settings_for_pool_creation(config)
storage.create_storage_containers(blob_client, table_client, config)
storage.clear_storage_containers(blob_client, table_client, config)
if not settings.is_native_docker_pool(config):
storage.populate_global_resource_blobs(
blob_client, table_client, config)
_add_pool(
resource_client, compute_client, network_client, batch_mgmt_client,
batch_client, blob_client, config
)
def action_pool_list(batch_client):
# type: (batchsc.BatchServiceClient) -> None
"""Action: Pool List
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
"""
_check_batch_client(batch_client)
batch.list_pools(batch_client)
def action_pool_delete(
batch_client, blob_client, table_client, config, pool_id=None,
wait=False):
# type: (batchsc.BatchServiceClient, azureblob.BlockBlobService,
# azuretable.TableService, dict, str, bool) -> None
"""Action: Pool Delete
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param azure.cosmosdb.table.TableService table_client: table client
:param dict config: configuration dict
    :param str pool_id: pool id to delete
:param bool wait: wait for pool to delete
"""
_check_batch_client(batch_client)
deleted = False
try:
deleted = batch.del_pool(batch_client, config, pool_id=pool_id)
except batchmodels.BatchErrorException as ex:
if ('The specified pool does not exist' in ex.message.value or
'The specified pool has been marked for deletion' in
ex.message.value):
deleted = True
else:
logger.exception(ex)
if deleted:
# reset storage settings to target poolid if required
if util.is_not_empty(pool_id):
populate_global_settings(config, False, pool_id=pool_id)
else:
pool_id = settings.pool_id(config)
storage.cleanup_with_del_pool(
blob_client, table_client, config, pool_id=pool_id)
if wait:
logger.debug('waiting for pool {} to delete'.format(pool_id))
while batch_client.pool.exists(pool_id):
time.sleep(3)
def action_pool_resize(batch_client, blob_client, config, wait):
# type: (batchsc.BatchServiceClient, azureblob.BlockBlobService,
# dict, bool) -> None
"""Resize pool that may contain glusterfs
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param dict config: configuration dict
:param bool wait: wait for operation to complete
"""
_check_batch_client(batch_client)
pool = settings.pool_settings(config)
# check direction of resize
_pool = batch_client.pool.get(pool.id)
if (pool.vm_count.dedicated == _pool.current_dedicated_nodes ==
_pool.target_dedicated_nodes and
pool.vm_count.low_priority == _pool.current_low_priority_nodes ==
_pool.target_low_priority_nodes):
logger.error(
'pool {} is already at {} nodes'.format(pool.id, pool.vm_count))
return
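    # determine the resize direction per node class; ssh user creation and
    # gluster brick addition below only apply when scaling up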
resize_up_d = False
resize_up_lp = False
if pool.vm_count.dedicated > _pool.current_dedicated_nodes:
resize_up_d = True
if pool.vm_count.low_priority > _pool.current_low_priority_nodes:
resize_up_lp = True
del _pool
create_ssh_user = False
# try to get handle on public key, avoid generating another set
# of keys
if resize_up_d or resize_up_lp:
if pool.ssh.username is None:
logger.info('not creating ssh user on new nodes of pool {}'.format(
pool.id))
else:
if pool.ssh.ssh_public_key is None:
sfp = pathlib.Path(crypto.get_ssh_key_prefix() + '.pub')
if sfp.exists():
logger.debug(
'setting public key for ssh user to: {}'.format(sfp))
settings.set_ssh_public_key(config, str(sfp))
create_ssh_user = True
else:
logger.warning(
('not creating ssh user for new nodes of pool {} as '
'an existing ssh public key cannot be found').format(
pool.id))
create_ssh_user = False
# check if this is a glusterfs-enabled pool
gluster_present = False
voltype = None
try:
sdv = settings.global_resources_shared_data_volumes(config)
for sdvkey in sdv:
if settings.is_shared_data_volume_gluster_on_compute(sdv, sdvkey):
gluster_present = True
try:
voltype = settings.gluster_volume_type(sdv, sdvkey)
except KeyError:
pass
break
except KeyError:
pass
logger.debug('glusterfs shared volume present: {}'.format(
gluster_present))
if gluster_present:
if resize_up_lp:
raise RuntimeError(
'cannot resize up a pool with glusterfs_on_compute and '
'low priority nodes')
logger.debug('forcing wait to True due to glusterfs')
wait = True
# cache old nodes
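    # (the pre-resize node list is used to identify which nodes are new
    # after the resize completes)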
old_nodes = {}
if gluster_present or create_ssh_user:
for node in batch_client.compute_node.list(pool.id):
old_nodes[node.id] = node.ip_address
# resize pool
nodes = batch.resize_pool(batch_client, config, wait)
# add ssh user to new nodes if present
if create_ssh_user and (resize_up_d or resize_up_lp):
if wait:
# get list of new nodes only
new_nodes = [node for node in nodes if node.id not in old_nodes]
# create admin user on each new node if requested
batch.add_ssh_user(batch_client, config, nodes=new_nodes)
            # log remote login settings for new nodes
batch.get_remote_login_settings(
batch_client, config, nodes=new_nodes)
del new_nodes
else:
logger.warning('ssh user was not added as --wait was not given')
# add brick for new nodes
if gluster_present and resize_up_d:
# get pool current dedicated
_pool = batch_client.pool.get(pool.id)
# ensure current dedicated is the target
if pool.vm_count.dedicated != _pool.current_dedicated_nodes:
raise RuntimeError(
                ('cannot perform glusterfs setup on new nodes: current '
                 'dedicated node count {} does not match vm_count {}').format(
                    _pool.current_dedicated_nodes, pool.vm_count.dedicated))
del _pool
# get internal ip addresses of new nodes
new_nodes = [
node.ip_address for node in nodes if node.id not in old_nodes
]
masterip = next(iter(old_nodes.values()))
# get tempdisk mountpoint
tempdisk = settings.temp_disk_mountpoint(config)
# construct cmdline
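        # resize script arguments: <gluster volume type> <temp disk
        # mountpoint> <dedicated vm count> <master ip> <new node ips ...>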
cmdline = util.wrap_commands_in_shell([
'$AZ_BATCH_TASK_DIR/{} {} {} {} {} {}'.format(
_GLUSTERRESIZE_FILE[0], voltype.lower(), tempdisk,
pool.vm_count.dedicated, masterip, ' '.join(new_nodes))])
# setup gluster
_setup_glusterfs(
batch_client, blob_client, config, nodes, _GLUSTERRESIZE_FILE,
cmdline=cmdline)
def action_pool_nodes_grls(batch_client, config):
# type: (batchsc.BatchServiceClient, dict) -> None
"""Action: Pool Nodes Grls
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
"""
_check_batch_client(batch_client)
batch.get_remote_login_settings(batch_client, config)
batch.generate_ssh_tunnel_script(
batch_client, settings.pool_settings(config), None, None)
def action_pool_nodes_list(batch_client, config):
# type: (batchsc.BatchServiceClient, dict) -> None
"""Action: Pool Nodes List
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
"""
_check_batch_client(batch_client)
batch.list_nodes(batch_client, config)
def action_pool_user_add(batch_client, config):
# type: (batchsc.BatchServiceClient, dict) -> None
"""Action: Pool User Add
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
"""
_check_batch_client(batch_client)
if settings.is_windows_pool(config):
batch.add_rdp_user(batch_client, config)
else:
batch.add_ssh_user(batch_client, config)
def action_pool_user_del(batch_client, config):
# type: (batchsc.BatchServiceClient, dict) -> None
"""Action: Pool Dru
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
"""
_check_batch_client(batch_client)
if settings.is_windows_pool(config):
batch.del_rdp_user(batch_client, config)
else:
batch.del_ssh_user(batch_client, config)
def action_pool_ssh(batch_client, config, cardinal, nodeid, tty, command):
# type: (batchsc.BatchServiceClient, dict, int, str, bool, tuple) -> None
"""Action: Pool Ssh
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param int cardinal: cardinal node num
:param str nodeid: node id
:param bool tty: allocate pseudo-tty
:param tuple command: command to execute
"""
_check_batch_client(batch_client)
if cardinal is not None and nodeid is not None:
raise ValueError('cannot specify both cardinal and nodeid options')
if cardinal is None and nodeid is None:
logger.warning(
'assuming node cardinal of 0 as no cardinal or nodeid option '
'was specified')
cardinal = 0
if cardinal is not None and cardinal < 0:
raise ValueError('invalid cardinal option value')
pool = settings.pool_settings(config)
ssh_private_key = pool.ssh.ssh_private_key
if ssh_private_key is None:
ssh_private_key = pathlib.Path(
pool.ssh.generated_file_export_path, crypto.get_ssh_key_prefix())
ip, port = batch.get_remote_login_setting_for_node(
batch_client, config, cardinal, nodeid)
crypto.connect_or_exec_ssh_command(
ip, port, ssh_private_key, pool.ssh.username, tty=tty,
command=command)
def action_pool_nodes_del(
batch_client, config, all_start_task_failed, all_starting,
all_unusable, nodeid):
# type: (batchsc.BatchServiceClient, dict, bool, bool, bool, str) -> None
"""Action: Pool Nodes Del
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param bool all_start_task_failed: delete all start task failed nodes
:param bool all_starting: delete all starting nodes
:param bool all_unusable: delete all unusable nodes
:param str nodeid: nodeid to delete
"""
_check_batch_client(batch_client)
if ((all_start_task_failed or all_starting or all_unusable) and
nodeid is not None):
raise ValueError(
            'cannot specify all start task failed, starting, or unusable '
            'nodes with a specific node id')
batch.del_node(
batch_client, config, all_start_task_failed, all_starting,
all_unusable, nodeid)
def action_pool_nodes_reboot(
batch_client, config, all_start_task_failed, nodeid):
# type: (batchsc.BatchServiceClient, dict, bool, str) -> None
"""Action: Pool Nodes Reboot
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param bool all_start_task_failed: reboot all start task failed nodes
:param str nodeid: nodeid to reboot
"""
_check_batch_client(batch_client)
if all_start_task_failed and nodeid is not None:
raise ValueError(
'cannot specify all start task failed nodes with a specific '
'node id')
batch.reboot_nodes(batch_client, config, all_start_task_failed, nodeid)
def action_pool_images_update(
batch_client, config, docker_image, docker_image_digest,
singularity_image, ssh):
# type: (batchsc.BatchServiceClient, dict, str, str, str, bool) -> None
"""Action: Pool Images Update
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param str docker_image: docker image to update
:param str docker_image_digest: docker image digest to update to
:param str singularity_image: singularity image to update
:param bool ssh: use direct SSH update mode
"""
_check_batch_client(batch_client)
if docker_image_digest is not None and docker_image is None:
raise ValueError(
'cannot specify a digest to update to without the image')
_update_container_images(
batch_client, config, docker_image, docker_image_digest,
singularity_image, force_ssh=ssh)
def action_pool_images_list(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
"""Action: Pool Images List
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
"""
_check_batch_client(batch_client)
_list_docker_images(batch_client, config)
def action_pool_stats(batch_client, config, pool_id):
# type: (batchsc.BatchServiceClient, dict, str) -> None
"""Action: Pool Stats
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param str pool_id: pool id
"""
_check_batch_client(batch_client)
batch.pool_stats(batch_client, config, pool_id=pool_id)
def action_pool_autoscale_disable(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
"""Action: Pool Autoscale Disable
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
"""
_check_batch_client(batch_client)
batch.pool_autoscale_disable(batch_client, config)
def action_pool_autoscale_enable(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
"""Action: Pool Autoscale Enable
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
"""
_check_batch_client(batch_client)
batch.pool_autoscale_enable(batch_client, config)
def action_pool_autoscale_evaluate(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
"""Action: Pool Autoscale Evaluate
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
"""
_check_batch_client(batch_client)
batch.pool_autoscale_evaluate(batch_client, config)
def action_pool_autoscale_lastexec(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
"""Action: Pool Autoscale Lastexec
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
"""
_check_batch_client(batch_client)
batch.pool_autoscale_lastexec(batch_client, config)
def action_jobs_add(
resource_client, compute_client, network_client, batch_mgmt_client,
batch_client, blob_client, table_client, keyvault_client, config,
recreate, tail):
# type: (azure.mgmt.resource.resources.ResourceManagementClient,
# azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient,
# azure.mgmt.batch.BatchManagementClient,
# azure.batch.batch_service_client.BatchServiceClient,
# azureblob.BlockBlobService, azuretable.TableService,
# azure.keyvault.KeyVaultClient, dict, bool, str) -> None
"""Action: Jobs Add
:param azure.mgmt.resource.resources.ResourceManagementClient
resource_client: resource client
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
    :param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
        batch management client
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param azure.cosmosdb.table.TableService table_client: table client
:param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
:param dict config: configuration dict
:param bool recreate: recreate jobs if completed
    :param str tail: file to tail of the last job and task added
"""
_check_batch_client(batch_client)
# check for job autopools
autopool = batch.check_jobs_for_auto_pool(config)
if autopool:
# check to ensure pool id is within 20 chars
pool_id = settings.pool_id(config)
if len(pool_id) > 20:
raise ValueError(
'pool id must be less than 21 characters: {}'.format(pool_id))
        # check if a pool with the same id already exists
        try:
            batch_client.pool.get(pool_id)
        except batchmodels.BatchErrorException as ex:
            if 'The specified pool does not exist' in ex.message.value:
                pass
            else:
                raise
        else:
            raise RuntimeError(
                'pool with id of {} already exists'.format(pool_id))
_adjust_settings_for_pool_creation(config)
# create storage containers and clear
storage.create_storage_containers(blob_client, table_client, config)
storage.clear_storage_containers(blob_client, table_client, config)
if not settings.is_native_docker_pool(config):
storage.populate_global_resource_blobs(
blob_client, table_client, config)
# create autopool specification object
autopool = _construct_auto_pool_specification(
resource_client, compute_client, network_client, batch_mgmt_client,
batch_client, blob_client, config
)
# check settings and warn
_check_settings_for_auto_pool(config)
else:
autopool = None
# add jobs
is_windows = settings.is_windows_pool(config)
batch.add_jobs(
batch_client, blob_client, keyvault_client, config, autopool,
_IMAGE_BLOCK_FILE,
_BLOBXFER_WINDOWS_FILE if is_windows else _BLOBXFER_FILE,
recreate, tail)
def action_jobs_list(batch_client, config):
# type: (batchsc.BatchServiceClient, dict) -> None
"""Action: Jobs List
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
"""
_check_batch_client(batch_client)
batch.list_jobs(batch_client, config)
def action_jobs_tasks_list(
batch_client, config, all, jobid, poll_until_tasks_complete):
# type: (batchsc.BatchServiceClient, dict, bool, str, bool) -> None
"""Action: Jobs Tasks List
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param bool all: all jobs
:param str jobid: job id
:param bool poll_until_tasks_complete: poll until tasks complete
"""
_check_batch_client(batch_client)
if all and jobid is not None:
raise ValueError('cannot specify both --all and --jobid')
while True:
all_complete = batch.list_tasks(
batch_client, config, all=all, jobid=jobid)
if not poll_until_tasks_complete or all_complete:
break
time.sleep(5)
def action_jobs_tasks_term(batch_client, config, jobid, taskid, wait, force):
# type: (batchsc.BatchServiceClient, dict, str, str, bool, bool) -> None
"""Action: Jobs Tasks Term
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param str jobid: job id
:param str taskid: task id
:param bool wait: wait for action to complete
:param bool force: force docker kill even if completed
"""
_check_batch_client(batch_client)
if taskid is not None and jobid is None:
raise ValueError(
'cannot specify a task to terminate without the corresponding '
'job id')
if force and (taskid is None or jobid is None):
raise ValueError('cannot force docker kill without task id/job id')
batch.terminate_tasks(
batch_client, config, jobid=jobid, taskid=taskid, wait=wait,
force=force)
def action_jobs_tasks_del(batch_client, config, jobid, taskid, wait):
# type: (batchsc.BatchServiceClient, dict, str, str, bool) -> None
"""Action: Jobs Tasks Del
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param str jobid: job id
:param str taskid: task id
:param bool wait: wait for action to complete
"""
_check_batch_client(batch_client)
if taskid is not None and jobid is None:
raise ValueError(
'cannot specify a task to delete without the corresponding '
'job id')
batch.del_tasks(
batch_client, config, jobid=jobid, taskid=taskid, wait=wait)
def action_jobs_del_or_term(
batch_client, blob_client, table_client, config, delete, all_jobs,
all_jobschedules, jobid, jobscheduleid, termtasks, wait):
    # type: (batchsc.BatchServiceClient, azureblob.BlockBlobService,
    #        azuretable.TableService, dict, bool, bool, bool, str, str,
    #        bool, bool) -> None
"""Action: Jobs Del or Term
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    :param bool delete: delete instead of terminate
:param bool all_jobs: all jobs
:param bool all_jobschedules: all job schedules
:param str jobid: job id
:param str jobscheduleid: job schedule id
:param bool termtasks: terminate tasks prior
:param bool wait: wait for action to complete
"""
_check_batch_client(batch_client)
if jobid is not None and jobscheduleid is not None:
raise ValueError('cannot specify both --jobid and --jobscheduleid')
if all_jobs:
if jobid is not None:
raise ValueError('cannot specify both --all-jobs and --jobid')
batch.delete_or_terminate_all_jobs(
batch_client, config, delete, termtasks=termtasks, wait=wait)
elif all_jobschedules:
if jobscheduleid is not None:
raise ValueError(
'cannot specify both --all-jobschedules and --jobscheduleid')
if termtasks:
raise ValueError(
'Cannot specify --termtasks with --all-jobschedules. '
'Please terminate tasks with each individual job first.')
batch.delete_or_terminate_all_job_schedules(
batch_client, config, delete, wait=wait)
else:
# check for autopool
if util.is_none_or_empty(jobid):
autopool = batch.check_jobs_for_auto_pool(config)
if autopool:
                # check if a pool with the same id already exists
try:
batch_client.pool.get(settings.pool_id(config))
except batchmodels.BatchErrorException as ex:
if 'The specified pool does not exist' in ex.message.value:
pass
else:
autopool = False
else:
autopool = False
# terminate the jobs
batch.delete_or_terminate_jobs(
batch_client, config, delete, jobid=jobid,
jobscheduleid=jobscheduleid, termtasks=termtasks, wait=wait)
# if autopool, delete the storage
if autopool:
storage.cleanup_with_del_pool(blob_client, table_client, config)
def action_jobs_cmi(batch_client, config, delete):
# type: (batchsc.BatchServiceClient, dict, bool) -> None
"""Action: Jobs Cmi
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param bool delete: delete all cmi jobs
"""
_check_batch_client(batch_client)
if delete:
batch.del_clean_mi_jobs(batch_client, config)
else:
batch.clean_mi_jobs(batch_client, config)
batch.del_clean_mi_jobs(batch_client, config)
def action_jobs_migrate(
batch_client, config, jobid, jobscheduleid, poolid, requeue,
terminate, wait):
# type: (batchsc.BatchServiceClient, dict, str, str, str, bool, bool,
# bool) -> None
"""Action: Jobs Migrate
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param str jobid: job id to migrate to in lieu of config
:param str jobscheduleid: job schedule id to migrate to in lieu of config
:param str poolid: pool id to migrate to in lieu of config
:param bool requeue: requeue action
:param bool terminate: terminate action
:param bool wait: wait action
"""
_check_batch_client(batch_client)
if jobid is not None:
if jobscheduleid is not None:
raise ValueError('cannot specify both --jobid and --jobscheduleid')
if [requeue, terminate, wait].count(True) != 1:
raise ValueError(
'must specify only one option of --requeue, --terminate, '
'--wait')
if requeue:
action = 'requeue'
elif terminate:
action = 'terminate'
elif wait:
action = 'wait'
else:
action = None
# check jobs to see if targetted pool id is the same
batch.check_pool_for_job_migration(
batch_client, config, jobid=jobid, jobscheduleid=jobscheduleid,
poolid=poolid)
if not util.confirm_action(
config, msg='migration of jobs or job schedules'):
return
logger.warning(
'ensure that the new target pool has the proper Docker images '
'loaded, or you have enabled allow_run_on_missing_image')
# disable job and wait for disabled state
batch.disable_jobs(
batch_client, config, action, jobid=jobid, jobscheduleid=jobscheduleid,
suppress_confirm=True)
# patch job
batch.update_job_with_pool(
batch_client, config, jobid=jobid, jobscheduleid=jobscheduleid,
poolid=poolid)
# enable job
batch.enable_jobs(
batch_client, config, jobid=jobid, jobscheduleid=jobscheduleid)
def action_jobs_disable(
batch_client, config, jobid, jobscheduleid, requeue, terminate, wait):
# type: (batchsc.BatchServiceClient, dict, str, str, bool, bool,
# bool) -> None
"""Action: Jobs Disable
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param str jobid: job id to disable to in lieu of config
:param str jobscheduleid: job schedule id to disable to in lieu of config
:param bool requeue: requeue action
:param bool terminate: terminate action
:param bool wait: wait action
"""
_check_batch_client(batch_client)
if jobid is not None:
if jobscheduleid is not None:
raise ValueError('cannot specify both --jobid and --jobscheduleid')
if [requeue, terminate, wait].count(True) != 1:
raise ValueError(
'must specify only one option of --requeue, --terminate, '
'--wait')
if requeue:
action = 'requeue'
elif terminate:
action = 'terminate'
elif wait:
action = 'wait'
else:
action = None
batch.disable_jobs(
batch_client, config, action, jobid=jobid,
jobscheduleid=jobscheduleid, disabling_state_ok=True)


def action_jobs_enable(batch_client, config, jobid, jobscheduleid):
# type: (batchsc.BatchServiceClient, dict, str, str) -> None
"""Action: Jobs Enable
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
    :param str jobid: job id to enable in lieu of config
    :param str jobscheduleid: job schedule id to enable in lieu of config
"""
_check_batch_client(batch_client)
batch.enable_jobs(
batch_client, config, jobid=jobid, jobscheduleid=jobscheduleid)


def action_jobs_stats(batch_client, config, job_id):
# type: (batchsc.BatchServiceClient, dict, str) -> None
"""Action: Jobs Stats
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param str job_id: job id
"""
_check_batch_client(batch_client)
batch.job_stats(batch_client, config, jobid=job_id)


def action_storage_del(
blob_client, table_client, config, clear_tables, poolid):
# type: (azureblob.BlockBlobService, azuretable.TableService,
# dict, bool, str) -> None
"""Action: Storage Del
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param azure.cosmosdb.table.TableService table_client: table client
:param dict config: configuration dict
:param bool clear_tables: clear tables instead of deleting
:param str poolid: pool id to target
"""
# reset storage settings to target poolid
if util.is_not_empty(poolid):
populate_global_settings(config, False, pool_id=poolid)
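    # when clearing tables, wipe the table entities first; the delete call
    # below then skips tables so only the remaining containers are removed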
if clear_tables:
storage.clear_storage_containers(
blob_client, table_client, config, tables_only=True,
pool_id=poolid)
storage.delete_storage_containers(
blob_client, table_client, config, skip_tables=clear_tables)


def action_storage_clear(blob_client, table_client, config, poolid):
# type: (azureblob.BlockBlobService, azuretable.TableService, dict,
# str) -> None
"""Action: Storage Clear
:param azure.storage.blob.BlockBlobService blob_client: blob client
:param azure.cosmosdb.table.TableService table_client: table client
:param dict config: configuration dict
:param str poolid: pool id to target
"""
# reset storage settings to target poolid
if util.is_not_empty(poolid):
populate_global_settings(config, False, pool_id=poolid)
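    # clear (rather than delete) all storage containers used by this pool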
storage.clear_storage_containers(
blob_client, table_client, config, pool_id=poolid)


def action_data_files_stream(batch_client, config, filespec, disk):
# type: (batchsc.BatchServiceClient, dict, str, bool) -> None
"""Action: Data Files Stream
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param str filespec: filespec of file to retrieve
    :param bool disk: write streamed data to disk instead of to the console
"""
_check_batch_client(batch_client)
batch.stream_file_and_wait_for_task(batch_client, config, filespec, disk)


def action_data_files_list(batch_client, config, jobid, taskid):
# type: (batchsc.BatchServiceClient, dict, str, str) -> None
"""Action: Data Files List
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param str jobid: job id to list
:param str taskid: task id to list
"""
_check_batch_client(batch_client)
if taskid is not None and jobid is None:
raise ValueError(
'cannot specify a task to list files without the corresponding '
'job id')
batch.list_task_files(batch_client, config, jobid, taskid)


def action_data_files_task(batch_client, config, all, filespec):
# type: (batchsc.BatchServiceClient, dict, bool, str) -> None
"""Action: Data Files Task
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param bool all: retrieve all files
:param str filespec: filespec of file to retrieve
"""
_check_batch_client(batch_client)
if all:
batch.get_all_files_via_task(batch_client, config, filespec)
else:
batch.get_file_via_task(batch_client, config, filespec)


def action_data_files_node(batch_client, config, all, nodeid):
# type: (batchsc.BatchServiceClient, dict, bool, str) -> None
"""Action: Data Files Node
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
:param bool all: retrieve all files
:param str nodeid: node id to retrieve file from
"""
_check_batch_client(batch_client)
if all:
batch.get_all_files_via_node(batch_client, config, nodeid)
else:
batch.get_file_via_node(batch_client, config, nodeid)


def action_data_ingress(
batch_client, compute_client, network_client, config, to_fs):
# type: (batchsc.BatchServiceClient,
# azure.mgmt.compute.ComputeManagementClient,
# azure.mgmt.network.NetworkManagementClient, dict, str) -> None
"""Action: Data Ingress
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param azure.mgmt.compute.ComputeManagementClient compute_client:
compute client
:param azure.mgmt.network.NetworkManagementClient network_client:
network client
:param dict config: configuration dict
    :param str to_fs: remote filesystem storage cluster id to ingress to
"""
pool_total_vm_count = None
if util.is_none_or_empty(to_fs):
try:
            # get pool current dedicated and low priority node counts
pool = batch_client.pool.get(settings.pool_id(config))
pool_total_vm_count = (
pool.current_dedicated_nodes + pool.current_low_priority_nodes
)
del pool
# ensure there are remote login settings
rls = batch.get_remote_login_settings(
batch_client, config, nodes=None)
# ensure nodes are at least idle/running for shared ingress
kind = 'all'
if not batch.check_pool_nodes_runnable(batch_client, config):
kind = 'storage'
except batchmodels.BatchErrorException as ex:
if 'The specified pool does not exist' in ex.message.value:
rls = None
kind = 'storage'
else:
raise
else:
rls = None
kind = 'remotefs'
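        # direct ingress to a remote filesystem requires the ARM compute
        # and network clients checked below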
if compute_client is None or network_client is None:
raise RuntimeError(
'required ARM clients are invalid, please provide management '
'AAD credentials')
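    # start the data ingress and block until any spawned storage threads
    # have completed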
storage_threads = data.ingress_data(
batch_client, compute_client, network_client, config, rls=rls,
kind=kind, total_vm_count=pool_total_vm_count, to_fs=to_fs)
data.wait_for_storage_threads(storage_threads)


def action_misc_tensorboard(
batch_client, config, jobid, taskid, logdir, image):
# type: (batchsc.BatchServiceClient, dict, str, str, str, str) -> None
"""Action: Misc Tensorboard
:param azure.batch.batch_service_client.BatchServiceClient batch_client:
batch client
:param dict config: configuration dict
    :param str jobid: job id of the task to tunnel to
    :param str taskid: task id to tunnel to
    :param str logdir: Tensorboard log directory
    :param str image: TensorFlow image to use
"""
_check_batch_client(batch_client)
if util.is_none_or_empty(jobid):
jobspecs = settings.job_specifications(config)
if len(jobspecs) != 1:
raise ValueError(
'The number of jobs in the specified jobs config is not '
'one. Please specify which job with --jobid.')
if util.is_not_empty(taskid):
raise ValueError(
'cannot specify a task to tunnel Tensorboard to without the '
'corresponding job id')
    misc.tunnel_tensorboard(
        batch_client, config, jobid, taskid, logdir, image)
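# A hedged usage sketch for the action above (hypothetical values; this is
# presumably surfaced via the shipyard "misc tensorboard" CLI command):
#   action_misc_tensorboard(
#       batch_client, config, jobid='tfjob', taskid='task-00000',
#       logdir='/shipyard/logs', image='tensorflow/tensorflow:latest')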