batch-shipyard/convoy/fleet.py

# Copyright (c) Microsoft Corporation
#
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# compat imports
from __future__ import (
    absolute_import, division, print_function, unicode_literals
)
from builtins import (  # noqa
    bytes, dict, int, list, object, range, str, ascii, chr, hex, input,
    next, oct, open, pow, round, super, filter, map, zip)
# stdlib imports
import concurrent.futures
import logging
import os
try:
    import pathlib2 as pathlib
except ImportError:
    import pathlib
import requests
import sys
import tempfile
import time
import uuid
# non-stdlib imports
import azure.batch.models as batchmodels
# local imports
from . import autoscale
from . import batch
from . import crypto
from . import data
from . import federation
from . import keyvault
from . import misc
from . import monitor
from . import remotefs
from . import resource
from . import settings
from . import slurm
from . import storage
from . import util
from .version import __version__

# create logger
logger = logging.getLogger(__name__)
util.setup_logger(logger)
# global defines
# chunk size in bytes (4 * 1024 * 1024) requested per iteration when
# streaming a download to disk in _download_file
_REQUEST_CHUNK_SIZE = 4194304
# parent of the directory that contains this file
_ROOT_PATH = pathlib.Path(__file__).resolve().parent.parent
# resources directory; stays None until initialize_globals() resolves it
_RESOURCES_PATH = None
# NVIDIA driver downloads. Apart from 'license', each key is a gpu type as
# returned by settings.get_gpu_type_from_vm_size() and maps to a download
# dict with:
#   'url': location the driver installer is fetched from
#   'sha256': expected hex digest of the downloaded file
#   'target': file name the download is stored under in the resources path
# 'license' is the license URL shown to the user for confirmation before a
# driver is downloaded.
_NVIDIA_DRIVER = {
    'compute_cc37': {
        'url': (
            'http://us.download.nvidia.com/tesla/'
            '410.104/NVIDIA-Linux-x86_64-410.104.run'
        ),
        'sha256': (
            '6ac4f7355a11e4b6ea2ff5b3d6ea677247f286daecf2e9373ef947ee08682cb7'
        ),
        'target': 'nvidia-driver_cc37.run'
    },
    'compute_cc6-7': {
        'url': (
            'http://us.download.nvidia.com/tesla/'
            '410.104/NVIDIA-Linux-x86_64-410.104.run'
        ),
        'sha256': (
            '6ac4f7355a11e4b6ea2ff5b3d6ea677247f286daecf2e9373ef947ee08682cb7'
        ),
        'target': 'nvidia-driver_cc6-7.run'
    },
    'viz_cc52': {
        # https://aka.ms/nvgrid-linux
        # https://go.microsoft.com/fwlink/?linkid=874272
        'url': (
            'http://download.microsoft.com/download/8/5/D/'
            '85DC7798-B9F7-4BB9-84E8-B3350D7B52F7/'
            'NVIDIA-Linux-x86_64-410.92-grid.run'
        ),
        'sha256': (
            '8289790fe89bb1e071422e2bc2c7e66ff89652b59eaca5ef4dbab16c0864ccb9'
        ),
        'target': 'nvidia-driver-grid.run'
    },
    'license': (
        'http://www.nvidia.com/content/DriverDownload-March2009'
        '/licence.php?lang=us'
    ),
}
# Intel MPI runtime download dict consumed by _setup_intel_mpi_rt_package:
#   'url': location the package is fetched from
#   'sha256': expected hex digest of the downloaded file
#   'target': file name the download is stored under in the resources path
#   'license': license URL shown to the user for confirmation before the
#       package is downloaded
_INTEL_MPI_RT_PACKAGE = {
    'url': (
        'http://registrationcenter-download.intel.com/akdlm/irc_nas/tec/9279/'
        'l_mpi-rt_p_5.1.3.223.tgz'
    ),
    'sha256': (
        '91c5f7575c6b5fbf493c07a255c39ae91e15cd75b26ee90355fa27e0e1b4f22e'
    ),
    'target': 'intel_mpi_rt.tar.gz',
    'license': (
        'https://software.intel.com/license/intel-simplified-software-license'
    )
}
# LIS package download dict consumed by _setup_lis_package:
#   'url': location the package is fetched from
#   'sha256': expected hex digest of the downloaded (uncompacted) file
#   'target': file name the download is stored under in the resources path
#   'intermediate': name of the uncompressed tar that entries are deleted
#       from during compaction
#   'target_compact': name of the re-compressed, compacted archive that
#       _setup_lis_package returns
_LIS_PACKAGE = {
    # https://aka.ms/lis
    'url': (
        'http://download.microsoft.com/download/6/8/F/'
        '68FE11B8-FAA4-4F8D-8C7D-74DA7F2CFC8C/'
        'lis-rpms-4.2.8-1.tar.gz'
    ),
    'sha256': (
        'aee81e79b81efd1db97e470b5d179fa4c7e954a2c0d996553dfbc081734a5304'
    ),
    'target': 'lis.tar.gz',
    'intermediate': 'lis_compact.tar',
    'target_compact': 'lis_compact.tar.gz'
}
# Batch Insights binaries keyed by 'linux' or 'windows'; the key is picked
# in _setup_batch_insights_package depending on whether the pool is a
# Windows pool. Each entry is a download dict with 'url', 'sha256'
# (expected hex digest) and 'target' (file name in the resources path).
_BATCH_INSIGHTS = {
    'linux': {
        'url': (
            'https://github.com/Azure/batch-insights/releases/download/v'
            '1.2.0/batch-insights'
        ),
        'sha256': (
            'd9ad5e38162772c93e241fb99dc07d1c49181a4ceb04ba175f92bf9b3698c6de'
        ),
        'target': 'batch-insights'
    },
    'windows': {
        'url': (
            'https://github.com/Azure/batch-insights/releases/download/v'
            '1.2.0/batch-insights.exe'
        ),
        'sha256': (
            '58c957d8c69c070b52e3d51f84f6cad214789305b8ffd61777564c32b81a2e21'
        ),
        'target': 'batch-insights.exe'
    }
}
# Prometheus related downloads. Each entry is a download dict with 'url',
# 'sha256' (expected hex digest) and 'target' (file name in the resources
# path). The cadvisor entry additionally names 'target_compact', the
# gzip-compressed copy produced in _setup_prometheus_monitoring_tools.
_PROMETHEUS = {
    'node_exporter': {
        'url': (
            'https://github.com/prometheus/node_exporter/releases/download/v'
            '0.16.0/node_exporter-0.16.0.linux-amd64.tar.gz'
        ),
        'sha256': (
            'e92a601a5ef4f77cce967266b488a978711dabc527a720bea26505cba426c029'
        ),
        'target': 'node_exporter.tar.gz'
    },
    'cadvisor': {
        'url': (
            'https://github.com/google/cadvisor/releases/download/v'
            '0.27.4/cadvisor'
        ),
        'sha256': (
            '378df92f532166251fa3f116beea26ca6364e45e3d6a63ea78b7627ea54bd303'
        ),
        'target': 'cadvisor',
        'target_compact': 'cadvisor.gz'
    },
}
# Each *_FILE constant below is a 2-tuple of
# (file name, pathlib.Path to that file below _ROOT_PATH).
# NOTE(review): the consumers of these tuples are outside this view;
# presumably the first element is the name the file is published under and
# the second its local source path - confirm against the callers.
_CASCADE_FILE = (
    'cascade.py',
    pathlib.Path(_ROOT_PATH, 'cascade/cascade.py')
)
_CASCADE_REQ_FILE = (
    'requirements.txt',
    pathlib.Path(_ROOT_PATH, 'cascade/requirements.txt')
)
_PERF_FILE = (
    'perf.py',
    pathlib.Path(_ROOT_PATH, 'cascade/perf.py')
)
_MIRROR_SYSTEM_IMAGES_FILE = (
    'replicate_batch_shipyard_images.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/replicate_batch_shipyard_images.sh')
)
_NODEPREP_FILE = (
    'shipyard_nodeprep.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/shipyard_nodeprep.sh')
)
_NODEPREP_WINDOWS_FILE = (
    'shipyard_nodeprep_nativedocker.ps1',
    pathlib.Path(
        _ROOT_PATH,
        'scripts/windows/shipyard_nodeprep_nativedocker.ps1'
    )
)
_GLUSTERPREP_FILE = (
    'shipyard_glusterfs_on_compute.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/shipyard_glusterfs_on_compute.sh')
)
_GLUSTERRESIZE_FILE = (
    'shipyard_glusterfs_on_compute_resize.sh',
    pathlib.Path(
        _ROOT_PATH, 'scripts/shipyard_glusterfs_on_compute_resize.sh')
)
_AUTOSCRATCH_FILE = (
    'shipyard_auto_scratch.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/shipyard_auto_scratch.sh')
)
_HPNSSH_FILE = (
    'shipyard_hpnssh.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/shipyard_hpnssh.sh')
)
_IMAGE_BLOCK_FILE = (
    'wait_for_images.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/wait_for_images.sh')
)
_REGISTRY_LOGIN_FILE = (
    'registry_login.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/registry_login.sh')
)
_REGISTRY_LOGIN_WINDOWS_FILE = (
    'registry_login.ps1',
    pathlib.Path(_ROOT_PATH, 'scripts/windows/registry_login.ps1')
)
_BLOBXFER_FILE = (
    'shipyard_blobxfer.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/shipyard_blobxfer.sh')
)
_BLOBXFER_WINDOWS_FILE = (
    'shipyard_blobxfer.ps1',
    pathlib.Path(_ROOT_PATH, 'scripts/windows/shipyard_blobxfer.ps1')
)
_REMOTEFSPREP_FILE = (
    'shipyard_remotefs_bootstrap.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/shipyard_remotefs_bootstrap.sh')
)
_REMOTEFSADDBRICK_FILE = (
    'shipyard_remotefs_addbrick.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/shipyard_remotefs_addbrick.sh')
)
_REMOTEFSSTAT_FILE = (
    'shipyard_remotefs_stat.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/shipyard_remotefs_stat.sh')
)
# all remote filesystem script tuples gathered in one list
_ALL_REMOTEFS_FILES = [
    _REMOTEFSPREP_FILE, _REMOTEFSADDBRICK_FILE, _REMOTEFSSTAT_FILE,
]
# Monitoring files, each a 2-tuple of
# (file name, pathlib.Path to that file below _ROOT_PATH).
_MONITORINGPREP_FILE = (
    'shipyard_monitoring_bootstrap.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/shipyard_monitoring_bootstrap.sh')
)
_MONITORINGSERVICES_FILE = (
    'docker-compose.yml',
    pathlib.Path(_ROOT_PATH, 'heimdall/docker-compose.yml')
)
_MONITORINGSERVICESNONGINX_FILE = (
    'docker-compose-nonginx.yml',
    pathlib.Path(_ROOT_PATH, 'heimdall/docker-compose-nonginx.yml')
)
_MONITORINGPROMCONF_FILE = (
    'prometheus.yml',
    pathlib.Path(_ROOT_PATH, 'heimdall/prometheus.yml')
)
_MONITORINGNGINXCONF_FILE = (
    'nginx.conf',
    pathlib.Path(_ROOT_PATH, 'heimdall/nginx.conf')
)
_MONITORINGGRAFANADASHBOARD_FILE = (
    'batch_shipyard_dashboard.json',
    pathlib.Path(_ROOT_PATH, 'heimdall/batch_shipyard_dashboard.json')
)
# lookup of the monitoring file tuples above by short key
# NOTE(review): presumably the set of files a user may override - the
# consumer is outside this view, confirm there
_CONFIGURABLE_MONITORING_FILES = {
    'compose': _MONITORINGSERVICES_FILE,
    'compose-nonginx': _MONITORINGSERVICESNONGINX_FILE,
    'prometheus': _MONITORINGPROMCONF_FILE,
    'nginx': _MONITORINGNGINXCONF_FILE,
    'dashboard': _MONITORINGGRAFANADASHBOARD_FILE,
}
# Federation files, each a 2-tuple of
# (file name, pathlib.Path to that file below _ROOT_PATH).
_FEDERATIONPREP_FILE = (
    'shipyard_federation_bootstrap.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/shipyard_federation_bootstrap.sh')
)
_FEDERATIONSERVICES_FILE = (
    'docker-compose.yml',
    pathlib.Path(_ROOT_PATH, 'federation/docker-compose.yml')
)
# all federation file tuples gathered in one list
_ALL_FEDERATION_FILES = [
    _FEDERATIONPREP_FILE, _FEDERATIONSERVICES_FILE,
]
# Slurm files, each a 2-tuple of
# (file name, pathlib.Path to that file below _ROOT_PATH).
_SLURMMASTERPREP_FILE = (
    'shipyard_slurm_master_bootstrap.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/shipyard_slurm_master_bootstrap.sh')
)
_SLURMCOMPUTENODEPREP_FILE = (
    'shipyard_slurm_computenode_nodeprep.sh',
    pathlib.Path(_ROOT_PATH, 'scripts/shipyard_slurm_computenode_nodeprep.sh')
)
_SLURMPY_FILE = (
    'slurm.py',
    pathlib.Path(_ROOT_PATH, 'slurm/slurm.py')
)
_SLURMREQ_FILE = (
    'requirements.txt',
    pathlib.Path(_ROOT_PATH, 'slurm/requirements.txt')
)
_SLURMCONF_FILE = (
    'slurm.conf',
    pathlib.Path(_ROOT_PATH, 'slurm/slurm.conf')
)
_SLURMDBDCONF_FILE = (
    'slurmdbd.conf',
    pathlib.Path(_ROOT_PATH, 'slurm/slurmdbd.conf')
)
_SLURMDBSQL_FILE = (
    'slurmdb.sql',
    pathlib.Path(_ROOT_PATH, 'slurm/slurmdb.sql')
)
# lookup of the slurm configuration file tuples above by short key
# NOTE(review): presumably the set of files a user may override - the
# consumer is outside this view, confirm there
_CONFIGURABLE_SLURM_FILES = {
    'slurm': _SLURMCONF_FILE,
    'slurmdbd': _SLURMDBDCONF_FILE,
    'slurmdbsql': _SLURMDBSQL_FILE,
}


def initialize_globals(verbose):
    # type: (bool) -> None
    """Resolve the module-level resources path once

    The ``resources`` directory below the root path is used when it exists.
    Otherwise a version-specific directory below the system temporary
    directory is created and used. Calls made after the path has been
    resolved are no-ops.
    :param bool verbose: log the resolved resources path at debug level
    """
    global _RESOURCES_PATH
    # already resolved by an earlier call
    if _RESOURCES_PATH is not None:
        return
    _RESOURCES_PATH = _ROOT_PATH / 'resources'
    if not _RESOURCES_PATH.exists():
        # fall back to a per-version directory in the temp dir
        tmp_root = pathlib.Path(tempfile.gettempdir())
        fallback_name = 'batch-shipyard-{}-resources'.format(__version__)
        _RESOURCES_PATH = tmp_root / fallback_name
        _RESOURCES_PATH.mkdir(parents=True, exist_ok=True)
    if verbose:
        logger.debug(
            'initialized resources path to: {}'.format(_RESOURCES_PATH))


def populate_global_settings(config, fs_storage, pool_id=None, sc=None):
    # type: (dict, bool, str, settings.StorageCredentialsSettings) -> None
    """Push storage related settings from the config into the storage module

    :param dict config: configuration dict
    :param bool fs_storage: True when operating in the remote fs context;
        no pool id may be given in that case
    :param str pool_id: pool id override; read from config if empty
    :param settings.StorageCredentialsSettings sc: storage creds; looked up
        from config if None
    :raises ValueError: if a pool id is given together with fs_storage
    """
    shipyard = settings.batch_shipyard_settings(config)
    if sc is None:
        sc = settings.credentials_storage(
            config, shipyard.storage_account_settings)
    if not fs_storage:
        # postfix is "<lowercased batch account>-<pool id>"
        batch_creds = settings.credentials_batch(config)
        if util.is_none_or_empty(pool_id):
            pool_id = settings.pool_id(config, lower=True)
        postfix = '-'.join((batch_creds.account.lower(), pool_id))
    elif util.is_not_empty(pool_id):
        raise ValueError('pool id specified for fs_storage')
    else:
        # left empty for now, it is populated with the storage cluster
        # during the actual calls
        postfix = ''
    storage.set_storage_configuration(
        shipyard.storage_entity_prefix,
        postfix,
        sc.account,
        sc.account_key,
        sc.endpoint,
        shipyard.generated_sas_expiry_days)


def fetch_credentials_conf_from_keyvault(
        keyvault_client, keyvault_uri, keyvault_credentials_secret_id):
    # type: (azure.keyvault.KeyVaultClient, str, str) -> dict
    """Retrieve a credentials configuration stored in keyvault
    :param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
    :param str keyvault_uri: keyvault uri
    :param str keyvault_credentials_secret_id: keyvault cred secret id
    :rtype: dict
    :return: credentials conf
    :raises ValueError: if the keyvault uri or the keyvault client is None
    """
    # the uri is validated before the client, each with its own message
    required = (
        (keyvault_uri, 'credentials conf was not specified or is invalid'),
        (keyvault_client, 'no Azure KeyVault or AAD credentials specified'),
    )
    for value, problem in required:
        if value is None:
            raise ValueError(problem)
    return keyvault.fetch_credentials_conf(
        keyvault_client, keyvault_uri, keyvault_credentials_secret_id)


def fetch_secrets_from_keyvault(keyvault_client, config):
    # type: (azure.keyvault.KeyVaultClient, dict) -> None
    """Resolve the secret ids found in config through keyvault

    Nothing is done when no keyvault client is available.
    :param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
    :param dict config: configuration dict
    """
    if keyvault_client is None:
        return
    keyvault.parse_secret_ids(keyvault_client, config)


def fetch_storage_account_keys_from_aad(
        storage_mgmt_client, config, fs_storage):
    # type: (azure.mgmt.storage.StorageManagementClient, dict, bool) -> None
    """Populate storage account keys via the storage management client

    When storage.populate_storage_account_keys_from_aad() returns a truthy
    value, the global settings are populated again from config.
    :param azure.mgmt.storage.StorageManagementClient storage_mgmt_client:
        storage client
    :param dict config: configuration dict
    :param bool fs_storage: adjust for fs context
    """
    keys_populated = storage.populate_storage_account_keys_from_aad(
        storage_mgmt_client, config)
    if keys_populated:
        populate_global_settings(config, fs_storage)


def _download_file(desc, pkg, dldict):
    # type: (str, pathlib.Path, dict) -> None
    """Download a file to disk and check its sha256
    :param str desc: description used in log and error messages
    :param pathlib.Path pkg: local path the download is written to
    :param dict dldict: download dict with 'url', 'sha256' and 'target' keys
    :raises RuntimeError: if the server responds with an HTTP error status
        or if the sha256 of the written file does not match
    """
    logger.debug('downloading {} to {}'.format(desc, dldict['target']))
    response = requests.get(dldict['url'], stream=True)
    try:
        # fail early with a clear message instead of writing an HTTP error
        # body to disk and reporting it later as a sha256 mismatch
        if not response.ok:
            raise RuntimeError(
                'failed to download {} from {}: HTTP status {}'.format(
                    desc, dldict['url'], response.status_code))
        with pkg.open('wb') as f:
            for chunk in response.iter_content(
                    chunk_size=_REQUEST_CHUNK_SIZE):
                if chunk:
                    f.write(chunk)
    finally:
        # a streamed response keeps its connection until it is fully
        # consumed or closed; always release it, even on errors
        response.close()
    logger.debug('wrote {} bytes to {}'.format(pkg.stat().st_size, pkg))
    # check sha256
    if util.compute_sha256_for_file(pkg, False) != dldict['sha256']:
        raise RuntimeError('sha256 mismatch for {}'.format(pkg))


def _setup_nvidia_driver_package(config, vm_size):
    # type: (dict, str) -> pathlib.Path
    """Ensure the NVIDIA driver matching the vm size is available locally

    The driver is only downloaded when it is missing from the resources
    path or its sha256 does not match; a download requires the user to
    confirm the NVIDIA license.
    :param dict config: configuration dict
    :param str vm_size: vm size
    :rtype: pathlib.Path
    :return: package path
    :raises RuntimeError: if the license is not agreed to
    """
    gpu_type = settings.get_gpu_type_from_vm_size(vm_size)
    driver = _NVIDIA_DRIVER[gpu_type]
    pkg = _RESOURCES_PATH / driver['target']
    # nothing to do if a verified copy is already present
    if (pkg.exists() and
            util.compute_sha256_for_file(pkg, False) == driver['sha256']):
        return pkg
    # display license link
    license_accepted = util.confirm_action(
        config,
        msg=('agreement with License for Customer Use of NVIDIA '
             'Software @ {}').format(_NVIDIA_DRIVER['license']),
        allow_auto=True)
    if not license_accepted:
        raise RuntimeError(
            'Cannot proceed with deployment due to non-agreement with '
            'license for NVIDIA driver')
    logger.info('NVIDIA Software License accepted')
    # download driver
    _download_file('NVIDIA driver', pkg, driver)
    return pkg


def _setup_intel_mpi_rt_package(config, pool_settings):
    # type: (dict, settings.PoolSettings) -> pathlib.Path
    """Ensure the Intel MPI runtime package is available locally

    Only applies to RDMA pools using the ``ubuntu-server-container-rdma``
    offer. The package is only downloaded when it is missing from the
    resources path or its sha256 does not match; a download requires the
    user to confirm the Intel license.
    :param dict config: configuration dict
    :param settings.PoolSettings pool_settings: pool settings
    :rtype: pathlib.Path
    :return: package path, or None if the package does not apply to the pool
    :raises RuntimeError: if the license is not agreed to
    """
    # only for native ubuntu rdma
    if not settings.is_rdma_pool(pool_settings.vm_size):
        return None
    if pool_settings.vm_configuration.offer != 'ubuntu-server-container-rdma':
        return None
    pkg = _RESOURCES_PATH / _INTEL_MPI_RT_PACKAGE['target']
    # nothing to do if a verified copy is already present
    if (pkg.exists() and
            util.compute_sha256_for_file(pkg, False) ==
            _INTEL_MPI_RT_PACKAGE['sha256']):
        return pkg
    # display license link
    license_accepted = util.confirm_action(
        config,
        msg=('agreement with Intel Simplified Software License @ '
             '{}').format(_INTEL_MPI_RT_PACKAGE['license']),
        allow_auto=True)
    if not license_accepted:
        raise RuntimeError(
            'Cannot proceed with deployment due to non-agreement with '
            'license for Intel MPI Runtime')
    logger.info('Intel Simplified Software License accepted')
    # download package
    _download_file('Intel MPI Runtime', pkg, _INTEL_MPI_RT_PACKAGE)
    return pkg


def _setup_batch_insights_package(config, pool_settings):
    # type: (dict, settings.PoolSettings) -> pathlib.Path
    """Ensure the Batch Insights binary for the pool OS is available locally

    The binary is only downloaded when it is missing from the resources
    path or its sha256 does not match.
    :param dict config: configuration dict
    :param settings.PoolSettings pool_settings: pool settings
    :rtype: pathlib.Path
    :return: package path
    """
    is_windows = settings.is_windows_pool(
        config, pool_settings.vm_configuration)
    insights = _BATCH_INSIGHTS['windows' if is_windows else 'linux']
    pkg = _RESOURCES_PATH / insights['target']
    # download only if there is no verified copy yet
    is_cached = (
        pkg.exists() and
        util.compute_sha256_for_file(pkg, False) == insights['sha256'])
    if not is_cached:
        _download_file('Batch Insights', pkg, insights)
    return pkg


def _setup_lis_package(config, vm_size):
    # type: (dict, str) -> pathlib.Path
    """Ensure the compacted LIS package is available locally

    When the compacted archive or a verified copy of the original download
    is missing, the package is downloaded, decompressed, stripped of the
    Oracle, RHEL and older CentOS entries, and compressed again.
    :param dict config: configuration dict
    :param str vm_size: vm size
    :rtype: pathlib.Path
    :return: compacted package path, or None if LIS is not required
    """
    # check to see if lis is required first
    if not settings.is_lis_install_required(config, vm_size=vm_size):
        return None
    pkg = _RESOURCES_PATH / _LIS_PACKAGE['target']
    compact_pkg = _RESOURCES_PATH / _LIS_PACKAGE['target_compact']
    # reuse the compacted archive if the source download is still verified
    is_cached = (
        compact_pkg.exists() and pkg.exists() and
        util.compute_sha256_for_file(pkg, False) == _LIS_PACKAGE['sha256'])
    if is_cached:
        return compact_pkg
    _download_file('LIS package', pkg, _LIS_PACKAGE)
    logger.debug('compacting LIS package')
    # decompress while keeping the original download
    util.subprocess_with_output(
        'gunzip -f -k {}'.format(pkg), shell=True, suppress_output=True)
    # move the decompressed tar to its intermediate name
    decompressed = pkg.parent / pkg.stem
    intermediate = pkg.parent / _LIS_PACKAGE['intermediate']
    decompressed.replace(intermediate)
    # delete the unneeded entries from the tar in place
    util.subprocess_with_output(
        ('tar vf {} --wildcards --delete LISISO/Oracle* '
         '--delete LISISO/RHEL* --delete LISISO/CentOS5* '
         '--delete LISISO/CentOS6* --delete LISISO/CentOS70* '
         '--delete LISISO/CentOS71* --delete LISISO/CentOS72* '
         '--delete LISISO/CentOS73* ').format(intermediate),
        shell=True, suppress_output=True)
    # compress the intermediate tar into the compacted package
    util.subprocess_with_output(
        'gzip -f {}'.format(intermediate), shell=True, suppress_output=True)
    logger.debug('LIS package compacted: {} => {} bytes'.format(
        compact_pkg, compact_pkg.stat().st_size))
    return compact_pkg


def setup_prometheus_node_exporter():
    # type: () -> pathlib.Path
    """Ensure the Prometheus node exporter package is available locally

    The package is only downloaded when it is missing from the resources
    path or its sha256 does not match.
    :rtype: pathlib.Path
    :return: node exporter package path
    """
    node_exporter = _PROMETHEUS['node_exporter']
    ne_pkg = _RESOURCES_PATH / node_exporter['target']
    is_cached = (
        ne_pkg.exists() and
        util.compute_sha256_for_file(ne_pkg, False) ==
        node_exporter['sha256'])
    if not is_cached:
        _download_file('Prometheus Node Exporter', ne_pkg, node_exporter)
    return ne_pkg


def _setup_prometheus_monitoring_tools(pool_settings):
    # type: (settings.PoolSettings) -> Tuple[pathlib.Path, pathlib.Path]
    """Ensure the enabled prometheus monitoring tools are available locally

    cAdvisor is downloaded and gzip-compressed when either file is missing
    or the sha256 of the download does not match.
    :param settings.PoolSettings pool_settings: pool settings
    :rtype: tuple
    :return: tuple of (node exporter package, compacted cAdvisor package);
        an element is None when the respective tool is not enabled
    """
    if pool_settings.prometheus.ne_enabled:
        ne_pkg = setup_prometheus_node_exporter()
    else:
        ne_pkg = None
    if not pool_settings.prometheus.ca_enabled:
        return ne_pkg, None
    cadvisor = _PROMETHEUS['cadvisor']
    ca_pkg = _RESOURCES_PATH / cadvisor['target']
    ca_pkg_compact = _RESOURCES_PATH / cadvisor['target_compact']
    is_cached = (
        ca_pkg.exists() and ca_pkg_compact.exists() and
        util.compute_sha256_for_file(ca_pkg, False) == cadvisor['sha256'])
    if not is_cached:
        _download_file('cAdvisor', ca_pkg, cadvisor)
        logger.debug('compacting cAdvisor package')
        # compress while keeping the original download
        util.subprocess_with_output(
            'gzip -f -k {}'.format(ca_pkg), shell=True,
            suppress_output=True)
        logger.debug('cAdvisor package compacted: {} => {} bytes'.format(
            ca_pkg_compact, ca_pkg_compact.stat().st_size))
    return ne_pkg, ca_pkg_compact


def _generate_azure_mount_script_name(
        batch_account_name, pool_id, is_file_share, is_windows):
    # type: (str, str, bool, bool) -> pathlib.Path
    """Build the path of an azure blob/file mount script

    The file name has the form
    ``<azurefile|azureblob>-mount-<account>-<pool>.<cmd|sh>`` with the
    account name and pool id lowercased, and is placed in the resources
    path.
    :param str batch_account_name: batch account name
    :param str pool_id: pool id
    :param bool is_file_share: is file share
    :param bool is_windows: is windows
    :rtype: pathlib.Path
    :return: path to azure mount script
    """
    prefix = 'azurefile' if is_file_share else 'azureblob'
    extension = 'cmd' if is_windows else 'sh'
    return _RESOURCES_PATH / '{}-mount-{}-{}.{}'.format(
        prefix, batch_account_name.lower(), pool_id.lower(), extension)


def _setup_azureblob_mounts(blob_client, config, bc):
    # type: (azure.storage.blob.BlockBlobService, dict,
    #        settings.BatchCredentials) -> pathlib.Path
    """Generate the script that mounts Azure Blob containers via blobfuse

    For every shared data volume that is an Azure Blob container, commands
    are emitted that create the host mount path and the blobfuse temp
    path, export the storage account name and a container SAS token, and
    run blobfuse. Default attr/entry/negative timeouts are appended unless
    the configured mount options already set them.
    :param azure.storage.blob.BlockBlobService blob_client: blob client
        (not used by this function)
    :param dict config: configuration dict
    :param settings.BatchCredentials bc: batch creds
    :rtype: pathlib.Path
    :return: path to the generated mount script
    :raises RuntimeError: if no mount commands were generated
    """
    tmpmount = settings.temp_disk_mountpoint(config)
    sdv = settings.global_resources_shared_data_volumes(config)
    # (marker to look for, option appended when the marker is absent)
    default_timeouts = (
        ('-o attr_timeout=', '-o attr_timeout=240'),
        ('-o entry_timeout=', '-o entry_timeout=240'),
        ('-o negative_timeout=', '-o negative_timeout=120'),
    )
    # construct mount commands
    script_cmds = []
    for svkey in sdv:
        if not settings.is_shared_data_volume_azure_blob(sdv, svkey):
            continue
        sa = settings.credentials_storage(
            config,
            settings.azure_storage_account_settings(sdv, svkey))
        cont = settings.azure_blob_container_name(sdv, svkey)
        hmp = settings.azure_blob_host_mount_path(sa.account, cont)
        sas = storage.create_blob_container_saskey(
            sa, cont, 'egress', create_container=True)
        tmpmp = '{}/blobfuse-tmp/{}-{}'.format(tmpmount, sa.account, cont)
        script_cmds.extend([
            'mkdir -p {}'.format(hmp),
            'chmod 0770 {}'.format(hmp),
            'mkdir -p {}'.format(tmpmp),
            'chown _azbatch:_azbatchgrp {}'.format(tmpmp),
            'chmod 0770 {}'.format(tmpmp),
            'export AZURE_STORAGE_ACCOUNT="{}"'.format(sa.account),
            'export AZURE_STORAGE_SAS_TOKEN="{}"'.format(sas),
        ])
        mount_cmd = (
            'blobfuse {hmp} --container-name={cont} '
            '--tmp-path={tmpmp} -o allow_other'
        ).format(hmp=hmp, cont=cont, tmpmp=tmpmp)
        # add any additional mount options; allow_other is already set
        mo = settings.shared_data_volume_mount_options(sdv, svkey)
        if util.is_not_empty(mo):
            extra = [opt for opt in mo if opt.strip() != '-o allow_other']
            mount_cmd = '{} {}'.format(mount_cmd, ' '.join(extra))
        for marker, default_opt in default_timeouts:
            if marker not in mount_cmd:
                mount_cmd = '{} {}'.format(mount_cmd, default_opt)
        script_cmds.append(mount_cmd)
    # create blob mount command script
    if util.is_none_or_empty(script_cmds):
        raise RuntimeError('Generated Azure blob mount commands are invalid')
    volcreate = _generate_azure_mount_script_name(
        bc.account, settings.pool_id(config), False, False)
    newline = '\n'
    preamble = ['#!/usr/bin/env bash', 'set -e', 'set -o pipefail']
    with volcreate.open('w', newline=newline) as f:
        for line in preamble + script_cmds:
            f.write(line)
            f.write(newline)
    return volcreate


def _setup_azurefile_mounts(blob_client, config, bc, is_windows):
    # type: (azure.storage.blob.BlockBlobService, dict,
    #        settings.BatchCredentials, bool) -> pathlib.Path
    """Generate the script that mounts all Azure File shared data volumes

    On Windows each share is connected with ``net use`` and linked to the
    host mount path with ``mklink``. Otherwise the host mount path is
    created and the share is mounted with ``mount -t cifs``, applying any
    additional mount options configured for the volume.
    :param azure.storage.blob.BlockBlobService blob_client: blob client
        (not used by this function)
    :param dict config: configuration dict
    :param settings.BatchCredentials bc: batch creds
    :param bool is_windows: is windows pool
    :rtype: pathlib.Path
    :return: path to the generated mount script
    :raises RuntimeError: if no mount commands were generated
    """
    # construct mount commands
    cmds = []
    sdv = settings.global_resources_shared_data_volumes(config)
    for svkey in sdv:
        if not settings.is_shared_data_volume_azure_file(sdv, svkey):
            continue
        sa = settings.credentials_storage(
            config,
            settings.azure_storage_account_settings(sdv, svkey))
        share = settings.azure_file_share_name(sdv, svkey)
        hmp = settings.azure_file_host_mount_path(
            sa.account, share, is_windows)
        if is_windows:
            # every literal backslash is written as an explicit escape;
            # a bare "\{" is an invalid escape sequence in a normal string
            cmd = (
                'net use \\\\{sa}.file.{ep}\\{share} {sakey} '
                '/user:Azure\\{sa}'
            ).format(
                sa=sa.account, ep=sa.endpoint, share=share,
                sakey=sa.account_key)
            cmds.append(cmd)
            cmd = (
                'mklink /d {hmp} \\\\{sa}.file.{ep}\\{share}'.format(
                    hmp=hmp, sa=sa.account, ep=sa.endpoint, share=share)
            )
        else:
            cmd = (
                'mount -t cifs //{sa}.file.{ep}/{share} {hmp} -o '
                'vers=3.0,username={sa},password={sakey},'
                'serverino'
            ).format(
                sa=sa.account, ep=sa.endpoint, share=share, hmp=hmp,
                sakey=sa.account_key)
            # add any additional mount options
            mo = settings.shared_data_volume_mount_options(sdv, svkey)
            if util.is_not_empty(mo):
                opts = []
                # retain backward compatibility with filemode/dirmode
                # options from the old Azure File Docker volume driver
                for opt in mo:
                    tmp = opt.split('=')
                    if tmp[0] == 'filemode':
                        opts.append('file_mode={}'.format(tmp[1]))
                    elif tmp[0] == 'dirmode':
                        opts.append('dir_mode={}'.format(tmp[1]))
                    else:
                        opts.append(opt)
                cmd = '{},{}'.format(cmd, ','.join(opts))
            # the mount point has to exist before mounting
            cmds.append('mkdir -p {}'.format(hmp))
        cmds.append(cmd)
    # create file share mount command script
    if util.is_none_or_empty(cmds):
        raise RuntimeError('Generated Azure file mount commands are invalid')
    volcreate = _generate_azure_mount_script_name(
        bc.account, settings.pool_id(config), True, is_windows)
    # use the line ending native to the platform running the script
    newline = '\r\n' if is_windows else '\n'
    with volcreate.open('w', newline=newline) as f:
        if is_windows:
            f.write('@echo off')
            f.write(newline)
        else:
            f.write('#!/usr/bin/env bash')
            f.write(newline)
            f.write('set -e')
            f.write(newline)
            f.write('set -o pipefail')
            f.write(newline)
        for cmd in cmds:
            f.write(cmd)
            f.write(newline)
    return volcreate


def _create_storage_cluster_mount_args(
        compute_client, network_client, batch_mgmt_client, config, sc_id,
        bc, subnet_id):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.mgmt.batch.BatchManagementClient, dict, str,
    #        settings.BatchCredentials, str) -> Tuple[str, str]
    """Validate the pool network against a storage cluster and build its
    mount arguments

    The pool subnet must be in the same virtual network (by name and
    resource group) as the storage cluster, and in the same subscription
    as the batch account.
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
        batch management client
    :param dict config: configuration dict
    :param str sc_id: storage cluster id
    :param settings.BatchCredentials bc: batch creds (not used by this
        function)
    :param str subnet_id: subnet id
    :rtype: tuple
    :return: (fstab mount, storage cluster arg)
    :raises RuntimeError: if no subnet is given or any of the cross checks
        fails
    """
    # a storage cluster can only be reached through a vnet/subnet
    if util.is_none_or_empty(subnet_id):
        raise RuntimeError(
            'cannot mount a storage cluster without a valid virtual '
            'network or subnet')
    # get remotefs settings
    cluster = settings.remotefs_settings(config, sc_id).storage_cluster
    pool_subid, pool_rg, _, pool_vnet, _subnet_name = \
        util.explode_arm_subnet_id(subnet_id)
    # check for same vnet name
    if pool_vnet.lower() != cluster.virtual_network.name.lower():
        raise RuntimeError(
            'cannot link storage cluster {} on virtual '
            'network {} with pool virtual network {}'.format(
                sc_id, cluster.virtual_network.name, pool_vnet))
    # cross check vnet resource group
    if pool_rg.lower() != cluster.virtual_network.resource_group.lower():
        raise RuntimeError(
            'cannot link storage cluster {} virtual network in resource group '
            '{} with pool virtual network in resource group {}'.format(
                sc_id, cluster.virtual_network.resource_group, pool_rg))
    # cross check vnet subscription id against the third "/"-separated
    # component of the lowercased batch account id
    ba, _ = batch.get_batch_account(batch_mgmt_client, config)
    ba_id_parts = ba.id.lower().split('/')
    if pool_subid.lower() != ba_id_parts[2]:
        raise RuntimeError(
            'cannot link storage cluster {} virtual network in subscription '
            '{} with pool virtual network in subscription {}'.format(
                sc_id, pool_subid, ba_id_parts[2]))
    # construct host mount path
    host_mount_path = '{}/{}'.format(
        settings.get_host_mounts_path(False), sc_id)
    # return fstab and sc arg
    return remotefs.create_storage_cluster_mount_args(
        compute_client, network_client, config, sc_id, host_mount_path)


def _create_custom_linux_mount_args(config, mount_name):
    # type: (dict, str) -> str
    """Build the fstab line for a custom linux mount
    :param dict config: configuration dict
    :param str mount_name: mount name, also used as the directory name
        under the host mounts path
    :rtype: str
    :return: fstab entry with noauto prepended to the mount options
    :raises RuntimeError: if the configured mount options contain noauto
        or auto
    """
    volumes = settings.global_resources_shared_data_volumes(config)
    entry = settings.custom_linux_mount_fstab_options(volumes, mount_name)
    options = entry.fs_mntops
    # noauto is always injected below, so neither variant may be supplied
    # by the user; noauto is tested first to get the more specific message
    # NOTE(review): these are `in` tests; if fs_mntops is a plain string
    # then any option merely containing "auto" is rejected too - confirm
    if 'noauto' in options:
        raise RuntimeError(
            ('noauto cannot be specified as a mount option for custom '
             'linux mount {}').format(mount_name))
    if 'auto' in options:
        raise RuntimeError(
            ('auto cannot be specified as a mount option for custom '
             'linux mount {}').format(mount_name))
    template = (
        '{fs_spec} {hmp}/{name} {fs_vfstype} noauto,{fs_mntops} {fs_freq} '
        '{fs_passno}')
    return template.format(
        fs_spec=entry.fs_spec,
        hmp=settings.get_host_mounts_path(False),
        name=mount_name,
        fs_vfstype=entry.fs_vfstype,
        fs_mntops=options,
        fs_freq=entry.fs_freq,
        fs_passno=entry.fs_passno)


def _pick_node_agent_for_vm(batch_client, config, pool_settings):
    # type: (azure.batch.batch_service_client.BatchServiceClient,
    #        dict, settings.PoolSettings) -> (str, str)
    """Select the image reference and node agent sku id for the pool vm
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param settings.PoolSettings pool_settings: pool settings
    :rtype: tuple
    :return: image reference to use, node agent id to use
    :raises RuntimeError: if no node agent sku lists a verified image
        reference matching the configured publisher, offer and sku
    """
    vm_conf = pool_settings.vm_configuration
    publisher = vm_conf.publisher
    offer = vm_conf.offer
    sku = vm_conf.sku

    def _explicit_image_ref():
        # image reference built directly from the pool configuration,
        # used for the hard-coded cases that skip the service lookup
        return {
            'publisher': publisher,
            'offer': offer,
            'sku': sku,
            'version': vm_conf.version,
        }

    # backward compat for CentOS HPC 7.1, 7.3, 7.4 and normal 7.4, 7.5, 7.6
    if publisher == 'openlogic':
        legacy_hpc = offer == 'centos-hpc' and sku in ('7.1', '7.3', '7.4')
        legacy_std = offer == 'centos' and sku in ('7.4', '7.5', '7.6')
        if legacy_hpc or legacy_std:
            return (_explicit_image_ref(), 'batch.node.centos 7')
    # support windows server semi annual
    if publisher == 'microsoftwindowsserver':
        if offer == 'windowsserversemiannual' and 'with-containers' in sku:
            return (_explicit_image_ref(), 'batch.node.windows amd64')
    # collect every (node agent sku, image reference) pair that matches,
    # walking each agent's verified references in sku order
    candidates = []
    for agent_sku in batch_client.account.list_node_agent_skus():
        ordered_refs = sorted(
            agent_sku.verified_image_references,
            key=lambda ref: ref.sku)
        for image_ref in ordered_refs:
            if image_ref.publisher.lower() != publisher:
                continue
            if image_ref.offer.lower() != offer:
                continue
            if image_ref.sku.lower() != sku:
                continue
            candidates.append((agent_sku, image_ref))
    # the last matching pair wins
    try:
        sku_to_use, image_ref_to_use = candidates[-1]
    except IndexError:
        raise RuntimeError(
            ('Could not find an Azure Batch Node Agent Sku for this '
             'offer={} publisher={} sku={}. You can list the valid and '
             'available Marketplace images with the command: pool '
             'listskus').format(offer, publisher, sku))
    # set image version to use
    image_ref_to_use.version = vm_conf.version
    logger.info('deploying vm config: {}'.format(image_ref_to_use))
    return (image_ref_to_use, sku_to_use.id)


def _check_for_batch_aad(bc, rmsg):
    # type: (settings.BatchCredentialsSettings, str) -> None
    """Check that Batch AAD credentials are in use, raising otherwise
    :param settings.BatchCredentialsSettings bc: batch cred settings
    :param str rmsg: description of the attempted action, substituted into
        the error message as "Cannot <rmsg> without Batch AAD credentials"
    :raises RuntimeError: if an account key is specified in the batch
        credentials
    """
    if util.is_not_empty(bc.account_key):
        # interpolate the action description so that the message names
        # the operation that requires AAD
        raise RuntimeError(
            ('Cannot {} without Batch AAD credentials. Please ensure that '
             'an "account_key" is not specified under the "batch" section '
             'in credentials and an "aad" section is specified either '
             'directly under "credentials" or under "batch".').format(rmsg))


def _pool_virtual_network_subnet_address_space_check(
        resource_client, network_client, config, pool_settings, bc):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.network.NetworkManagementClient, dict,
    #        settings.PoolSettings, settings.BatchCredentialsSettings) -> str
    """Resolve the subnet for the pool and check that its address space
    can hold the requested number of nodes. If no ARM subnet id is given,
    the virtual network and subnet are obtained through
    resource.create_virtual_network_and_subnet.
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    :param settings.PoolSettings pool_settings: pool settings
    :param settings.BatchCredentialsSettings bc: batch cred settings
    :rtype: str
    :return: subnet id, or None if no virtual network settings are
        specified
    :raises RuntimeError: if the subnet address prefix carries no mask,
        the subnet cannot hold the pool, or the user rejects the
        confirmation prompt
    """
    vnet = pool_settings.virtual_network
    if (util.is_none_or_empty(vnet.arm_subnet_id) and
            util.is_none_or_empty(vnet.name)):
        logger.debug('no virtual network settings specified')
        return None
    # check if AAD is enabled
    _check_for_batch_aad(bc, 'allocate a pool with a virtual network')
    if util.is_not_empty(vnet.arm_subnet_id):
        # an existing subnet is referenced by its ARM id
        subnet_id = vnet.arm_subnet_id
        sub_id, rg, provider, vnet_name, subnet_name = \
            util.explode_arm_subnet_id(subnet_id)
        logger.debug(
            ('arm subnet id breakdown: subid={} rg={} provider={} vnet={} '
             'subnet={}').format(
                 sub_id, rg, provider, vnet_name, subnet_name))
        if network_client is None:
            # the subnet cannot be queried, so hand the id back unchecked
            logger.info('using virtual network subnet id: {}'.format(
                subnet_id))
            logger.warning(
                'cannot perform IP space validation without a valid '
                'network_client, please specify management AAD credentials '
                'to allow pre-validation')
            return subnet_id
        # retrieve address prefix for subnet
        subnet = network_client.subnets.get(rg, vnet_name, subnet_name)
    else:
        # fall back to the batch account resource group if the virtual
        # network settings do not name one
        rg = (
            vnet.resource_group
            if util.is_not_empty(vnet.resource_group)
            else bc.resource_group
        )
        # create virtual network and subnet if specified
        _, subnet = resource.create_virtual_network_and_subnet(
            resource_client, network_client, rg, bc.location, vnet)
        subnet_id = subnet.id
    # ensure address prefix for subnet is valid
    prefix = subnet.address_prefix
    pieces = prefix.split('/')
    if len(pieces) < 2:
        raise RuntimeError(
            'subnet address_prefix is invalid for Batch pools: {}'.format(
                prefix))
    mask = int(pieces[-1])
    # subtract 5 for guideline and Azure numbering start
    allowable_addresses = (1 << (32 - mask)) - 5
    logger.debug('subnet {} mask is {} and allows {} addresses'.format(
        subnet.name, mask, allowable_addresses))
    vm_count = pool_settings.vm_count
    requested = vm_count.dedicated + vm_count.low_priority
    if allowable_addresses < requested:
        raise RuntimeError(
            ('subnet {} mask is {} and allows {} addresses but desired '
             'pool vm_count is {}').format(
                 subnet.name, mask, allowable_addresses, requested))
    # if within 90% tolerance, warn user due to potential
    # address shortage if other compute resources are in this subnet
    if (int(allowable_addresses * 0.9) <= requested and
            not util.confirm_action(
                config,
                msg=('subnet {} mask is {} and allows {} addresses '
                     'but desired pool vm_count is {}, proceed?').format(
                         subnet.name, mask, allowable_addresses,
                         requested))):
        raise RuntimeError('Pool deployment rejected by user')
    logger.info('using virtual network subnet id: {}'.format(subnet_id))
    return subnet_id


def _construct_pool_object(
        resource_client, compute_client, network_client, batch_mgmt_client,
        batch_client, blob_client, keyvault_client, config):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.mgmt.batch.BatchManagementClient,
    #        azure.batch.batch_service_client.BatchServiceClient,
    #        azure.storage.blob.BlockBlobService,
    #        azure.keyvault.KeyVaultClient, dict) -> None
    """Construct a pool add parameter object for create pool along with
    uploading resource files
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.mgmt.batch.BatchManagementClient: batch_mgmt_client
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
    :param dict config: configuration dict
    """
    # check shared data volume mounts before proceeding to allocate
    azureblob_vd = False
    azurefile_vd = False
    gluster_on_compute = False
    storage_cluster_mounts = []
    custom_linux_mounts = []
    try:
        sdv = settings.global_resources_shared_data_volumes(config)
        for sdvkey in sdv:
            if settings.is_shared_data_volume_azure_file(sdv, sdvkey):
                azurefile_vd = True
            elif settings.is_shared_data_volume_azure_blob(sdv, sdvkey):
                azureblob_vd = True
            elif settings.is_shared_data_volume_gluster_on_compute(
                    sdv, sdvkey):
                if gluster_on_compute:
                    raise ValueError(
                        'only one glusterfs on compute can be created')
                gluster_on_compute = True
            elif settings.is_shared_data_volume_storage_cluster(sdv, sdvkey):
                storage_cluster_mounts.append(sdvkey)
            elif settings.is_shared_data_volume_custom_linux_mount(
                    sdv, sdvkey):
                custom_linux_mounts.append(sdvkey)
            else:
                raise ValueError('Unknown shared data volume: {}'.format(
                    settings.shared_data_volume_driver(sdv, sdvkey)))
    except KeyError:
        pass
    # retrieve settings
    pool_settings = settings.pool_settings(config)
    native = settings.is_native_docker_pool(
        config, vm_config=pool_settings.vm_configuration)
    is_windows = settings.is_windows_pool(
        config, vm_config=pool_settings.vm_configuration)
    # get autoscale settings
    if settings.is_pool_autoscale_enabled(config, pas=pool_settings.autoscale):
        asenable = True
        asformula = autoscale.get_formula(pool_settings)
        asei = pool_settings.autoscale.evaluation_interval
        if pool_settings.resize_timeout is not None:
            logger.warning(
                'ignoring resize timeout for autoscale-enabled pool')
    else:
        asenable = False
        asformula = None
        asei = None
    logger.debug('autoscale enabled: {}'.format(asenable))
    # task scheduling policy settings
    if util.is_not_empty(pool_settings.node_fill_type):
        task_scheduling_policy = batchmodels.TaskSchedulingPolicy(
            node_fill_type=batchmodels.ComputeNodeFillType(
                pool_settings.node_fill_type),
        )
    else:
        task_scheduling_policy = None
    # custom image settings
    custom_image_na = settings.pool_custom_image_node_agent(config)
    # check for virtual network settings
    bc = settings.credentials_batch(config)
    subnet_id = _pool_virtual_network_subnet_address_space_check(
        resource_client, network_client, config, pool_settings, bc)
    # construct fstab mounts for storage clusters
    sc_fstab_mounts = []
    sc_args = []
    if util.is_not_empty(storage_cluster_mounts):
        for sc_id in storage_cluster_mounts:
            fm, sca = _create_storage_cluster_mount_args(
                compute_client, network_client, batch_mgmt_client, config,
                sc_id, bc, subnet_id)
            sc_fstab_mounts.append(fm)
            sc_args.append(sca)
        if settings.verbose(config):
            logger.debug('storage cluster args: {}'.format(sc_args))
    del storage_cluster_mounts
    # construct fstab mounts for custom mounts
    custom_linux_fstab_mounts = []
    if util.is_not_empty(custom_linux_mounts):
        for id in custom_linux_mounts:
            custom_linux_fstab_mounts.append(
                _create_custom_linux_mount_args(config, id))
    del custom_linux_mounts
    # add encryption cert to account if specified
    encrypt = settings.batch_shipyard_encryption_enabled(config)
    if encrypt:
        pfx = crypto.get_encryption_pfx_settings(config)
        batch.add_certificate_to_account(
            batch_client, config, None, False, False, None)
    # shipyard settings
    bs = settings.batch_shipyard_settings(config)
    # delay docker image preload settings
    delay_image_preload = False
    if bs.delay_docker_image_preload:
        if native:
            if is_windows:
                logger.warning(
                    'cannot delay docker image preload for windows '
                    'native pools')
            else:
                delay_image_preload = True
        else:
            logger.debug(
                'delay docker image preload specified for non-native pools, '
                'which is the default behavior for these pools')
    # construct block list
    block_for_gr = None
    if pool_settings.block_until_all_global_resources_loaded:
        block_for_gr_docker = ''
        block_for_gr_singularity = ''
        docker_images = settings.global_resources_docker_images(config)
        if len(docker_images) > 0:
            block_for_gr_docker = ','.join([x for x in docker_images])
        singularity_images = settings.global_resources_singularity_images(
            config)
        if len(singularity_images) > 0:
            block_for_gr_singularity = ','.join(
                [util.singularity_image_name_on_disk(x)
                 for x in singularity_images])
        if (util.is_none_or_empty(block_for_gr_docker) and
                util.is_none_or_empty(block_for_gr_singularity)):
            logger.warning(
                'no Docker and Singularity images specified in global '
                'resources')
        # native pools without delay will auto preload
        if native and not delay_image_preload:
            block_for_gr_docker = ''
        block_for_gr = '{}#{}'.format(
            block_for_gr_docker, block_for_gr_singularity)
    # data replication and peer-to-peer settings
    dr = settings.data_replication_settings(config)
    # create torrent flags
    torrentflags = '{}:{}:{}:{}'.format(
        dr.peer_to_peer.enabled, dr.concurrent_source_downloads,
        dr.peer_to_peer.direct_download_seed_bias,
        dr.peer_to_peer.compression)
    # create resource files list
    if is_windows:
        _rflist = [_REGISTRY_LOGIN_WINDOWS_FILE, _BLOBXFER_WINDOWS_FILE]
    else:
        _rflist = [_REGISTRY_LOGIN_FILE, _BLOBXFER_FILE]
    if (not native or delay_image_preload) and not is_windows:
        _rflist.append(_IMAGE_BLOCK_FILE)
        if not bs.use_shipyard_docker_image:
            _rflist.append(_CASCADE_FILE)
            _rflist.append(_CASCADE_REQ_FILE)
        if bs.store_timing_metrics:
            _rflist.append(_PERF_FILE)
    if pool_settings.ssh.hpn_server_swap:
        _rflist.append(_HPNSSH_FILE)
    # handle azure mounts
    if azureblob_vd:
        abms = _setup_azureblob_mounts(blob_client, config, bc)
        _rflist.append(('azureblob-mount.sh', abms))
    if azurefile_vd:
        afms = _setup_azurefile_mounts(blob_client, config, bc, is_windows)
        _rflist.append(
            ('azurefile-mount.{}'.format('cmd' if is_windows else 'sh'), afms)
        )
    # lis settings
    if (not is_windows and not native and
            util.is_none_or_empty(custom_image_na)):
        lis_pkg = _setup_lis_package(config, pool_settings.vm_size)
        if lis_pkg is not None:
            _rflist.append((lis_pkg.name, lis_pkg))
    else:
        lis_pkg = None
    # intel mpi rt settings
    if (not is_windows and native and
            util.is_none_or_empty(custom_image_na)):
        intel_mpi_rt_pkg = _setup_intel_mpi_rt_package(config, pool_settings)
        if intel_mpi_rt_pkg is not None:
            _rflist.append((intel_mpi_rt_pkg.name, intel_mpi_rt_pkg))
    else:
        intel_mpi_rt_pkg = None
    # gpu settings
    if (not native and settings.is_gpu_pool(pool_settings.vm_size) and
            util.is_none_or_empty(custom_image_na)):
        if pool_settings.gpu_driver is None:
            gpu_driver = _setup_nvidia_driver_package(
                config, pool_settings.vm_size)
            _rflist.append((gpu_driver.name, gpu_driver))
        else:
            gpu_type = settings.get_gpu_type_from_vm_size(
                pool_settings.vm_size)
            gpu_driver = pathlib.Path(_NVIDIA_DRIVER[gpu_type]['target'])
        gpu_env = '{}:{}'.format(
            settings.is_gpu_visualization_pool(pool_settings.vm_size),
            gpu_driver.name)
    else:
        gpu_env = None
    # prometheus settings
    if (not is_windows and
            (pool_settings.prometheus.ne_enabled or
             pool_settings.prometheus.ca_enabled)):
        ne_pkg, ca_pkg = _setup_prometheus_monitoring_tools(pool_settings)
        if pool_settings.prometheus.ne_enabled:
            _rflist.append((ne_pkg.name, ne_pkg))
        if pool_settings.prometheus.ca_enabled:
            _rflist.append((ca_pkg.name, ca_pkg))
    # batch insights settings
    if pool_settings.batch_insights_enabled:
        if (util.is_none_or_empty(bc.app_insights_application_id) or
                util.is_none_or_empty(bc.app_insights_instrumentation_key)):
            raise ValueError(
                'Application Insights Instrumentation Key or Application '
                'Id is invalid. Please specify the proper values in '
                'credentials under batch')
        bi_pkg = _setup_batch_insights_package(config, pool_settings)
        _rflist.append((bi_pkg.name, bi_pkg))
    # get container registries
    docker_registries = settings.docker_registries(config)
    # set additional start task commands (pre version)
    start_task = pool_settings.additional_node_prep.commands_pre
    # set vm configuration
    if native:
        if util.is_not_empty(custom_image_na):
            # check if AAD is enabled
            _check_for_batch_aad(bc, 'allocate a pool with a custom image')
            vmconfig = batchmodels.VirtualMachineConfiguration(
                image_reference=batchmodels.ImageReference(
                    virtual_machine_image_id=pool_settings.
                    vm_configuration.arm_image_id,
                ),
                node_agent_sku_id=pool_settings.vm_configuration.node_agent,
                license_type=pool_settings.vm_configuration.license_type,
            )
            logger.debug(
                ('deploying custom image to pool in native mode: {} '
                 'node agent: {}').format(
                     vmconfig.image_reference.virtual_machine_image_id,
                     vmconfig.node_agent_sku_id))
        else:
            image_ref, na_ref = _pick_node_agent_for_vm(
                batch_client, config, pool_settings)
            vmconfig = batchmodels.VirtualMachineConfiguration(
                image_reference=image_ref,
                node_agent_sku_id=na_ref,
                license_type=pool_settings.vm_configuration.license_type,
            )
            logger.debug('deploying pool in native mode')
        # attach container config
        vmconfig.container_configuration = batchmodels.ContainerConfiguration(
            container_image_names=settings.global_resources_docker_images(
                config) if not delay_image_preload else None,
            container_registries=docker_registries
            if not delay_image_preload else None,
        )
    elif util.is_not_empty(custom_image_na):
        # check if AAD is enabled
        _check_for_batch_aad(bc, 'allocate a pool with a custom image')
        vmconfig = batchmodels.VirtualMachineConfiguration(
            image_reference=batchmodels.ImageReference(
                virtual_machine_image_id=pool_settings.
                vm_configuration.arm_image_id,
            ),
            node_agent_sku_id=pool_settings.vm_configuration.node_agent,
            license_type=pool_settings.vm_configuration.license_type,
        )
        logger.debug('deploying custom image: {} node agent: {}'.format(
            vmconfig.image_reference.virtual_machine_image_id,
            vmconfig.node_agent_sku_id))
    else:
        image_ref, na_ref = _pick_node_agent_for_vm(
            batch_client, config, pool_settings)
        vmconfig = batchmodels.VirtualMachineConfiguration(
            image_reference=image_ref,
            node_agent_sku_id=na_ref,
            license_type=pool_settings.vm_configuration.license_type,
        )
    # modify rflist and start task for node prep script
    if is_windows:
        if util.is_not_empty(custom_image_na):
            raise RuntimeError(
                'Native mode and Windows custom images is not supported')
        _rflist.append(_NODEPREP_WINDOWS_FILE)
        # create start task commandline
        start_task.append(
            ('powershell -ExecutionPolicy Unrestricted -command '
             '{npf}{a}{e}{q}{u}{v}{x}').format(
                 npf=_NODEPREP_WINDOWS_FILE[0],
                 a=' -a' if azurefile_vd else '',
                 e=' -e {}'.format(pfx.sha1) if encrypt else '',
                 q=' -q' if pool_settings.batch_insights_enabled else '',
                 u=' -u' if util.is_not_empty(custom_image_na) else '',
                 v=' -v {}'.format(__version__),
                 x=' -x {}'.format(data._BLOBXFER_VERSION))
        )
    else:
        _rflist.append(_NODEPREP_FILE)
        # create start task commandline
        start_task.append(
            ('{npf}{a}{b}{c}{d}{e}{f}{g}{i}{j}{k}{lis}{m}{n}{o}{p}{q}{r}{s}'
             '{t}{u}{v}{w}{x}{y}{z}').format(
                npf=_NODEPREP_FILE[0],
                a=' -a' if azurefile_vd else '',
                b=' -b' if util.is_not_empty(block_for_gr) else '',
                c=' -c' if azureblob_vd else '',
                d=' -d' if bs.use_shipyard_docker_image else '',
                e=' -e {}'.format(pfx.sha1) if encrypt else '',
                f=' -f' if gluster_on_compute else '',
                g=' -g {}'.format(gpu_env) if gpu_env is not None else '',
                i=' -i {}'.format(misc._SINGULARITY_VERSION) if (
                    'singularity' in
                    pool_settings.container_runtimes_install) else '',
                j=' -j' if delay_image_preload else '',
                k=' -k' if ('kata_containers' in
                            pool_settings.container_runtimes_install) else '',
                lis=' -l {}'.format(
                    lis_pkg.name) if lis_pkg is not None else '',
                m=' -m {}'.format(','.join(sc_args)) if util.is_not_empty(
                    sc_args) else '',
                n=' -n' if native else '',
                o=' -o {}'.format(
                    bs.fallback_registry) if util.is_not_empty(
                        bs.fallback_registry) else '',
                p=' -p {}'.format(bs.storage_entity_prefix) if (
                    bs.storage_entity_prefix) else '',
                q=' -q' if pool_settings.batch_insights_enabled else '',
                r=' -r' if pool_settings.ssh.allow_docker_access else '',
                s=' -s {}'.format(torrentflags),
                t=' -t' if settings.can_tune_tcp(
                    pool_settings.vm_size) else '',
                u=' -u' if util.is_not_empty(custom_image_na) else '',
                v=' -v {}'.format(__version__),
                w=' -w' if pool_settings.ssh.hpn_server_swap else '',
                x=' -x {}'.format(data._BLOBXFER_VERSION),
                y=' -y' if pool_settings.per_job_auto_scratch else '',
                z=' -z {}'.format(pool_settings.container_runtimes_default),

            )
        )
    # upload resource files
    sas_urls = storage.upload_resource_files(blob_client, _rflist)
    del _rflist
    # remove temporary az mount files created
    if azureblob_vd:
        try:
            abms.unlink()
            pass
        except OSError:
            pass
    if azurefile_vd:
        try:
            afms.unlink()
        except OSError:
            pass
    # digest any input data
    addlcmds = data.process_input_data(
        config, _BLOBXFER_WINDOWS_FILE if is_windows else _BLOBXFER_FILE,
        settings.pool_specification(config))
    if addlcmds is not None:
        start_task.append(addlcmds)
    del addlcmds
    # add additional start task commands (post version)
    start_task.extend(pool_settings.additional_node_prep.commands_post)
    # create pool param
    pool = batchmodels.PoolAddParameter(
        id=pool_settings.id,
        virtual_machine_configuration=vmconfig,
        vm_size=pool_settings.vm_size,
        target_dedicated_nodes=(
            pool_settings.vm_count.dedicated if not asenable else None
        ),
        target_low_priority_nodes=(
            pool_settings.vm_count.low_priority if not asenable else None
        ),
        resize_timeout=pool_settings.resize_timeout if not asenable else None,
        max_tasks_per_node=pool_settings.max_tasks_per_node,
        enable_inter_node_communication=pool_settings.
        inter_node_communication_enabled,
        start_task=batchmodels.StartTask(
            command_line=util.wrap_commands_in_shell(
                start_task, windows=is_windows, wait=False),
            user_identity=batch._RUN_ELEVATED,
            wait_for_success=True,
            environment_settings=[
                batchmodels.EnvironmentSetting(
                    name='LC_ALL', value='en_US.UTF-8'),
            ],
            resource_files=[],
        ),
        enable_auto_scale=asenable,
        auto_scale_formula=asformula,
        auto_scale_evaluation_interval=asei,
        metadata=[
            batchmodels.MetadataItem(
                name=settings.get_metadata_version_name(),
                value=__version__,
            ),
            batchmodels.MetadataItem(
                name='BATCH_SHIPYARD_NATIVE_CONTAINER_POOL',
                value='1' if native else '0',
            ),
        ],
        task_scheduling_policy=task_scheduling_policy,
        certificate_references=[]
    )
    if encrypt:
        if is_windows:
            pool.certificate_references.append(
                batchmodels.CertificateReference(
                    thumbprint=pfx.sha1,
                    thumbprint_algorithm='sha1',
                    visibility=[
                        batchmodels.CertificateVisibility.start_task,
                        batchmodels.CertificateVisibility.task,
                    ]
                )
            )
        else:
            pool.certificate_references.append(
                batchmodels.CertificateReference(
                    thumbprint=pfx.sha1,
                    thumbprint_algorithm='sha1',
                    visibility=[batchmodels.CertificateVisibility.start_task]
                )
            )
    if util.is_not_empty(pool_settings.certificates):
        pool.certificate_references.extend(pool_settings.certificates)
    for rf in sas_urls:
        pool.start_task.resource_files.append(
            batchmodels.ResourceFile(
                file_path=rf,
                http_url=sas_urls[rf])
        )
    if not native or delay_image_preload:
        pool.start_task.environment_settings.append(
            batchmodels.EnvironmentSetting(
                name='SHIPYARD_STORAGE_ENV',
                value=crypto.encrypt_string(
                    encrypt,
                    '{}:{}:{}'.format(
                        storage.get_storageaccount(),
                        storage.get_storageaccount_endpoint(),
                        storage.get_storageaccount_key()),
                    config
                )
            )
        )
    if not native:
        if pool_settings.gpu_driver and util.is_none_or_empty(custom_image_na):
            pool.start_task.resource_files.append(
                batchmodels.ResourceFile(
                    file_path=gpu_driver.name,
                    http_url=pool_settings.gpu_driver,
                    file_mode='0755')
            )
    # add any additional specified resource files
    if util.is_not_empty(pool_settings.resource_files):
        for rf in pool_settings.resource_files:
            pool.start_task.resource_files.append(
                batchmodels.ResourceFile(
                    file_path=rf.file_path,
                    http_url=rf.blob_source,
                    file_mode=rf.file_mode,
                )
            )
    # network settings (subnet and/or remote access control)
    if (subnet_id is not None or
            pool_settings.remote_access_control.allow is not None or
            pool_settings.remote_access_control.deny is not None):
        priority = 150
        rules = []
        inp = batchmodels.InboundNATPool(
            name='BatchShipyard-RemoteAccessControl',
            protocol=pool_settings.remote_access_control.protocol,
            backend_port=pool_settings.remote_access_control.backend_port,
            frontend_port_range_start=pool_settings.remote_access_control.
            starting_port,
            frontend_port_range_end=pool_settings.remote_access_control.
            starting_port + 999,
            network_security_group_rules=rules,
        )
        if pool_settings.remote_access_control.allow is not None:
            for ar in pool_settings.remote_access_control.allow:
                rules.append(batchmodels.NetworkSecurityGroupRule(
                    priority=priority,
                    access=batchmodels.NetworkSecurityGroupRuleAccess.allow,
                    source_address_prefix=ar,
                ))
                priority += 1
        if pool_settings.remote_access_control.deny is not None:
            for dr in pool_settings.remote_access_control.deny:
                rules.append(batchmodels.NetworkSecurityGroupRule(
                    priority=priority,
                    access=batchmodels.NetworkSecurityGroupRuleAccess.deny,
                    source_address_prefix=dr,
                ))
                priority += 1
        if util.is_not_empty(rules):
            pec = batchmodels.PoolEndpointConfiguration(
                inbound_nat_pools=[inp],
            )
        else:
            pec = None
        # add subnet and NAT rules
        pool.network_configuration = batchmodels.NetworkConfiguration(
            subnet_id=subnet_id,
            endpoint_configuration=pec,
        )
    # storage cluster settings
    if util.is_not_empty(sc_fstab_mounts):
        pool.start_task.environment_settings.append(
            batchmodels.EnvironmentSetting(
                name='SHIPYARD_STORAGE_CLUSTER_FSTAB',
                value='#'.join(sc_fstab_mounts)
            )
        )
        del sc_args
        del sc_fstab_mounts
    # custom linux mount settings
    if util.is_not_empty(custom_linux_fstab_mounts):
        pool.start_task.environment_settings.append(
            batchmodels.EnvironmentSetting(
                name='SHIPYARD_CUSTOM_MOUNTS_FSTAB',
                value='#'.join(custom_linux_fstab_mounts)
            )
        )
        del custom_linux_fstab_mounts
    # add optional environment variables
    if not native and bs.store_timing_metrics:
        pool.start_task.environment_settings.append(
            batchmodels.EnvironmentSetting(name='SHIPYARD_TIMING', value='1')
        )
    # add docker login settings
    pool.start_task.environment_settings.extend(
        batch.generate_docker_login_settings(config)[0])
    # image preload setting
    if util.is_not_empty(block_for_gr):
        pool.start_task.environment_settings.append(
            batchmodels.EnvironmentSetting(
                name='SHIPYARD_CONTAINER_IMAGES_PRELOAD',
                value=block_for_gr,
            )
        )
    # Linux-only settings
    if not is_windows:
        # singularity env vars
        pool.start_task.environment_settings.append(
            batchmodels.EnvironmentSetting(
                name='SINGULARITY_TMPDIR',
                value=settings.get_singularity_tmpdir(config)
            )
        )
        pool.start_task.environment_settings.append(
            batchmodels.EnvironmentSetting(
                name='SINGULARITY_CACHEDIR',
                value=settings.get_singularity_cachedir(config)
            )
        )
        # prometheus env vars
        if pool_settings.prometheus.ne_enabled:
            pool.start_task.environment_settings.append(
                batchmodels.EnvironmentSetting(
                    name='PROM_NODE_EXPORTER_PORT',
                    value=pool_settings.prometheus.ne_port
                )
            )
            if util.is_not_empty(pool_settings.prometheus.ne_options):
                pool.start_task.environment_settings.append(
                    batchmodels.EnvironmentSetting(
                        name='PROM_NODE_EXPORTER_OPTIONS',
                        value=','.join(pool_settings.prometheus.ne_options)
                    )
                )
        if pool_settings.prometheus.ca_enabled:
            pool.start_task.environment_settings.append(
                batchmodels.EnvironmentSetting(
                    name='PROM_CADVISOR_PORT',
                    value=pool_settings.prometheus.ca_port
                )
            )
            if util.is_not_empty(pool_settings.prometheus.ca_options):
                pool.start_task.environment_settings.append(
                    batchmodels.EnvironmentSetting(
                        name='PROM_CADVISOR_OPTIONS',
                        value=','.join(pool_settings.prometheus.ca_options)
                    )
                )
    # batch insights
    if pool_settings.batch_insights_enabled:
        pool.start_task.environment_settings.append(
            batchmodels.EnvironmentSetting(
                name='APP_INSIGHTS_INSTRUMENTATION_KEY',
                value=bc.app_insights_instrumentation_key,
            )
        )
        pool.start_task.environment_settings.append(
            batchmodels.EnvironmentSetting(
                name='APP_INSIGHTS_APP_ID',
                value=bc.app_insights_application_id,
            )
        )
    # add custom env vars to the batch start task
    if util.is_not_empty(
            pool_settings.additional_node_prep.
            environment_variables_keyvault_secret_id):
        _check_keyvault_client(keyvault_client)
        env_vars = keyvault.get_secret(
            keyvault_client,
            pool_settings.additional_node_prep.
            environment_variables_keyvault_secret_id,
            value_is_json=True)
        env_vars = util.merge_dict(
            pool_settings.additional_node_prep.environment_variables,
            env_vars or {})
    else:
        env_vars = pool_settings.additional_node_prep.environment_variables
    if util.is_not_empty(env_vars):
        for key in env_vars:
            pool.start_task.environment_settings.append(
                batchmodels.EnvironmentSetting(name=key, value=env_vars[key])
            )
    del env_vars
    return (pool_settings, gluster_on_compute, pool)


def _construct_auto_pool_specification(
        resource_client, compute_client, network_client, batch_mgmt_client,
        batch_client, blob_client, keyvault_client, config):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.mgmt.batch.BatchManagementClient,
    #        azure.batch.batch_service_client.BatchServiceClient,
    #        azure.storage.blob.BlockBlobService,
    #        azure.keyvault.KeyVaultClient,
    #        dict) -> batchmodels.PoolSpecification
    """Build a pool specification for use with an auto pool
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
        batch management client
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
    :param dict config: configuration dict
    :rtype: batchmodels.PoolSpecification
    :return: pool specification mirroring the constructed pool object
    """
    # build the pool add parameter object; the pool settings and the
    # gluster flag returned alongside it are not used here
    _unused_settings, _unused_gluster, pool_param = _construct_pool_object(
        resource_client, compute_client, network_client, batch_mgmt_client,
        batch_client, blob_client, keyvault_client, config)
    # attributes carried over unchanged from the pool add parameter
    mirrored_attrs = (
        'vm_size',
        'virtual_machine_configuration',
        'max_tasks_per_node',
        'task_scheduling_policy',
        'resize_timeout',
        'target_dedicated_nodes',
        'target_low_priority_nodes',
        'enable_auto_scale',
        'auto_scale_formula',
        'auto_scale_evaluation_interval',
        'enable_inter_node_communication',
        'network_configuration',
        'start_task',
        'certificate_references',
        'metadata',
    )
    poolspec = batchmodels.PoolSpecification(
        **{attr: getattr(pool_param, attr) for attr in mirrored_attrs})
    # flag the start task environment as belonging to an auto pool
    autopool_env = batchmodels.EnvironmentSetting(
        name='SHIPYARD_AUTOPOOL', value='1')
    poolspec.start_task.environment_settings.append(autopool_env)
    return poolspec


def _add_pool(
        resource_client, compute_client, network_client, batch_mgmt_client,
        batch_client, blob_client, keyvault_client, config):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.mgmt.batch.BatchManagementClient,
    #        azure.batch.batch_service_client.BatchServiceClient,
    #        azure.storage.blob.BlockBlobService,
    #        azure.keyvault.KeyVaultClient, dict) -> None
    """Create a Batch pool and run the post-allocation steps
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
        batch management client
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
    :param dict config: configuration dict
    :raises RuntimeError: if nodes were targeted but none were returned
        by pool creation
    """
    # build the pool add parameter object (uploads resource files)
    pool_settings, gluster_on_compute, pool = _construct_pool_object(
        resource_client, compute_client, network_client, batch_mgmt_client,
        batch_client, blob_client, keyvault_client, config)
    transfer_files = pool_settings.transfer_files_on_pool_creation
    # start ingress to Azure Storage up front, it is waited on at the end
    if transfer_files:
        storage_threads = data.ingress_data(
            batch_client, compute_client, network_client, config, rls=None,
            kind='storage')
    else:
        storage_threads = []
    # create the pool and query its resulting node counts
    nodes = batch.create_pool(batch_client, blob_client, config, pool)
    cloud_pool = batch_client.pool.get(pool.id)
    current_vm_count = (
        cloud_pool.current_dedicated_nodes +
        cloud_pool.current_low_priority_nodes
    )
    target_vm_count = (
        cloud_pool.target_dedicated_nodes +
        cloud_pool.target_low_priority_nodes
    )
    if target_vm_count > 0 and util.is_none_or_empty(nodes):
        raise RuntimeError(
            ('No nodes could be allocated for pool: {}. If the pool is '
             'comprised entirely of low priority nodes, then there may not '
             'have been enough available capacity in the region to satisfy '
             'your request. Please inspect the pool for resize errors and '
             'issue pool resize to try again.').format(pool.id))
    # actions that only make sense once nodes exist
    if current_vm_count > 0:
        if gluster_on_compute:
            _setup_glusterfs(
                batch_client, blob_client, config, nodes, _GLUSTERPREP_FILE,
                cmdline=None)
        # admin users are best effort: failures are logged, not raised
        try:
            batch.add_rdp_user(batch_client, config, nodes)
        except Exception as ex:
            logger.exception(ex)
        ssh_user_added = True
        try:
            batch.add_ssh_user(batch_client, config, nodes)
        except Exception as ex:
            ssh_user_added = False
            logger.exception(ex)
            logger.error(
                'Could not add SSH users to nodes. Please ensure ssh-keygen '
                'is available in your PATH or cwd. Skipping data ingress if '
                'specified.')
        if ssh_user_added:
            rls = None
            # ingress to the shared file system needs login settings
            if transfer_files:
                rls = batch.get_remote_login_settings(
                    batch_client, config, nodes=nodes,
                    suppress_output=True)
                data.ingress_data(
                    batch_client, compute_client, network_client, config,
                    rls=rls, kind='shared',
                    total_vm_count=current_vm_count)
            # emit remote login settings if none were retrieved above
            if rls is None:
                if current_vm_count > 100:
                    logger.info(
                        'Not listing remote login settings due to VM count. '
                        'If you need a list of remote login settings for all '
                        'nodes in the pool, issue the "pool nodes grls" '
                        'command.')
                else:
                    batch.get_remote_login_settings(
                        batch_client, config, nodes=nodes,
                        suppress_output=False)
    # block until storage ingress has finished
    data.wait_for_storage_threads(storage_threads)


def _setup_glusterfs(
        batch_client, blob_client, config, nodes, shell_script, cmdline=None):
    # type: (batchsc.BatchServiceClient, azure.storage.blob.BlockBlobService,
    #        dict, List[batchmodels.ComputeNode], str, str) -> None
    """Run a multi-instance task that sets up glusterfs on the pool
    :param batch_client: The batch client to use.
    :type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param dict config: configuration dict
    :param list nodes: list of nodes, if None the pool nodes are listed
    :param shell_script: glusterfs setup script to use; element 0 is used
        as both the resource file path and the upload result key
    :param str cmdline: coordination cmdline, generated if None
    :raises RuntimeError: if no gluster on compute volume is configured or
        the success marker file is missing on a node
    """
    # find the first gluster on compute shared data volume
    volume_type = None
    volume_options = None
    shared_volumes = settings.global_resources_shared_data_volumes(config)
    for volume_key in shared_volumes:
        try:
            if not settings.is_shared_data_volume_gluster_on_compute(
                    shared_volumes, volume_key):
                continue
            volume_type = settings.gluster_volume_type(
                shared_volumes, volume_key)
            volume_options = settings.gluster_volume_options(
                shared_volumes, volume_key)
        except KeyError:
            # incomplete volume definition, try the next one
            continue
        break
    if volume_type is None:
        raise RuntimeError('glusterfs volume not defined')
    pool_id = settings.pool_id(config)
    job_id = 'shipyard-glusterfs-{}'.format(uuid.uuid4())
    job = batchmodels.JobAddParameter(
        id=job_id,
        pool_info=batchmodels.PoolInformation(pool_id=pool_id),
    )
    # coordination command line: invoke the setup script
    if cmdline is None:
        tempdisk = settings.temp_disk_mountpoint(config)
        script_invocation = '$AZ_BATCH_TASK_DIR/{} {} {}'.format(
            shell_script[0], volume_type.lower(), tempdisk)
        cmdline = util.wrap_commands_in_shell([script_invocation])
    # application command line: fail unless the success marker exists,
    # then apply any volume options
    app_commands = [
        '[[ -f $AZ_BATCH_TASK_WORKING_DIR/.glusterfs_success ]] || exit 1',
    ]
    if volume_options is not None:
        app_commands.extend(
            'gluster volume set {} {}'.format(
                settings.get_gluster_default_volume_name(), option)
            for option in volume_options
        )
    # upload the setup script
    sas_urls = storage.upload_resource_files(blob_client, [shell_script])
    # one task instance per current dedicated node
    cloud_pool = batch_client.pool.get(pool_id)
    script_resource = batchmodels.ResourceFile(
        file_path=shell_script[0],
        http_url=sas_urls[shell_script[0]],
        file_mode='0755')
    mi_settings = batchmodels.MultiInstanceSettings(
        number_of_instances=cloud_pool.current_dedicated_nodes,
        coordination_command_line=cmdline,
        common_resource_files=[script_resource],
    )
    batchtask = batchmodels.TaskAddParameter(
        id='gluster-setup',
        multi_instance_settings=mi_settings,
        command_line=util.wrap_commands_in_shell(app_commands),
        user_identity=batch._RUN_ELEVATED,
    )
    # submit job and task
    batch_client.job.add(job)
    batch_client.task.add(job_id=job_id, task=batchtask)
    logger.debug(
        'waiting for glusterfs setup task {} in job {} to complete'.format(
            batchtask.id, job_id))
    # poll once a second until the task reports completed
    batchtask = batch_client.task.get(job_id, batchtask.id)
    while batchtask.state != batchmodels.TaskState.completed:
        time.sleep(1)
        batchtask = batch_client.task.get(job_id, batchtask.id)
    # verify the success marker on every node, stopping at the first miss
    if nodes is None:
        nodes = batch_client.compute_node.list(pool_id)
    marker_path = (
        'workitems/{}/job-1/gluster-setup/wd/.glusterfs_success'
    ).format(job_id)
    success = True
    for node in nodes:
        try:
            batch_client.file.get_properties_from_compute_node(
                pool_id, node.id, marker_path)
        except batchmodels.BatchErrorException:
            logger.error('gluster success file absent on node {}'.format(
                node.id))
            success = False
            break
    # the job is removed regardless of the verification outcome
    batch_client.job.delete(job_id)
    if not success:
        raise RuntimeError('glusterfs setup failed')
    logger.info(
        'glusterfs setup task {} in job {} completed'.format(
            batchtask.id, job_id))


def _execute_command_on_pool_over_ssh_with_keyed_output(
        batch_client, config, pool, desc, cmd):
    # type: (batchsc.BatchServiceClient, dict, batchmodels.CloudPool, str,
    #        list) -> dict
    """Execute a command on all nodes in pool over ssh
    :param batch_client: The batch client to use.
    :type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
    :param dict config: configuration dict
    :param batchmodels.CloudPool pool: cloud pool
    :param str desc: description of action
    :param list cmd: commands, joined with ' && ' and run under sudo bash
    :rtype: dict
    :return: keyed stdout by node id, or None if the pool currently has no
        compute nodes
    :raises RuntimeError: if the pool is a windows pool, the SSH private
        key file is missing or any remote command returned non-zero
    :raises ValueError: if no SSH username is configured
    """
    # TODO NYI disable SSH commands with windows pools
    is_windows = settings.is_windows_pool(config)
    if is_windows:
        raise RuntimeError(
            '{} is currently not supported for windows pools'.format(desc))
    # check if there are nodes to run command on
    if (pool.current_dedicated_nodes == 0 and
            pool.current_low_priority_nodes == 0):
        logger.warning('pool {} has no compute nodes'.format(pool.id))
        return
    _pool = settings.pool_settings(config)
    # get ssh settings
    username = _pool.ssh.username
    if util.is_none_or_empty(username):
        raise ValueError(
            'cannot {} without an SSH username'.format(desc))
    ssh_private_key = _pool.ssh.ssh_private_key
    if ssh_private_key is None:
        # fall back to the generated key in the export path
        ssh_private_key = pathlib.Path(
            _pool.ssh.generated_file_export_path, crypto.get_ssh_key_prefix())
    if not ssh_private_key.exists():
        raise RuntimeError('SSH private key file not found at: {}'.format(
            ssh_private_key))
    # set command
    command = ['sudo', '/bin/bash -c \'{}\''.format(' && '.join(cmd))]
    if settings.verbose(config):
        logger.debug('executing command: {}'.format(command))
    # number of ssh processes started before their output is collected
    max_inflight = 40
    stdout = {}
    stderr = {}

    def _drain(pending):
        # type: (dict) -> bool
        """Collect output of pending processes keyed by node id
        :param dict pending: node id to process handle
        :rtype: bool
        :return: True if any process exited with a non-zero return code
        """
        logger.debug('waiting for {} processes to complete'.format(
            len(pending)))
        for node_id in pending:
            stdout[node_id], stderr[node_id] = pending[node_id].communicate()
        rcs, _, _ = util.subprocess_wait_all(
            list(pending.values()), poll=True)
        if not any(rc != 0 for rc in rcs):
            return False
        if settings.verbose(config):
            logger.warning('return codes: {}'.format(rcs))
            logger.warning('stdout: {}'.format(stdout))
            logger.warning('stderr: {}'.format(stderr))
        return True

    # iterate through all nodes
    nodes = batch_client.compute_node.list(pool.id)
    procs = {}
    failures = False
    for node in nodes:
        rls = batch_client.compute_node.get_remote_login_settings(
            pool.id, node.id)
        procs[node.id] = crypto.connect_or_exec_ssh_command(
            rls.remote_login_ip_address, rls.remote_login_port,
            ssh_private_key, username, sync=False, tty=False,
            command=command)
        if len(procs) >= max_inflight:
            # drain first so the batch is always collected
            failures = _drain(procs) or failures
            # must remain a dict: it is keyed by node id for the next batch
            procs = {}
    if len(procs) > 0:
        failures = _drain(procs) or failures
    del procs
    if failures:
        raise RuntimeError(
            'failures detected performing {} on pool: {}'.format(
                desc, pool.id))
    logger.info('{} completed for pool: {}'.format(desc, pool.id))
    return stdout


def _log_stdout_by_nodeid(config, pool_id, desc, stdout):
    # type: (dict, str, str, dict) -> None
    """Log stdout returned by keyed SSH remote execution
    :param dict config: configuration dict
    :param str pool_id: pool id
    :param str desc: description
    :param dict stdout: keyed stdout by node id; None is treated as no
        output from any node
    """
    # the keyed SSH execution helper returns None for a pool without
    # compute nodes, which must not fail the iteration below
    if stdout is None:
        stdout = {}

    def _nonempty_lines(text):
        # type: (str) -> list
        """Split text on newlines and drop empty lines"""
        return [line for line in text.split('\n') if len(line) > 0]

    if settings.raw(config):
        raw = {
            'pool_id': pool_id,
            'nodes': {key: _nonempty_lines(stdout[key]) for key in stdout},
        }
        util.print_raw_json(raw)
    else:
        log = [
            'stdout for {} on pool {}'.format(desc, pool_id)
        ]
        for key in stdout:
            log.append('* node id: {}'.format(key))
            log.extend(
                '  >> {}'.format(line) for line in _nonempty_lines(stdout[key])
            )
        logger.info(os.linesep.join(log))


def _docker_system_prune_over_ssh(batch_client, config, volumes):
    # type: (batchsc.BatchServiceClient, dict, bool) -> None
    """Run docker system prune on every node of the pool over ssh
    :param batch_client: The batch client to use.
    :type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
    :param dict config: configuration dict
    :param bool volumes: remove volumes as well
    """
    pool_id = settings.pool_id(config)
    prompt = 'prune all unused Docker data on pool {}'.format(pool_id)
    # nothing is done unless the action is confirmed
    if util.confirm_action(config, msg=prompt):
        cloud_pool = batch_client.pool.get(pool_id)
        action = 'prune unused data'
        volumes_flag = ' --volumes' if volumes else ''
        prune_cmd = 'docker system prune -f{}'.format(volumes_flag)
        output = _execute_command_on_pool_over_ssh_with_keyed_output(
            batch_client, config, cloud_pool, action, [prune_cmd])
        _log_stdout_by_nodeid(config, pool_id, action, output)


def _zap_all_container_processes_over_ssh(batch_client, config, remove, stop):
    # type: (batchsc.BatchServiceClient, dict, bool, bool) -> None
    """Stop or kill all docker containers on every pool node over ssh
    :param batch_client: The batch client to use.
    :type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
    :param dict config: configuration dict
    :param bool remove: remove exited containers as well
    :param bool stop: docker stop instead of kill
    """
    pool_id = settings.pool_id(config)
    prompt = 'zap all Docker containers on pool {}'.format(pool_id)
    # nothing is done unless the action is confirmed
    if util.confirm_action(config, msg=prompt):
        cloud_pool = batch_client.pool.get(pool_id)
        action = 'zap all container processes'
        docker_verb = 'stop' if stop else 'kill'
        commands = [
            'docker ps -q | xargs -r docker {}'.format(docker_verb),
        ]
        if remove:
            commands.append(
                'docker ps -aq -f status=exited | xargs -r docker rm')
        output = _execute_command_on_pool_over_ssh_with_keyed_output(
            batch_client, config, cloud_pool, action, commands)
        _log_stdout_by_nodeid(config, pool_id, action, output)


def _update_container_images(
        batch_client, config, docker_image=None, docker_image_digest=None,
        singularity_image=None, force_ssh=False):
    # type: (batchsc.BatchServiceClient, dict, str, str, str, bool) -> None
    """Refresh container images on the nodes of the configured pool
    :param batch_client: The batch client to use.
    :type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
    :param dict config: configuration dict
    :param str docker_image: docker image to update, the global resource
        docker images are used if not specified
    :param str docker_image_digest: digest to update to
    :param str singularity_image: singularity image to update, the global
        resource singularity images are used if not specified
    :param bool force_ssh: force update over SSH
    :raises RuntimeError: if peer-to-peer distribution is enabled, the
        windows configuration is invalid or the update task exits non-zero
    """
    pool_id = settings.pool_id(config)
    # updating in place is refused for peer-to-peer image distribution
    try:
        p2p_enabled = settings.data_replication_settings(
            config).peer_to_peer.enabled
    except KeyError:
        p2p_enabled = False
    if p2p_enabled:
        raise RuntimeError(
            'cannot update container images for a pool with peer-to-peer '
            'image distribution')
    if settings.is_native_docker_pool(config) and not force_ssh:
        logger.debug('forcing update via SSH due to native mode')
        force_ssh = True
    # resolve docker images: explicit image or the global resources
    global_docker = settings.global_resources_docker_images(config)
    if util.is_none_or_empty(docker_image):
        docker_images = global_docker
    else:
        if docker_image not in global_docker:
            logger.warning(
                ('docker image {} is not specified as a global resource '
                 'for pool {}').format(docker_image, pool_id))
        if docker_image_digest is not None:
            docker_images = ['{}@{}'.format(docker_image, docker_image_digest)]
        else:
            docker_images = [docker_image]
    # resolve singularity images: explicit image or the global resources
    global_singularity = settings.global_resources_singularity_images(config)
    if util.is_none_or_empty(singularity_image):
        singularity_images = global_singularity
    else:
        if singularity_image not in global_singularity:
            logger.warning(
                ('singularity image {} is not specified as a global resource '
                 'for pool {}').format(singularity_image, pool_id))
        singularity_images = [singularity_image]
    has_docker = not util.is_none_or_empty(docker_images)
    has_singularity = not util.is_none_or_empty(singularity_images)
    if not has_docker and not has_singularity:
        logger.error('no images detected or specified to update')
        return
    # query the pool for its current node counts
    pool = batch_client.pool.get(pool_id)
    dedicated = pool.current_dedicated_nodes
    low_priority = pool.current_low_priority_nodes
    # there is nothing to update if the pool has no nodes
    if dedicated == 0 and low_priority == 0:
        logger.warning(
            ('not executing updateimages command as the current number of '
             'compute nodes is zero for pool {}').format(pool_id))
        return
    # pool layouts for which the update is forced over SSH
    if not force_ssh:
        if low_priority > 0:
            logger.debug('forcing update via SSH due to low priority nodes')
            force_ssh = True
        if dedicated > 1 and not pool.enable_inter_node_communication:
            logger.debug(
                'forcing update via SSH due to non-internode communicaton '
                'enabled pool')
            force_ssh = True
    # warn if the pool was created by a different version
    if util.is_none_or_empty(pool.metadata):
        logger.warning('pool version metadata not present')
    else:
        for entry in pool.metadata:
            if (entry.name != settings.get_metadata_version_name() or
                    entry.value == __version__):
                continue
            logger.warning(
                'pool version metadata mismatch: pool={} cli={}'.format(
                    entry.value, __version__))
            break
    # windows pools support neither SSH updates nor singularity
    is_windows = settings.is_windows_pool(config)
    if is_windows:
        if force_ssh:
            raise RuntimeError('cannot update images via SSH on windows')
        if util.is_not_empty(singularity_images):
            raise RuntimeError(
                'invalid configuration: windows pool with singularity images')
    # assemble the command list, starting from the login commands:
    # pull each docker image and remove dangling ones, then pull each
    # singularity image and hand the cache directory to _azbatch
    taskenv, coordcmd = batch.generate_docker_login_settings(config, force_ssh)
    if util.is_not_empty(docker_images):
        for image in docker_images:
            coordcmd.append('docker pull {}'.format(image))
        coordcmd.append(
            'docker images --filter dangling=true -q --no-trunc | '
            'xargs --no-run-if-empty docker rmi')
    if util.is_not_empty(singularity_images):
        coordcmd.append('export SINGULARITY_TMPDIR={}'.format(
            settings.get_singularity_tmpdir(config)))
        coordcmd.append('export SINGULARITY_CACHEDIR={}'.format(
            settings.get_singularity_cachedir(config)))
        for image in singularity_images:
            coordcmd.append('singularity pull -F {}'.format(image))
        coordcmd.append('chown -R _azbatch:_azbatchgrp {}'.format(
            settings.get_singularity_cachedir(config)))
    # SSH path: run the commands directly on the nodes and finish
    if force_ssh:
        stdout = _execute_command_on_pool_over_ssh_with_keyed_output(
            batch_client, config, pool, 'update container images', coordcmd)
        _log_stdout_by_nodeid(config, pool_id, 'uci', stdout)
        return
    # task path: singularity directories go into the task environment
    if not is_windows:
        taskenv.extend([
            batchmodels.EnvironmentSetting(
                name='SINGULARITY_TMPDIR',
                value=settings.get_singularity_tmpdir(config)
            ),
            batchmodels.EnvironmentSetting(
                name='SINGULARITY_CACHEDIR',
                value=settings.get_singularity_cachedir(config)
            ),
        ])
    coordcmd = util.wrap_commands_in_shell(coordcmd, windows=is_windows)
    job_id = 'shipyard-updateimages-{}'.format(uuid.uuid4())
    job = batchmodels.JobAddParameter(
        id=job_id,
        pool_info=batchmodels.PoolInformation(pool_id=pool_id),
    )
    batchtask = batchmodels.TaskAddParameter(
        id='update-container-images',
        command_line=coordcmd,
        environment_settings=taskenv,
        user_identity=batch._RUN_ELEVATED,
    )
    # with more than one dedicated node the commands run as the
    # coordination command of a multi-instance task and the application
    # command becomes a no-op
    if pool.current_dedicated_nodes > 1:
        batchtask.multi_instance_settings = batchmodels.MultiInstanceSettings(
            number_of_instances=pool.current_dedicated_nodes,
            coordination_command_line=coordcmd,
        )
        noop = ['rem'] if is_windows else [':']
        batchtask.command_line = util.wrap_commands_in_shell(
            noop, windows=is_windows)
    if settings.verbose(config):
        logger.debug('update command: {}'.format(coordcmd))
    # submit job and task
    batch_client.job.add(job)
    batch_client.task.add(job_id=job_id, task=batchtask)
    logger.debug(
        ('waiting for update container images task {} in job {} '
         'to complete').format(batchtask.id, job_id))
    # poll once a second until the task reports completed
    batchtask = batch_client.task.get(job_id, batchtask.id)
    while batchtask.state != batchmodels.TaskState.completed:
        time.sleep(1)
        batchtask = batch_client.task.get(job_id, batchtask.id)
    # stream out the stdout file for diagnosis of issues
    stdout_filespec = '{},{},{}'.format(job_id, batchtask.id, 'stdout.txt')
    batch.stream_file_and_wait_for_task(
        batch_client, config, filespec=stdout_filespec)
    # the job is deleted before the exit code is evaluated
    batch_client.job.delete(job_id)
    if batchtask.execution_info.exit_code != 0:
        raise RuntimeError('update container images job failed')
    logger.info(
        'update container images task {} in job {} completed'.format(
            batchtask.id, job_id))


def _docker_ps_over_ssh(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """Run docker ps on the pool over ssh and log the output
    :param batch_client: The batch client to use.
    :type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
    :param dict config: configuration dict
    """
    description = 'docker ps'
    target_pool_id = settings.pool_id(config)
    target_pool = batch_client.pool.get(target_pool_id)
    # include stopped containers as well (-a)
    keyed_stdout = _execute_command_on_pool_over_ssh_with_keyed_output(
        batch_client, config, target_pool, description, ['docker ps -a'])
    # hand the keyed output to the logging helper together with the pool id
    _log_stdout_by_nodeid(
        config, target_pool_id, description, keyed_stdout)


def _list_docker_images(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """List Docker images in pool over ssh. Images common to every key of
    the command output are reported together; images present on only a
    subset are reported per key as mismatched. Output is emitted either as
    raw json or through the logger depending on the raw setting.
    :param batch_client: The batch client to use.
    :type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
    :param dict config: configuration dict
    """
    pool_id = settings.pool_id(config)
    pool = batch_client.pool.get(pool_id)
    cmd = ['docker images --format "{{.ID}} {{.Repository}}:{{.Tag}}"']
    stdout = _execute_command_on_pool_over_ssh_with_keyed_output(
        batch_client, config, pool, 'list docker images', cmd)
    # evaluate the output mode once for the whole function
    emit_raw = settings.raw(config)
    # images with these repository prefixes are not reported
    excluded_prefixes = ('alfpark/batch-shipyard', 'alfpark/blobxfer')
    # process stdout: map each key to the image ids it reported and
    # record the first seen repository:tag for every image id
    node_images = {}
    all_images = {}
    for key in stdout:
        node_images[key] = set()
        for out in stdout[key].split('\n'):
            dec = out.split()
            # skip blank/whitespace-only lines and any line that does not
            # carry both an image id and a repository:tag token
            if len(dec) < 2:
                continue
            image_id, image_name = dec[0], dec[1]
            if image_name.startswith(excluded_prefixes):
                continue
            node_images[key].add(image_id)
            all_images.setdefault(image_id, image_name)
    # find set intersection among all nodes; set.intersection requires at
    # least one argument, thus guard against no output being returned
    if node_images:
        intersecting_images = set.intersection(*node_images.values())
    else:
        intersecting_images = set()
    if emit_raw:
        raw = {
            'pool_id': pool.id,
            'common': {
                key: all_images[key] for key in intersecting_images
            },
            'mismatched': {},
        }
    else:
        logger.info(
            'Common Docker images across all nodes in pool {}:{}{}'.format(
                pool.id,
                os.linesep,
                os.linesep.join(
                    ['{} {}'.format(key, all_images[key])
                     for key in intersecting_images])
            ))
    # find mismatched images on nodes
    for node in node_images:
        diff = node_images[node].difference(intersecting_images)
        if not diff:
            continue
        if emit_raw:
            raw['mismatched'][node] = {
                key: all_images[key] for key in diff
            }
        else:
            logger.warning(
                'Docker images present only on node {}:{}{}'.format(
                    node, os.linesep,
                    os.linesep.join(
                        ['{} {}'.format(key, all_images[key])
                         for key in diff])
                ))
    if emit_raw:
        util.print_raw_json(raw)


def _platform_image_docker_support(publisher, offer, sku):
    # type: (str, str, str) -> tuple
    """Check publisher/offer/sku restrictions for a Docker host VM config
    :param str publisher: lower-cased image publisher
    :param str offer: lower-cased image offer
    :param str sku: lower-cased image sku
    :rtype: tuple
    :return: (allowed, shipyard_container_required)
    """
    allowed = False
    shipyard_container_required = True
    if publisher == 'microsoft-azure-batch':
        allowed = offer in (
            'centos-container',
            'centos-container-rdma',
            'ubuntu-server-container',
            'ubuntu-server-container-rdma',
        )
    elif publisher == 'canonical':
        if offer == 'ubuntuserver' and sku in ('16.04-lts', '18.04-lts'):
            allowed = True
            shipyard_container_required = False
    elif publisher == 'credativ':
        # NOTE(review): this is a lexicographic string comparison, a
        # two-digit major version such as '10' compares lower than '9'
        if offer == 'debian' and sku >= '9':
            allowed = True
    elif publisher == 'openlogic':
        # NOTE(review): lexicographic string comparison, see above
        if offer.startswith('centos') and sku >= '7':
            allowed = True
    elif publisher == 'microsoftwindowsserver':
        if offer == 'windowsserver':
            allowed = sku in (
                '2016-datacenter-with-containers',
                '2019-datacenter-with-containers',
                '2019-datacenter-with-containers-smalldisk',
                '2019-datacenter-core-with-containers',
                '2019-datacenter-core-with-containers-smalldisk',
            )
        elif offer == 'windowsserversemiannual':
            allowed = sku in (
                'datacenter-core-1709-with-containers-smalldisk',
                'datacenter-core-1803-with-containers-smalldisk',
                'datacenter-core-1809-with-containers-smalldisk',
            )
    return allowed, shipyard_container_required


def _adjust_settings_for_pool_creation(config):
    # type: (dict) -> None
    """Adjust settings for pool creation. The pool related configuration is
    validated for unsupported or incompatible combinations and selected
    settings are overridden through the settings.set_* helpers where a
    combination requires it (a log message is emitted for each override).
    :param dict config: configuration dict
    :raises ValueError: if the pool configuration is unsupported or
        contains incompatible options
    """
    # get settings
    pool = settings.pool_settings(config)
    publisher = settings.pool_publisher(config, lower=True)
    offer = settings.pool_offer(config, lower=True)
    sku = settings.pool_sku(config, lower=True)
    node_agent = settings.pool_custom_image_node_agent(config)
    if util.is_not_empty(node_agent) and util.is_not_empty(sku):
        raise ValueError(
            'cannot specify both a platform_image and a custom_image in the '
            'pool specification')
    is_windows = settings.is_windows_pool(config)
    bs = settings.batch_shipyard_settings(config)
    # enforce publisher/offer/sku restrictions
    allowed, shipyard_container_required = _platform_image_docker_support(
        publisher, offer, sku)
    # check if allowed for gpu (if gpu vm size)
    if allowed:
        allowed = settings.gpu_configuration_check(
            config, vm_size=pool.vm_size)
    # the restrictions are only enforced if no custom image node agent
    # is specified
    if not allowed and util.is_none_or_empty(node_agent):
        raise ValueError(
            ('unsupported Docker (and/or GPU) Host VM Config, publisher={} '
             'offer={} sku={} vm_size={}').format(
                 publisher, offer, sku, pool.vm_size))
    # ensure HPC offers are matched with RDMA sizes
    hpc_offers = (
        'centos-hpc',
        'sles-hpc',
        'centos-container-rdma',
        'ubuntu-server-container-rdma',
    )
    if (not is_windows and offer in hpc_offers and
            not settings.is_rdma_pool(pool.vm_size)):
        raise ValueError(
            ('cannot allocate an HPC VM config of publisher={} offer={} '
             'sku={} with a non-RDMA vm_size={}').format(
                 publisher, offer, sku, pool.vm_size))
    # compute total vm count
    pool_total_vm_count = pool.vm_count.dedicated + pool.vm_count.low_priority
    # adjust for shipyard container requirement
    if (not bs.use_shipyard_docker_image and
            (shipyard_container_required or util.is_not_empty(node_agent))):
        settings.set_use_shipyard_docker_image(config, True)
        logger.debug(
            ('forcing shipyard docker image to be used due to '
             'VM config, publisher={} offer={} sku={}').format(
                 publisher, offer, sku))
    # re-read pool and data replication settings
    pool = settings.pool_settings(config)
    dr = settings.data_replication_settings(config)
    native = settings.is_native_docker_pool(
        config, vm_config=pool.vm_configuration)
    # ensure singularity images are not specified for native pools
    if native:
        images = settings.global_resources_singularity_images(config)
        if util.is_not_empty(images):
            raise ValueError(
                'cannot specify a native container pool with Singularity '
                'images as global resources')
    # ensure settings p2p/as/internode settings are compatible
    if dr.peer_to_peer.enabled:
        if native and not bs.delay_docker_image_preload:
            raise ValueError(
                'cannot enable peer-to-peer and native container pools')
        if settings.is_pool_autoscale_enabled(config, pas=pool.autoscale):
            raise ValueError('cannot enable peer-to-peer and autoscale')
        # only override (and warn) if inter-node communication is not
        # already enabled, otherwise there is nothing to force
        if not pool.inter_node_communication_enabled:
            logger.warning(
                'force enabling inter-node communication due to peer-to-peer '
                'transfer')
            settings.set_inter_node_communication_enabled(config, True)
    # hpn-ssh can only be used for Ubuntu currently: disable the swap for
    # anything that is not canonical/ubuntuserver or is a custom image
    try:
        if (pool.ssh.hpn_server_swap and
                (not (publisher == 'canonical' and
                      offer == 'ubuntuserver') or
                 util.is_not_empty(node_agent))):
            logger.warning('cannot enable HPN SSH swap on {} {} {}'.format(
                publisher, offer, sku))
            settings.set_hpn_server_swap(config, False)
    except KeyError:
        pass
    # force disable block for global resources if ingressing data
    if (pool.transfer_files_on_pool_creation and
            pool.block_until_all_global_resources_loaded):
        logger.warning(
            'disabling block until all global resources loaded with '
            'transfer files on pool creation enabled')
        settings.set_block_until_all_global_resources_loaded(config, False)
    # re-read pool settings to pick up any overrides applied above
    pool = settings.pool_settings(config)
    # ensure internode is not enabled for mix node pools
    if (pool.inter_node_communication_enabled and
            pool.vm_count.dedicated > 0 and pool.vm_count.low_priority > 0):
        raise ValueError(
            'inter node communication cannot be enabled with both '
            'dedicated and low priority nodes')
    # check shared data volume settings
    try:
        num_gluster = 0
        sdv = settings.global_resources_shared_data_volumes(config)
        for sdvkey in sdv:
            if settings.is_shared_data_volume_gluster_on_compute(sdv, sdvkey):
                if is_windows:
                    raise ValueError(
                        'glusterfs on compute is not supported on windows')
                if settings.is_pool_autoscale_enabled(
                        config, pas=pool.autoscale):
                    raise ValueError(
                        'glusterfs on compute cannot be installed on an '
                        'autoscale-enabled pool')
                if not pool.inter_node_communication_enabled:
                    # do not modify value and proceed since this interplays
                    # with p2p settings, simply raise exception and force
                    # user to reconfigure
                    raise ValueError(
                        'inter node communication in pool configuration '
                        'must be enabled for glusterfs on compute')
                if pool.vm_count.low_priority > 0:
                    raise ValueError(
                        'glusterfs on compute cannot be installed on pools '
                        'with low priority nodes')
                if pool.vm_count.dedicated <= 1:
                    raise ValueError(
                        'vm_count dedicated should exceed 1 for glusterfs '
                        'on compute')
                if pool.max_tasks_per_node > 1:
                    raise ValueError(
                        'max_tasks_per_node cannot exceed 1 for glusterfs '
                        'on compute')
                num_gluster += 1
                try:
                    if settings.gluster_volume_type(sdv, sdvkey) != 'replica':
                        raise ValueError(
                            'only replicated GlusterFS volumes are '
                            'currently supported')
                except KeyError:
                    pass
            elif settings.is_shared_data_volume_storage_cluster(sdv, sdvkey):
                if is_windows:
                    raise ValueError(
                        'storage cluster mounting is not supported on windows')
            elif settings.is_shared_data_volume_azure_blob(sdv, sdvkey):
                if is_windows:
                    raise ValueError(
                        'azure blob mounting is not supported on windows')
            elif settings.is_shared_data_volume_custom_linux_mount(
                    sdv, sdvkey):
                if is_windows:
                    raise ValueError(
                        'custom linux mounting is not supported on windows')
        if num_gluster > 1:
            raise ValueError(
                'cannot create more than one GlusterFS on compute volume '
                'per pool')
    except KeyError:
        pass
    # check auto scratch settings
    if pool.per_job_auto_scratch:
        if is_windows:
            raise ValueError('per job auto scratch is only available on Linux')
        if not pool.inter_node_communication_enabled:
            raise ValueError(
                'inter node communication in pool configuration '
                'must be enabled for per job auto scratch')
        if (pool.virtual_network.arm_subnet_id is not None or
                pool.virtual_network.name is not None or
                pool.remote_access_control.allow is not None or
                pool.remote_access_control.deny is not None):
            logger.warning(
                'ensure that you allow SSH access within the virtual network '
                'or do not deny intranet SSH traffic for per job auto scratch')
    # check data ingress on pool creation on windows
    if is_windows and pool.transfer_files_on_pool_creation:
        raise ValueError(
            'cannot transfer files on pool creation to windows compute nodes')
    # check singularity images are not present for windows
    if (is_windows and util.is_not_empty(
            settings.global_resources_singularity_images(config))):
        raise ValueError('cannot deploy Singularity images on windows pools')
    # check pool count of 0 and remote login
    if pool_total_vm_count == 0:
        if is_windows:
            # TODO RDP check
            pass
        else:
            if util.is_not_empty(pool.ssh.username):
                logger.warning('cannot add SSH user with zero target nodes')
    # ensure unusable recovery is not enabled for custom image
    if (pool.attempt_recovery_on_unusable and
            not settings.is_platform_image(
                config, vm_config=pool.vm_configuration)):
        logger.warning(
            'override attempt recovery on unusable due to custom image')
        settings.set_attempt_recovery_on_unusable(config, False)
    # currently prometheus monitoring is only available on Linux nodes
    if (is_windows and
            (pool.prometheus.ne_enabled or pool.prometheus.ca_enabled)):
        raise ValueError(
            'Prometheus monitoring is only available for Linux nodes')
    # check container runtime compatibility
    if util.is_not_empty(pool.container_runtimes_install):
        if 'kata_containers' in pool.container_runtimes_install:
            if is_windows:
                raise ValueError(
                    'Cannot install kata_containers runtime on Windows')
            if not ((publisher == 'canonical' and offer == 'ubuntuserver') or
                    (publisher == 'openlogic' and
                     offer.startswith('centos')) or
                    publisher == 'microsoft-azure-batch'):
                raise ValueError(
                    'Cannot install kata_containers runtime on an '
                    'unsupported OS: {} {} {}'.format(publisher, offer, sku))
            if not settings.is_nested_virtualization_capable(pool.vm_size):
                raise ValueError(
                    'Cannot use kata_containers runtime on VMs that do not '
                    'support nested virtualization: {}'.format(pool.vm_size))


def _check_settings_for_auto_pool(config):
    # type: (dict) -> None
    """Check settings for autopool
    :param dict config: configuration dict
    :raises ValueError: if a GlusterFS on compute shared data volume is
        configured or if files are to be transferred on pool creation
    """
    # check glusterfs on compute; a KeyError raised while reading the
    # shared data volume settings is ignored as before
    try:
        sdv = settings.global_resources_shared_data_volumes(config)
        gluster_on_compute = any(
            settings.is_shared_data_volume_gluster_on_compute(sdv, sdvkey)
            for sdvkey in sdv)
    except KeyError:
        gluster_on_compute = False
    if gluster_on_compute:
        raise ValueError(
            'GlusterFS on compute is not possible with autopool')
    # get settings
    pool = settings.pool_settings(config)
    # check local data movement to pool
    if pool.transfer_files_on_pool_creation:
        raise ValueError('Cannot ingress data on pool creation with autopool')
    # check ssh: this is only a warning, not an error
    if util.is_not_empty(pool.ssh.username):
        logger.warning('cannot add SSH user with autopool')


def _check_resource_client(resource_client):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient) -> None
    """Ensure that a resource management client has been provided
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :raises RuntimeError: if the client is None
    """
    if resource_client is not None:
        return
    raise RuntimeError(
        'resource management client is invalid, ensure you have specified '
        'proper "management" credentials')


def _check_compute_client(compute_client):
    # type: (azure.mgmt.compute.ComputeManagementClient) -> None
    """Ensure that a compute management client has been provided
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :raises RuntimeError: if the client is None
    """
    if compute_client is not None:
        return
    raise RuntimeError(
        'compute management client is invalid, ensure you have specified '
        'proper "management" credentials')


def _check_network_client(network_client):
    # type: (azure.mgmt.network.NetworkManagementClient) -> None
    """Ensure that a network management client has been provided
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :raises RuntimeError: if the client is None
    """
    if network_client is not None:
        return
    raise RuntimeError(
        'network management client is invalid, ensure you have specified '
        'proper "management" credentials')


def _check_keyvault_client(keyvault_client):
    # type: (azure.keyvault.KeyVaultClient) -> None
    """Ensure that a keyvault client has been provided
    :param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
    :raises RuntimeError: if the client is None
    """
    if keyvault_client is not None:
        return
    raise RuntimeError(
        'keyvault client is invalid, ensure you have specified proper '
        '"keyvault" credentials')


def _check_batch_client(batch_client):
    # type: (batchsc.BatchServiceClient) -> None
    """Ensure that a batch client has been provided
    :param batch_client: The batch client to check.
    :type batch_client: `azure.batch.batch_service_client.BatchServiceClient`
    :raises RuntimeError: if the client is None
    """
    if batch_client is not None:
        return
    raise RuntimeError(
        'batch client is invalid, ensure you have specified proper '
        '"batch" credentials')


def action_account_info(batch_mgmt_client, config, name, resource_group):
    # type: (azure.mgmt.batch.BatchManagementClient, dict, str, str) -> None
    """Action: Account Info
    :param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
        batch management client
    :param dict config: configuration dict
    :param str name: Batch account name
    :param str resource_group: resource group of Batch account
    """
    # the account name is forwarded under the account_name keyword
    batch.log_batch_account_info(
        batch_mgmt_client,
        config,
        account_name=name,
        resource_group=resource_group,
    )


def action_account_list(batch_mgmt_client, config, resource_group):
    # type: (azure.mgmt.batch.BatchManagementClient, dict, str) -> None
    """Action: Account List
    :param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
        batch management client
    :param dict config: configuration dict
    :param str resource_group: resource group limiter
    """
    batch.log_batch_account_list(
        batch_mgmt_client,
        config,
        resource_group=resource_group,
    )


def action_account_quota(batch_mgmt_client, config, location):
    # type: (azure.mgmt.batch.BatchManagementClient, dict, str) -> None
    """Action: Account Quota
    :param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
        batch management client
    :param dict config: configuration dict
    :param str location: location
    """
    batch.log_batch_account_service_quota(
        batch_mgmt_client,
        config,
        location,
    )


def action_fs_disks_add(resource_client, compute_client, config):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient, dict) -> None
    """Action: Fs Disks Add
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :raises RuntimeError: if a required management client is None
    """
    # both management clients must be present before creating disks
    _check_resource_client(resource_client)
    _check_compute_client(compute_client)
    remotefs.create_managed_disks(
        resource_client,
        compute_client,
        config,
    )


def action_fs_disks_del(
        resource_client, compute_client, config, name, resource_group, all,
        delete_resource_group, wait):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient, dict, str,
    #        str, bool, bool, bool) -> None
    """Action: Fs Disks Del
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param str name: disk name
    :param str resource_group: resource group
    :param bool all: delete all in resource group
    :param bool delete_resource_group: delete resource group
    :param bool wait: wait for operation to complete
    :raises RuntimeError: if a required management client is None
    """
    # the resource client is only validated when a resource group deletion
    # is requested so that callers not requesting it are unaffected
    if delete_resource_group:
        _check_resource_client(resource_client)
    _check_compute_client(compute_client)
    remotefs.delete_managed_disks(
        resource_client, compute_client, config, name, resource_group, all,
        delete_resource_group, wait, confirm_override=False)


def action_fs_disks_list(
        compute_client, config, resource_group, restrict_scope):
    # type: (azure.mgmt.compute.ComputeManagementClient, dict, str,
    #        bool) -> None
    """Action: Fs Disks List
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param str resource_group: resource group
    :param bool restrict_scope: restrict scope to config
    :raises RuntimeError: if the compute client is None
    """
    _check_compute_client(compute_client)
    remotefs.list_disks(
        compute_client,
        config,
        resource_group,
        restrict_scope,
    )


def action_fs_cluster_add(
        resource_client, compute_client, network_client, blob_client,
        config, storage_cluster_id):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.storage.blob.BlockBlobService, dict, str) -> None
    """Action: Fs Cluster Add
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param dict config: configuration dict
    :param str storage_cluster_id: storage cluster id
    :raises RuntimeError: if a required management client is None
    """
    _check_resource_client(resource_client)
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    storage.set_storage_remotefs_container(storage_cluster_id)
    rfs = settings.remotefs_settings(config, storage_cluster_id)
    # work on a copy so that the module-level file list is not mutated
    # and repeated invocations do not accumulate duplicate entries
    remotefs_files = list(_ALL_REMOTEFS_FILES)
    # add node exporter if enabled
    if rfs.storage_cluster.prometheus.ne_enabled:
        ne_pkg = setup_prometheus_node_exporter()
        remotefs_files.append((ne_pkg.name, ne_pkg))
    remotefs.create_storage_cluster(
        resource_client, compute_client, network_client, blob_client, config,
        storage_cluster_id, _REMOTEFSPREP_FILE[0], remotefs_files)


def action_fs_cluster_resize(
        compute_client, network_client, blob_client, config,
        storage_cluster_id):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.storage.blob.BlockBlobService, dict, str) -> None
    """Action: Fs Cluster Resize
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param dict config: configuration dict
    :param str storage_cluster_id: storage cluster id
    :raises RuntimeError: if a required management client is None
    """
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    # first entries of the module-level prep and add brick file tuples
    prep_file = _REMOTEFSPREP_FILE[0]
    addbrick_file = _REMOTEFSADDBRICK_FILE[0]
    remotefs.resize_storage_cluster(
        compute_client, network_client, blob_client, config,
        storage_cluster_id, prep_file, addbrick_file, _ALL_REMOTEFS_FILES)


def action_fs_cluster_del(
        resource_client, compute_client, network_client, blob_client, config,
        storage_cluster_id, delete_all_resources, delete_data_disks,
        delete_virtual_network, generate_from_prefix, wait):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.storage.blob.BlockBlobService, dict, str, bool, bool,
    #        bool, bool, bool) -> None
    """Action: Fs Cluster Del
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param dict config: configuration dict
    :param str storage_cluster_id: storage cluster id
    :param bool delete_all_resources: delete all resources
    :param bool delete_data_disks: delete data disks
    :param bool delete_virtual_network: delete virtual network
    :param bool generate_from_prefix: generate resources from hostname prefix
    :param bool wait: wait for deletion to complete
    :raises RuntimeError: if a required management client is None
    :raises ValueError: if generate_from_prefix is combined with any
        delete_* option
    """
    _check_resource_client(resource_client)
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    # generate_from_prefix is mutually exclusive with the delete_* options
    if generate_from_prefix:
        if (delete_all_resources or delete_data_disks or
                delete_virtual_network):
            raise ValueError(
                'Cannot specify generate_from_prefix and a delete_* option')
    storage.set_storage_remotefs_container(storage_cluster_id)
    # delete_all_resources is forwarded as the delete_resource_group option
    remotefs.delete_storage_cluster(
        resource_client, compute_client, network_client, blob_client, config,
        storage_cluster_id,
        delete_data_disks=delete_data_disks,
        delete_virtual_network=delete_virtual_network,
        delete_resource_group=delete_all_resources,
        generate_from_prefix=generate_from_prefix,
        wait=wait)


def action_fs_cluster_expand(
        compute_client, network_client, config, storage_cluster_id, rebalance):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient, dict, str,
    #        bool) -> None
    """Action: Fs Cluster Expand
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    :param str storage_cluster_id: storage cluster id
    :param bool rebalance: rebalance filesystem
    :raises RuntimeError: if a required management client is None
    """
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    expanded = remotefs.expand_storage_cluster(
        compute_client, network_client, config, storage_cluster_id,
        _REMOTEFSPREP_FILE[0], rebalance)
    if not expanded:
        return
    # show detailed status only if the expand call returned a truthy value
    action_fs_cluster_status(
        compute_client, network_client, config, storage_cluster_id,
        detail=True, hosts=False)


def action_fs_cluster_suspend(
        compute_client, config, storage_cluster_id, wait):
    # type: (azure.mgmt.compute.ComputeManagementClient, dict, str,
    #        bool) -> None
    """Action: Fs Cluster Suspend
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param str storage_cluster_id: storage cluster id
    :param bool wait: wait for suspension to complete
    :raises RuntimeError: if the compute client is None
    """
    _check_compute_client(compute_client)
    remotefs.suspend_storage_cluster(
        compute_client,
        config,
        storage_cluster_id,
        wait,
    )


def action_fs_cluster_start(
        compute_client, network_client, config, storage_cluster_id, wait):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient, dict, str,
    #        bool) -> None
    """Action: Fs Cluster Start
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    :param str storage_cluster_id: storage cluster id
    :param bool wait: wait for restart to complete
    :raises RuntimeError: if a required management client is None
    """
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    remotefs.start_storage_cluster(
        compute_client, config, storage_cluster_id, wait)
    if not wait:
        return
    # status is only reported if the start call was waited on
    action_fs_cluster_status(
        compute_client, network_client, config, storage_cluster_id,
        detail=False, hosts=False)


def action_fs_cluster_status(
        compute_client, network_client, config, storage_cluster_id,
        detail, hosts):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient, dict, str, bool,
    #        bool) -> None
    """Action: Fs Cluster Status
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    :param str storage_cluster_id: storage cluster id
    :param bool detail: detailed status
    :param bool hosts: dump info for /etc/hosts
    :raises RuntimeError: if a required management client is None
    """
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    # first entry of the module-level stat file tuple
    stat_file = _REMOTEFSSTAT_FILE[0]
    remotefs.stat_storage_cluster(
        compute_client, network_client, config, storage_cluster_id,
        stat_file, detail, hosts)


def action_fs_cluster_ssh(
        compute_client, network_client, config, storage_cluster_id,
        cardinal, hostname, tty, command):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient, dict, str, int,
    #        str, bool, tuple) -> None
    """Action: Fs Cluster Ssh
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    :param str storage_cluster_id: storage cluster id
    :param int cardinal: cardinal number
    :param str hostname: hostname
    :param bool tty: allocate pseudo-tty
    :param tuple command: command
    :raises RuntimeError: if a required management client is None
    :raises ValueError: if both cardinal and hostname are specified or
        if cardinal is negative
    """
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    if cardinal is None:
        if hostname is None:
            # neither selector was given, fall back to the first node
            logger.warning(
                'assuming node cardinal of 0 as no cardinal or hostname '
                'option was specified')
            cardinal = 0
    elif hostname is not None:
        # the two selectors are mutually exclusive
        raise ValueError('cannot specify both cardinal and hostname options')
    if cardinal is not None and cardinal < 0:
        raise ValueError('invalid cardinal option value')
    remotefs.ssh_storage_cluster(
        compute_client, network_client, config, storage_cluster_id,
        cardinal, hostname, tty, command)


def action_keyvault_add(keyvault_client, config, keyvault_uri, name):
    # type: (azure.keyvault.KeyVaultClient, dict, str, str) -> None
    """Action: Keyvault Add

    Checks the keyvault client and delegates to
    keyvault.store_credentials_conf.

    :param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
    :param dict config: configuration dict
    :param str keyvault_uri: keyvault uri
    :param str name: secret name
    """
    _check_keyvault_client(keyvault_client)
    keyvault.store_credentials_conf(
        keyvault_client,
        config,
        keyvault_uri,
        name,
    )


def action_keyvault_del(keyvault_client, keyvault_uri, name):
    # type: (azure.keyvault.KeyVaultClient, str, str) -> None
    """Action: Keyvault Del

    Checks the keyvault client and delegates to keyvault.delete_secret.

    :param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
    :param str keyvault_uri: keyvault uri
    :param str name: secret name
    """
    _check_keyvault_client(keyvault_client)
    keyvault.delete_secret(
        keyvault_client,
        keyvault_uri,
        name,
    )


def action_keyvault_list(keyvault_client, keyvault_uri):
    # type: (azure.keyvault.KeyVaultClient, str) -> None
    """Action: Keyvault List

    Checks the keyvault client and delegates to keyvault.list_secrets.

    :param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
    :param str keyvault_uri: keyvault uri
    """
    _check_keyvault_client(keyvault_client)
    keyvault.list_secrets(
        keyvault_client,
        keyvault_uri,
    )


def action_cert_create(config, file_prefix, pfx_password):
    # type: (dict, str, str) -> None
    """Action: Cert Create

    Calls crypto.generate_pem_pfx_certificates and logs the value it
    returns as the SHA1 thumbprint.

    :param dict config: configuration dict
    :param str file_prefix: prefix of file to create
    :param str pfx_password: pfx password
    """
    thumbprint = crypto.generate_pem_pfx_certificates(
        config,
        file_prefix,
        pfx_password,
    )
    logger.info('SHA1 Thumbprint: {}'.format(thumbprint))


def action_cert_add(
        batch_client, config, file, pem_no_certs, pem_public_key,
        pfx_password):
    # type: (batchsc.BatchServiceClient, dict, str, bool, bool, str) -> None
    """Action: Cert Add

    When either PEM option is set, a file must be given and its name
    must end with ``.pem`` (case-insensitive). The request is then
    forwarded to batch.add_certificate_to_account.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str file: file to add
    :param bool pem_no_certs: don't export certs from pem
    :param bool pem_public_key: only add public key from pem
    :param str pfx_password: pfx password
    :raises ValueError: if a PEM option is combined with a non-PEM file
        or with no file
    """
    _check_batch_client(batch_client)
    pem_option_given = pem_public_key or pem_no_certs
    if pem_option_given:
        file_given = util.is_not_empty(file)
        if file_given and not file.lower().endswith('.pem'):
            raise ValueError(
                'cannot specify a --pem-* option and a non-PEM file')
        elif util.is_none_or_empty(file):
            raise ValueError('cannot specify --pem-* option and no file')
    batch.add_certificate_to_account(
        batch_client, config, file, pem_no_certs, pem_public_key,
        pfx_password)


def action_cert_list(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """Action: Cert List

    Checks the batch client and delegates to
    batch.list_certificates_in_account.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    """
    _check_batch_client(batch_client)
    batch.list_certificates_in_account(
        batch_client,
        config,
    )


def action_cert_del(batch_client, config, sha1):
    # type: (batchsc.BatchServiceClient, dict, List[str]) -> None
    """Action: Cert Del

    Checks the batch client and delegates to
    batch.del_certificate_from_account.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param list sha1: list of sha1 thumbprints to delete
    """
    _check_batch_client(batch_client)
    batch.del_certificate_from_account(
        batch_client,
        config,
        sha1,
    )


def action_pool_listskus(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """Action: Pool Listskus

    Checks the batch client and delegates to batch.list_node_agent_skus.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    """
    _check_batch_client(batch_client)
    batch.list_node_agent_skus(
        batch_client,
        config,
    )


def action_pool_add(
        resource_client, compute_client, network_client, batch_mgmt_client,
        batch_client, blob_client, table_client, keyvault_client, config,
        recreate):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.mgmt.batch.BatchManagementClient,
    #        azure.batch.batch_service_client.BatchServiceClient,
    #        azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService,
    #        azure.keyvault.KeyVaultClient, dict, bool) -> None
    """Action: Pool Add

    If the pool already exists it is either deleted first (when
    ``recreate`` is set and the user confirms) or the call fails. Storage
    containers are then created and cleared before the pool is added.

    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
        batch management client
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
    :param dict config: configuration dict
    :param bool recreate: recreate
    :raises RuntimeError: if the pool already exists and recreate is not
        set
    """
    _check_batch_client(batch_client)
    # first check if pool exists to prevent accidental metadata clear or
    # if recreation is intentional
    pool_id = settings.pool_id(config)
    if batch.pool_exists(batch_client, pool_id):
        if not recreate:
            raise RuntimeError(
                'attempting to create a pool that already exists: {}'.format(
                    pool_id))
        if not util.confirm_action(
                config,
                msg='recreate existing pool {}'.format(pool_id)):
            return
        # temporarily force auto-confirm for the nested delete (the user
        # has already confirmed above); always restore the caller's
        # setting, even if the delete raises
        confirm_setting = settings.get_auto_confirm(config)
        settings.set_auto_confirm(config, True)
        try:
            logger.debug('pool deletion may take a while, please be patient')
            action_pool_delete(
                batch_client, blob_client, table_client, config,
                pool_id=pool_id, wait=True)
        finally:
            settings.set_auto_confirm(config, confirm_setting)
        del confirm_setting
    _adjust_settings_for_pool_creation(config)
    storage.create_storage_containers(blob_client, table_client, config)
    storage.clear_storage_containers(blob_client, table_client, config)
    if settings.requires_populate_global_resources_storage(config):
        storage.populate_global_resource_blobs(
            blob_client, table_client, config)
    _add_pool(
        resource_client, compute_client, network_client, batch_mgmt_client,
        batch_client, blob_client, keyvault_client, config
    )


def action_pool_exists(batch_client, config, pool_id=None):
    # type: (batchsc.BatchServiceClient, dict, str) -> None
    """Action: Pool Exists

    Logs whether the pool exists. Exits the process with status 1 via
    sys.exit when it does not.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str pool_id: poolid to check; if None or empty, the pool id
        from the configuration is used
    """
    _check_batch_client(batch_client)
    if util.is_none_or_empty(pool_id):
        pool_id = settings.pool_id(config)
    if batch.pool_exists(batch_client, pool_id):
        logger.info('pool {} exists'.format(pool_id))
        return
    logger.info('pool {} does not exist'.format(pool_id))
    sys.exit(1)


def action_pool_list(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """Action: Pool List

    Checks the batch client and delegates to batch.list_pools.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    """
    _check_batch_client(batch_client)
    batch.list_pools(
        batch_client,
        config,
    )


def action_pool_delete(
        batch_client, blob_client, table_client, config, pool_id=None,
        wait=False):
    # type: (batchsc.BatchServiceClient, azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService, dict, str, bool) -> None
    """Action: Pool Delete

    Deletes the pool and, if the deletion succeeded (or the service
    reports the pool as already gone or marked for deletion), cleans up
    the associated storage. Other batch errors are logged and not
    re-raised.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    :param str pool_id: poolid to delete
    :param bool wait: wait for pool to delete
    """
    _check_batch_client(batch_client)
    try:
        deleted = batch.del_pool(batch_client, config, pool_id=pool_id)
    except batchmodels.BatchErrorException as ex:
        error_text = ex.message.value
        # a pool that is already gone or going counts as deleted
        deleted = (
            'The specified pool does not exist' in error_text or
            'The specified pool has been marked for deletion' in
            error_text)
        if not deleted:
            logger.exception(ex)
    if not deleted:
        return
    # reset storage settings to target poolid if required
    if util.is_not_empty(pool_id):
        populate_global_settings(config, False, pool_id=pool_id)
    else:
        pool_id = settings.pool_id(config)
    storage.cleanup_with_del_pool(
        blob_client, table_client, config, pool_id=pool_id)
    if not wait:
        return
    logger.debug('waiting for pool {} to delete'.format(pool_id))
    # poll until the service no longer reports the pool
    while batch_client.pool.exists(pool_id):
        time.sleep(3)


def action_pool_resize(batch_client, blob_client, config, wait):
    # type: (batchsc.BatchServiceClient, azure.storage.blob.BlockBlobService,
    #        dict, bool) -> None
    """Resize pool that may contain glusterfs

    Compares the configured vm_count against the pool's current and
    target node counts, resizes the pool, then optionally adds the SSH
    user to the new nodes and runs the glusterfs resize script when a
    gluster-on-compute shared volume is configured.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param dict config: configuration dict
    :param bool wait: wait for operation to complete; forced to True when
        a glusterfs on compute volume is present
    :raises RuntimeError: if resizing up low priority nodes on a pool with
        glusterfs on compute, or if the pool's current dedicated count
        does not match vm_count after the resize
    """
    _check_batch_client(batch_client)
    pool = settings.pool_settings(config)
    # check direction of resize
    _pool = batch_client.pool.get(pool.id)
    # nothing to do if both current and target counts already match the
    # configured counts
    if (pool.vm_count.dedicated == _pool.current_dedicated_nodes ==
            _pool.target_dedicated_nodes and
            pool.vm_count.low_priority == _pool.current_low_priority_nodes ==
            _pool.target_low_priority_nodes):
        logger.error(
            'pool {} is already at {} nodes'.format(pool.id, pool.vm_count))
        return
    resize_up_d = False
    resize_up_lp = False
    if pool.vm_count.dedicated > _pool.current_dedicated_nodes:
        resize_up_d = True
    if pool.vm_count.low_priority > _pool.current_low_priority_nodes:
        resize_up_lp = True
    del _pool
    create_ssh_user = False
    # try to get handle on public key, avoid generating another set
    # of keys
    if resize_up_d or resize_up_lp:
        if pool.ssh.username is None:
            logger.info('not creating ssh user on new nodes of pool {}'.format(
                pool.id))
        else:
            if pool.ssh.ssh_public_key is None:
                # look for a public key file named after the default key
                # prefix, relative to the current working directory
                sfp = pathlib.Path(crypto.get_ssh_key_prefix() + '.pub')
                if sfp.exists():
                    logger.debug(
                        'setting public key for ssh user to: {}'.format(sfp))
                    settings.set_ssh_public_key(config, str(sfp))
                    create_ssh_user = True
                else:
                    logger.warning(
                        ('not creating ssh user for new nodes of pool {} as '
                         'an existing ssh public key cannot be found').format(
                             pool.id))
                    create_ssh_user = False
            else:
                create_ssh_user = True
    # check if this is a glusterfs-enabled pool
    gluster_present = False
    voltype = None
    try:
        sdv = settings.global_resources_shared_data_volumes(config)
        for sdvkey in sdv:
            if settings.is_shared_data_volume_gluster_on_compute(sdv, sdvkey):
                gluster_present = True
                # NOTE(review): voltype stays None if this lookup raises
                # KeyError; voltype.lower() further down would then fail
                # with AttributeError -- confirm a value is always set
                try:
                    voltype = settings.gluster_volume_type(sdv, sdvkey)
                except KeyError:
                    pass
                # only the first gluster on compute volume is considered
                break
    except KeyError:
        pass
    logger.debug('glusterfs shared volume present: {}'.format(
        gluster_present))
    if gluster_present:
        if resize_up_lp:
            raise RuntimeError(
                'cannot resize up a pool with glusterfs_on_compute and '
                'low priority nodes')
        logger.debug('forcing wait to True due to glusterfs')
        wait = True
    # cache old nodes
    old_nodes = {}
    if gluster_present or create_ssh_user:
        # map of node id -> ip address as of before the resize, used
        # below to tell new nodes apart from existing ones
        for node in batch_client.compute_node.list(pool.id):
            old_nodes[node.id] = node.ip_address
    # resize pool
    nodes = batch.resize_pool(batch_client, blob_client, config, wait)
    # add ssh user to new nodes if present
    if create_ssh_user and (resize_up_d or resize_up_lp):
        if wait:
            # get list of new nodes only
            new_nodes = [node for node in nodes if node.id not in old_nodes]
            # create admin user on each new node if requested
            batch.add_ssh_user(batch_client, config, nodes=new_nodes)
            # log remote login settings for new nodes
            batch.get_remote_login_settings(
                batch_client, config, nodes=new_nodes, suppress_output=False)
            del new_nodes
        else:
            logger.warning('ssh user was not added as --wait was not given')
    # add brick for new nodes
    if gluster_present and resize_up_d:
        # get pool current dedicated
        _pool = batch_client.pool.get(pool.id)
        # ensure current dedicated is the target
        if pool.vm_count.dedicated != _pool.current_dedicated_nodes:
            raise RuntimeError(
                ('cannot perform glusterfs setup on new nodes, unexpected '
                 'current dedicated {} to vm_count {}').format(
                     _pool.current_dedicated_nodes, pool.vm_count.dedicated))
        del _pool
        # get internal ip addresses of new nodes
        new_nodes = [
            node.ip_address for node in nodes if node.id not in old_nodes
        ]
        # an arbitrary pre-existing node is used as the master ip
        # NOTE(review): raises StopIteration if no nodes existed before
        # the resize (old_nodes empty) -- confirm this cannot happen
        masterip = next(iter(old_nodes.values()))
        # get tempdisk mountpoint
        tempdisk = settings.temp_disk_mountpoint(config)
        # construct cmdline
        cmdline = util.wrap_commands_in_shell([
            '$AZ_BATCH_TASK_DIR/{} {} {} {} {} {}'.format(
                _GLUSTERRESIZE_FILE[0], voltype.lower(), tempdisk,
                pool.vm_count.dedicated, masterip, ' '.join(new_nodes))])
        # setup gluster
        _setup_glusterfs(
            batch_client, blob_client, config, nodes, _GLUSTERRESIZE_FILE,
            cmdline=cmdline)


def action_pool_nodes_grls(
        batch_client, config, no_generate_tunnel_script):
    # type: (batchsc.BatchServiceClient, dict, bool) -> None
    """Action: Pool Nodes Grls

    Retrieves the remote login settings via
    batch.get_remote_login_settings and, unless disabled, passes them to
    batch.generate_ssh_tunnel_script.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param bool no_generate_tunnel_script: disable generating tunnel script
    """
    _check_batch_client(batch_client)
    login_settings = batch.get_remote_login_settings(
        batch_client, config, nodes=None, suppress_output=False)
    if no_generate_tunnel_script:
        return
    batch.generate_ssh_tunnel_script(
        batch_client, config, None, nodes=None, rls=login_settings)


def action_pool_nodes_count(batch_client, config, poolid):
    # type: (batchsc.BatchServiceClient, dict, str) -> None
    """Action: Pool Nodes Count

    Checks the batch client and delegates to batch.get_node_counts.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str poolid: pool id
    """
    _check_batch_client(batch_client)
    batch.get_node_counts(
        batch_client,
        config,
        poolid,
    )


def action_pool_nodes_list(batch_client, config, start_task_failed, unusable):
    # type: (batchsc.BatchServiceClient, dict, bool, bool) -> None
    """Action: Pool Nodes List

    Checks the batch client and delegates to batch.list_nodes with the
    given filters.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param bool start_task_failed: nodes in start task failed
    :param bool unusable: nodes in unusable
    """
    _check_batch_client(batch_client)
    batch.list_nodes(
        batch_client,
        config,
        start_task_failed=start_task_failed,
        unusable=unusable,
    )


def action_pool_nodes_zap(batch_client, config, remove, stop):
    # type: (batchsc.BatchServiceClient, dict, bool, bool) -> None
    """Action: Pool Nodes Zap

    Checks the batch client and delegates to
    _zap_all_container_processes_over_ssh.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param bool remove: remove all exited containers
    :param bool stop: docker stop instead of kill
    """
    _check_batch_client(batch_client)
    _zap_all_container_processes_over_ssh(
        batch_client,
        config,
        remove,
        stop,
    )


def action_pool_nodes_prune(batch_client, config, volumes):
    # type: (batchsc.BatchServiceClient, dict, bool) -> None
    """Action: Pool Nodes Prune

    Checks the batch client and delegates to
    _docker_system_prune_over_ssh.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param bool volumes: remove volumes as well
    """
    _check_batch_client(batch_client)
    _docker_system_prune_over_ssh(
        batch_client,
        config,
        volumes,
    )


def action_pool_nodes_ps(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """Action: Pool Nodes Ps

    Checks the batch client and delegates to _docker_ps_over_ssh.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    """
    _check_batch_client(batch_client)
    _docker_ps_over_ssh(
        batch_client,
        config,
    )


def action_pool_user_add(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """Action: Pool User Add

    Adds an RDP user when the configuration describes a Windows pool,
    otherwise adds an SSH user.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    """
    _check_batch_client(batch_client)
    add_user = (
        batch.add_rdp_user if settings.is_windows_pool(config)
        else batch.add_ssh_user
    )
    add_user(batch_client, config)


def action_pool_user_del(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """Action: Pool User Del

    Deletes the RDP user when the configuration describes a Windows
    pool, otherwise deletes the SSH user.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    """
    _check_batch_client(batch_client)
    del_user = (
        batch.del_rdp_user if settings.is_windows_pool(config)
        else batch.del_ssh_user
    )
    del_user(batch_client, config)


def action_pool_ssh(
        batch_client, config, cardinal, nodeid, tty, command,
        ssh_username=None, ssh_private_key=None):
    # type: (batchsc.BatchServiceClient, dict, int, str, bool, tuple, str,
    #        str) -> None
    """Action: Pool Ssh

    Validates the node selector (cardinal or nodeid, never both), fills
    in the SSH username and private key from the pool settings when they
    are not supplied, and calls crypto.connect_or_exec_ssh_command with
    the node's remote login ip and port.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param int cardinal: cardinal node num
    :param str nodeid: node id
    :param bool tty: allocate pseudo-tty
    :param tuple command: command to execute
    :param str ssh_username: ssh username
    :param pathlib.Path ssh_private_key: ssh private key
    :raises ValueError: if both cardinal and nodeid are specified or if
        cardinal is negative
    """
    _check_batch_client(batch_client)
    if cardinal is None:
        if util.is_none_or_empty(nodeid):
            # no selector at all: fall back to the first node
            logger.warning(
                'assuming node cardinal of 0 as no cardinal or nodeid option '
                'was specified')
            cardinal = 0
    elif util.is_not_empty(nodeid):
        raise ValueError('cannot specify both cardinal and nodeid options')
    if cardinal is not None and cardinal < 0:
        raise ValueError('invalid cardinal option value')
    pool = settings.pool_settings(config)
    # private key precedence: explicit argument, then pool setting, then
    # the default key prefix under the generated file export path
    if ssh_private_key is None:
        ssh_private_key = pool.ssh.ssh_private_key
    if ssh_private_key is None:
        ssh_private_key = pathlib.Path(
            pool.ssh.generated_file_export_path,
            crypto.get_ssh_key_prefix())
    if util.is_none_or_empty(ssh_username):
        ssh_username = pool.ssh.username
    ip, port = batch.get_remote_login_setting_for_node(
        batch_client, config, cardinal, nodeid)
    crypto.connect_or_exec_ssh_command(
        ip, port, ssh_private_key, ssh_username, tty=tty, command=command)


def action_pool_rdp(batch_client, config, cardinal, nodeid, no_auto=False):
    # type: (batchsc.BatchServiceClient, dict, int, str, bool) -> None
    """Action: Pool Rdp

    Launches mstsc.exe against the node's remote login ip and port. When
    auto login is enabled and an RDP password is configured, the
    credential is stored with cmdkey.exe before the launch and deleted
    again afterwards, even if the launch itself raises.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param int cardinal: cardinal node num
    :param str nodeid: node id
    :param bool no_auto: no auto login
    :raises ValueError: if both cardinal and nodeid are specified or if
        cardinal is negative
    """
    _check_batch_client(batch_client)
    if cardinal is not None and util.is_not_empty(nodeid):
        raise ValueError('cannot specify both cardinal and nodeid options')
    if cardinal is None and util.is_none_or_empty(nodeid):
        logger.warning(
            'assuming node cardinal of 0 as no cardinal or nodeid option '
            'was specified')
        cardinal = 0
    if cardinal is not None and cardinal < 0:
        raise ValueError('invalid cardinal option value')
    pool = settings.pool_settings(config)
    ip, port = batch.get_remote_login_setting_for_node(
        batch_client, config, cardinal, nodeid)
    auto_login = not no_auto and util.is_not_empty(pool.rdp.password)
    if auto_login:
        # NOTE(review): the password is interpolated unescaped into a
        # shell command line; shell metacharacters in it will be
        # interpreted by the shell -- consider quoting
        rc = util.subprocess_with_output(
            'cmdkey.exe /generic:TERMSRV/{ip} /user:{user} /pass:{pw}'.format(
                ip=ip, user=pool.rdp.username, pw=pool.rdp.password),
            shell=True)
        if rc != 0:
            logger.warning('cmdkey exit code: {}'.format(rc))
    try:
        util.subprocess_nowait(
            'mstsc.exe /v:{ip}:{port}'.format(ip=ip, port=port), shell=True)
    finally:
        # never leave the stored credential behind, even if launching
        # the RDP client failed
        if auto_login:
            # brief pause before removing the stored credential
            # (presumably to let mstsc read it first -- confirm)
            time.sleep(2)
            rc = util.subprocess_with_output(
                'cmdkey.exe /delete:TERMSRV/{}'.format(ip), shell=True)
            if rc != 0:
                logger.warning('cmdkey exit code: {}'.format(rc))


def action_pool_nodes_del(
        batch_client, config, all_start_task_failed, all_starting,
        all_unusable, nodeid):
    # type: (batchsc.BatchServiceClient, dict, bool, bool, bool, list) -> None
    """Action: Pool Nodes Del

    Rejects combining any of the "all ..." selectors with explicit node
    ids, then delegates to batch.del_nodes.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param bool all_start_task_failed: delete all start task failed nodes
    :param bool all_starting: delete all starting nodes
    :param bool all_unusable: delete all unusable nodes
    :param list nodeid: list of nodeids to delete
    :raises ValueError: if an "all" selector is combined with nodeid
    """
    _check_batch_client(batch_client)
    bulk_selector = all_start_task_failed or all_starting or all_unusable
    if bulk_selector and util.is_not_empty(nodeid):
        raise ValueError(
            'cannot specify all start task failed nodes or unusable with '
            'a specific node id')
    batch.del_nodes(
        batch_client,
        config,
        all_start_task_failed,
        all_starting,
        all_unusable,
        nodeid,
    )


def action_pool_nodes_reboot(
        batch_client, config, all_start_task_failed, nodeid):
    # type: (batchsc.BatchServiceClient, dict, bool, list) -> None
    """Action: Pool Nodes Reboot

    Rejects combining the "all start task failed" selector with explicit
    node ids, then delegates to batch.reboot_nodes.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param bool all_start_task_failed: reboot all start task failed nodes
    :param list nodeid: list of nodeids to reboot
    :raises ValueError: if all_start_task_failed is combined with nodeid
    """
    _check_batch_client(batch_client)
    if all_start_task_failed:
        if util.is_not_empty(nodeid):
            raise ValueError(
                'cannot specify all start task failed nodes with a specific '
                'node id')
    batch.reboot_nodes(
        batch_client,
        config,
        all_start_task_failed,
        nodeid,
    )


def action_diag_logs_upload(
        batch_client, blob_client, config, cardinal, nodeid, generate_sas,
        wait):
    # type: (batchsc.BatchServiceClient, azure.storage.blob.BlockBlobService,
    #        dict, int, str, bool, bool) -> None
    """Action: Diag Logs Upload

    Validates the node selector (cardinal or nodeid, never both) and
    delegates to batch.egress_service_logs. If neither selector is
    given, cardinal 0 is assumed and a warning is logged.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param dict config: configuration dict
    :param int cardinal: cardinal node num
    :param str nodeid: node id
    :param bool generate_sas: generate SAS token
    :param bool wait: wait for upload to complete
    :raises ValueError: if both cardinal and nodeid are specified or if
        cardinal is negative
    """
    _check_batch_client(batch_client)
    if cardinal is None:
        if util.is_none_or_empty(nodeid):
            # no selector at all: fall back to the first node
            logger.warning(
                'assuming node cardinal of 0 as no cardinal or nodeid option '
                'was specified')
            cardinal = 0
    elif util.is_not_empty(nodeid):
        raise ValueError('cannot specify both cardinal and nodeid options')
    if cardinal is not None and cardinal < 0:
        raise ValueError('invalid cardinal option value')
    batch.egress_service_logs(
        batch_client, blob_client, config, cardinal, nodeid, generate_sas,
        wait)


def action_pool_images_update(
        batch_client, config, docker_image, docker_image_digest,
        singularity_image, ssh):
    # type: (batchsc.BatchServiceClient, dict, str, str, str, bool) -> None
    """Action: Pool Images Update

    Requires a docker image whenever a digest is given, then delegates
    to _update_container_images.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str docker_image: docker image to update
    :param str docker_image_digest: docker image digest to update to
    :param str singularity_image: singularity image to update
    :param bool ssh: use direct SSH update mode
    :raises ValueError: if a digest is specified without a docker image
    """
    _check_batch_client(batch_client)
    digest_without_image = (
        docker_image is None and docker_image_digest is not None
    )
    if digest_without_image:
        raise ValueError(
            'cannot specify a digest to update to without the image')
    _update_container_images(
        batch_client,
        config,
        docker_image,
        docker_image_digest,
        singularity_image,
        force_ssh=ssh,
    )


def action_pool_images_list(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """Action: Pool Images List

    Checks the batch client and delegates to _list_docker_images.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    """
    _check_batch_client(batch_client)
    _list_docker_images(
        batch_client,
        config,
    )


def action_pool_stats(batch_client, config, pool_id):
    # type: (batchsc.BatchServiceClient, dict, str) -> None
    """Action: Pool Stats

    Checks the batch client and delegates to batch.pool_stats.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str pool_id: pool id
    """
    _check_batch_client(batch_client)
    batch.pool_stats(
        batch_client,
        config,
        pool_id=pool_id,
    )


def action_pool_autoscale_disable(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """Action: Pool Autoscale Disable

    Checks the batch client and delegates to
    batch.pool_autoscale_disable.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    """
    _check_batch_client(batch_client)
    batch.pool_autoscale_disable(
        batch_client,
        config,
    )


def action_pool_autoscale_enable(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """Action: Pool Autoscale Enable

    Checks the batch client and delegates to
    batch.pool_autoscale_enable.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    """
    _check_batch_client(batch_client)
    batch.pool_autoscale_enable(
        batch_client,
        config,
    )


def action_pool_autoscale_evaluate(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """Action: Pool Autoscale Evaluate

    Checks the batch client and delegates to
    batch.pool_autoscale_evaluate.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    """
    _check_batch_client(batch_client)
    batch.pool_autoscale_evaluate(
        batch_client,
        config,
    )


def action_pool_autoscale_lastexec(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """Action: Pool Autoscale Lastexec

    Checks the batch client and delegates to
    batch.pool_autoscale_lastexec.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    """
    _check_batch_client(batch_client)
    batch.pool_autoscale_lastexec(
        batch_client,
        config,
    )


def action_jobs_add(
        resource_client, compute_client, network_client, batch_mgmt_client,
        batch_client, blob_client, table_client, keyvault_client, config,
        recreate, tail):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.mgmt.batch.BatchManagementClient,
    #        azure.batch.batch_service_client.BatchServiceClient,
    #        azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService,
    #        azure.keyvault.KeyVaultClient, dict, bool, str) -> None
    """Action: Jobs Add

    When the jobs configuration requests an auto pool, the pool id is
    validated, storage containers are created and cleared, and an auto
    pool specification is constructed before the jobs are added.

    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
        batch management client
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
    :param dict config: configuration dict
    :param bool recreate: recreate jobs if completed
    :param str tail: file to tail or last job and task added
    :raises ValueError: if an auto pool is requested and the pool id is
        longer than 20 characters
    :raises RuntimeError: if an auto pool is requested and a pool with
        the same id already exists
    :raises azure.batch.models.BatchErrorException: if the pool existence
        check fails for a reason other than the pool not existing
    """
    _check_batch_client(batch_client)
    # check for job autopools
    autopool = batch.check_jobs_for_auto_pool(config)
    if autopool:
        # check to ensure pool id is within 20 chars
        pool_id = settings.pool_id(config)
        if len(pool_id) > 20:
            raise ValueError(
                'pool id must be less than 21 characters: {}'.format(pool_id))
        # check if a pool id with existing pool id exists
        try:
            batch_client.pool.get(pool_id)
        except batchmodels.BatchErrorException as ex:
            # only "pool does not exist" is the expected outcome; any
            # other service error must not be silently swallowed
            if 'The specified pool does not exist' not in ex.message.value:
                raise
        else:
            raise RuntimeError(
                'pool with id of {} already exists'.format(pool_id))
        _adjust_settings_for_pool_creation(config)
        # create storage containers and clear
        storage.create_storage_containers(blob_client, table_client, config)
        storage.clear_storage_containers(blob_client, table_client, config)
        if settings.requires_populate_global_resources_storage(config):
            storage.populate_global_resource_blobs(
                blob_client, table_client, config)
        # create autopool specification object
        autopool = _construct_auto_pool_specification(
            resource_client, compute_client, network_client, batch_mgmt_client,
            batch_client, blob_client, keyvault_client, config
        )
        # check settings and warn
        _check_settings_for_auto_pool(config)
    else:
        autopool = None
    # add jobs
    is_windows = settings.is_windows_pool(config)
    batch.add_jobs(
        batch_client, blob_client, None, None, keyvault_client, config,
        autopool, _IMAGE_BLOCK_FILE,
        _BLOBXFER_WINDOWS_FILE if is_windows else _BLOBXFER_FILE,
        _AUTOSCRATCH_FILE, recreate, tail)


def action_jobs_list(batch_client, config, jobid, jobscheduleid):
    # type: (batchsc.BatchServiceClient, dict, str, str) -> None
    """Action: Jobs List

    Retrieves a single job or job schedule when either id is supplied,
    otherwise lists jobs.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str jobid: job id
    :param str jobscheduleid: job schedule id
    """
    _check_batch_client(batch_client)
    # a job schedule id on its own must also select the single-entity
    # lookup; otherwise it would be silently dropped in favor of a listing
    if util.is_not_empty(jobid) or util.is_not_empty(jobscheduleid):
        batch.get_job_or_job_schedule(
            batch_client, config, jobid, jobscheduleid)
    else:
        batch.list_jobs(batch_client, config)


def action_jobs_tasks_list(
        batch_client, config, all, jobid, poll_until_tasks_complete, taskid):
    # type: (batchsc.BatchServiceClient, dict, bool, str, bool, str) -> None
    """Action: Jobs Tasks List

    Shows a single task when a task id is given, otherwise lists tasks.
    When polling is requested, the query is repeated every 5 seconds until
    the batch call reports completion.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param bool all: all jobs
    :param str jobid: job id
    :param bool poll_until_tasks_complete: poll until tasks complete
    :param str taskid: task id
    :raises ValueError: if both all and jobid are given, or if polling is
        combined with raw output
    """
    _check_batch_client(batch_client)
    if all and jobid is not None:
        raise ValueError('cannot specify both --all and --jobid')
    if settings.raw(config) and poll_until_tasks_complete:
        raise ValueError(
            'cannot specify --poll_until_tasks_complete and --raw')
    while True:
        if util.is_not_empty(taskid):
            finished = batch.get_task(batch_client, config, jobid, taskid)
        else:
            finished = batch.list_tasks(
                batch_client, config, all=all, jobid=jobid)
        # single pass unless polling was requested and tasks remain
        if finished or not poll_until_tasks_complete:
            return
        time.sleep(5)


def action_jobs_tasks_count(batch_client, config, jobid):
    # type: (batchsc.BatchServiceClient, dict, str) -> None
    """Action: Jobs Tasks Count

    Runs the batch client check and delegates to batch.get_task_counts.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str jobid: job id
    """
    _check_batch_client(batch_client)
    batch.get_task_counts(batch_client, config, jobid=jobid)


def action_jobs_tasks_term(batch_client, config, jobid, taskid, wait, force):
    # type: (batchsc.BatchServiceClient, dict, str, str, bool, bool) -> None
    """Action: Jobs Tasks Term

    Validates the id/force combination and delegates to
    batch.terminate_tasks.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str jobid: job id
    :param str taskid: task id
    :param bool wait: wait for action to complete
    :param bool force: force docker kill even if completed
    :raises ValueError: if a task id is given without a job id, or if force
        is requested without both a task id and a job id
    """
    _check_batch_client(batch_client)
    if jobid is None and taskid is not None:
        raise ValueError(
            'cannot specify a task to terminate without the corresponding '
            'job id')
    if force:
        # forcing requires a fully qualified task (job id + task id)
        if taskid is None or jobid is None:
            raise ValueError(
                'cannot force docker kill without task id/job id')
    batch.terminate_tasks(
        batch_client, config, jobid=jobid, taskid=taskid, wait=wait,
        force=force)


def action_jobs_tasks_del(batch_client, config, jobid, taskid, wait):
    # type: (batchsc.BatchServiceClient, dict, str, str, bool) -> None
    """Action: Jobs Tasks Del

    Validates that a task id is accompanied by a job id and delegates to
    batch.del_tasks.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str jobid: job id
    :param str taskid: task id
    :param bool wait: wait for action to complete
    :raises ValueError: if a task id is given without a job id
    """
    _check_batch_client(batch_client)
    if jobid is None and taskid is not None:
        raise ValueError(
            'cannot specify a task to delete without the corresponding '
            'job id')
    batch.del_tasks(
        batch_client, config, jobid=jobid, taskid=taskid, wait=wait)


def action_jobs_del_or_term(
        batch_client, blob_client, table_client, config, delete, all_jobs,
        all_jobschedules, jobid, jobscheduleid, termtasks, wait):
    # type: (batchsc.BatchServiceClient, azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService, dict, bool, bool, bool, str,
    #        str, bool, bool) -> None
    """Action: Jobs Del or Term

    Dispatches to the all-jobs, all-job-schedules or targeted variant of
    the batch delete-or-terminate helpers. For the targeted variant without
    an explicit job id, autopool jobs whose pool no longer exists also have
    their pool storage cleaned up afterwards.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    :param bool delete: delete flag forwarded to the batch
        delete-or-terminate helpers
    :param bool all_jobs: all jobs
    :param bool all_jobschedules: all job schedules
    :param str jobid: job id
    :param str jobscheduleid: job schedule id
    :param bool termtasks: terminate tasks prior
    :param bool wait: wait for action to complete
    :raises ValueError: on conflicting option combinations
    :raises batchmodels.BatchErrorException: if the pool existence check
        fails for any reason other than the pool not existing
    """
    _check_batch_client(batch_client)
    if jobid is not None and jobscheduleid is not None:
        raise ValueError('cannot specify both --jobid and --jobscheduleid')
    if all_jobs:
        if jobid is not None:
            raise ValueError('cannot specify both --all-jobs and --jobid')
        batch.delete_or_terminate_all_jobs(
            batch_client, config, delete, termtasks=termtasks, wait=wait)
    elif all_jobschedules:
        if jobscheduleid is not None:
            raise ValueError(
                'cannot specify both --all-jobschedules and --jobscheduleid')
        if termtasks:
            raise ValueError(
                'Cannot specify --termtasks with --all-jobschedules. '
                'Please terminate tasks with each individual job first.')
        batch.delete_or_terminate_all_job_schedules(
            batch_client, config, delete, wait=wait)
    else:
        # check for autopool
        if util.is_none_or_empty(jobid):
            autopool = batch.check_jobs_for_auto_pool(config)
            if autopool:
                # check if a pool id with existing pool id exists
                try:
                    batch_client.pool.get(settings.pool_id(config))
                except batchmodels.BatchErrorException as ex:
                    # only a definitive "does not exist" answer may lead to
                    # storage cleanup; any other failure leaves the pool
                    # state unknown and must not be swallowed
                    if ('The specified pool does not exist' not in
                            ex.message.value):
                        raise
                else:
                    # pool exists, so its storage must be left intact
                    autopool = False
        else:
            autopool = False
        # terminate the jobs
        batch.delete_or_terminate_jobs(
            batch_client, config, delete, jobid=jobid,
            jobscheduleid=jobscheduleid, termtasks=termtasks, wait=wait)
        # if autopool, delete the storage
        if autopool:
            storage.cleanup_with_del_pool(blob_client, table_client, config)


def action_jobs_cmi(batch_client, config, delete):
    # type: (batchsc.BatchServiceClient, dict, bool) -> None
    """Action: Jobs Cmi

    Unless only deletion is requested, batch.clean_mi_jobs is invoked
    first; batch.del_clean_mi_jobs is invoked in either case.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param bool delete: delete all cmi jobs
    """
    _check_batch_client(batch_client)
    if not delete:
        batch.clean_mi_jobs(batch_client, config)
    # the delete step happens on both paths
    batch.del_clean_mi_jobs(batch_client, config)


def action_jobs_migrate(
        batch_client, config, jobid, jobscheduleid, poolid, requeue,
        terminate, wait):
    # type: (batchsc.BatchServiceClient, dict, str, str, str, bool, bool,
    #        bool) -> None
    """Action: Jobs Migrate

    After validation and user confirmation, the jobs or job schedules are
    disabled, updated with the target pool, and enabled again.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str jobid: job id to migrate in lieu of config
    :param str jobscheduleid: job schedule id to migrate in lieu of config
    :param str poolid: pool id to migrate to in lieu of config
    :param bool requeue: requeue action
    :param bool terminate: terminate action
    :param bool wait: wait action
    :raises ValueError: if a job id is combined with a job schedule id, or
        a job id is given without exactly one of requeue/terminate/wait
    """
    _check_batch_client(batch_client)
    if jobid is not None:
        if jobscheduleid is not None:
            raise ValueError('cannot specify both --jobid and --jobscheduleid')
        if (requeue, terminate, wait).count(True) != 1:
            raise ValueError(
                'must specify only one option of --requeue, --terminate, '
                '--wait')
    # first flag set wins, in requeue > terminate > wait order
    candidates = (
        (requeue, 'requeue'),
        (terminate, 'terminate'),
        (wait, 'wait'),
    )
    action = next((name for flag, name in candidates if flag), None)
    # check jobs to see if targetted pool id is the same
    batch.check_pool_for_job_migration(
        batch_client, config, jobid=jobid, jobscheduleid=jobscheduleid,
        poolid=poolid)
    confirmed = util.confirm_action(
        config, msg='migration of jobs or job schedules')
    if not confirmed:
        return
    logger.warning(
        'ensure that the new target pool has the proper Docker images '
        'loaded, or you have enabled allow_run_on_missing_image')
    # step 1: disable (confirmation was already obtained above)
    batch.disable_jobs(
        batch_client, config, action, jobid=jobid, jobscheduleid=jobscheduleid,
        suppress_confirm=True)
    # step 2: patch with the target pool
    batch.update_job_with_pool(
        batch_client, config, jobid=jobid, jobscheduleid=jobscheduleid,
        poolid=poolid)
    # step 3: re-enable
    batch.enable_jobs(
        batch_client, config, jobid=jobid, jobscheduleid=jobscheduleid)


def action_jobs_disable(
        batch_client, config, jobid, jobscheduleid, requeue, terminate, wait):
    # type: (batchsc.BatchServiceClient, dict, str, str, bool, bool,
    #        bool) -> None
    """Action: Jobs Disable

    Validates the options, maps the requeue/terminate/wait flags to an
    action name and delegates to batch.disable_jobs.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str jobid: job id to disable in lieu of config
    :param str jobscheduleid: job schedule id to disable in lieu of config
    :param bool requeue: requeue action
    :param bool terminate: terminate action
    :param bool wait: wait action
    :raises ValueError: if a job id is combined with a job schedule id, or
        a job id is given without exactly one of requeue/terminate/wait
    """
    _check_batch_client(batch_client)
    if jobid is not None:
        if jobscheduleid is not None:
            raise ValueError('cannot specify both --jobid and --jobscheduleid')
        if (requeue, terminate, wait).count(True) != 1:
            raise ValueError(
                'must specify only one option of --requeue, --terminate, '
                '--wait')
    # first flag set wins, in requeue > terminate > wait order
    action = None
    for flag, name in (
            (requeue, 'requeue'), (terminate, 'terminate'), (wait, 'wait')):
        if flag:
            action = name
            break
    batch.disable_jobs(
        batch_client, config, action, jobid=jobid,
        jobscheduleid=jobscheduleid, disabling_state_ok=True)


def action_jobs_enable(batch_client, config, jobid, jobscheduleid):
    # type: (batchsc.BatchServiceClient, dict, str, str) -> None
    """Action: Jobs Enable

    Runs the batch client check and delegates to batch.enable_jobs.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str jobid: job id to enable in lieu of config
    :param str jobscheduleid: job schedule id to enable in lieu of config
    """
    _check_batch_client(batch_client)
    batch.enable_jobs(
        batch_client, config, jobid=jobid, jobscheduleid=jobscheduleid)


def action_jobs_stats(batch_client, config, job_id):
    # type: (batchsc.BatchServiceClient, dict, str) -> None
    """Action: Jobs Stats

    Runs the batch client check and delegates to batch.job_stats.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str job_id: job id
    """
    _check_batch_client(batch_client)
    batch.job_stats(batch_client, config, jobid=job_id)


def action_storage_del(
        blob_client, table_client, config, clear_tables, diagnostics_logs,
        pools):
    # type: (azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService, dict, bool, bool,
    #        List[str]) -> None
    """Action: Storage Del

    Optionally handles the diagnostics logs first, then processes each pool
    id: tables are cleared rather than deleted when clear_tables is set,
    and the storage containers are deleted.
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    :param bool clear_tables: clear tables instead of deleting
    :param bool diagnostics_logs: also process diagnostics logs
    :param list pools: pool ids to target
    """
    if diagnostics_logs:
        storage.delete_or_clear_diagnostics_logs(blob_client, config, True)
    for target_pool in pools:
        # re-point the storage settings at the pool being processed
        if util.is_not_empty(target_pool):
            populate_global_settings(config, False, pool_id=target_pool)
        if clear_tables:
            storage.clear_storage_containers(
                blob_client, table_client, config, tables_only=True,
                pool_id=target_pool)
        # tables are skipped here when they were cleared above
        storage.delete_storage_containers(
            blob_client, table_client, config, skip_tables=clear_tables)


def action_storage_clear(
        blob_client, table_client, config, diagnostics_logs, pools):
    # type: (azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService, dict, bool, List[str]) -> None
    """Action: Storage Clear

    Optionally handles the diagnostics logs first, then clears the storage
    containers for each pool id.
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    :param bool diagnostics_logs: also process diagnostics logs
    :param list pools: pool ids to target
    """
    if diagnostics_logs:
        storage.delete_or_clear_diagnostics_logs(blob_client, config, False)
    for target_pool in pools:
        # re-point the storage settings at the pool being processed
        if util.is_not_empty(target_pool):
            populate_global_settings(config, False, pool_id=target_pool)
        storage.clear_storage_containers(
            blob_client, table_client, config, pool_id=target_pool)


def action_storage_sas_create(
        config, storage_account, path, file, create, list_perm, read, write,
        delete):
    # type: (dict, str, str, bool, bool, bool, bool, bool, bool) -> None
    """Action: Storage Sas Create

    Creates a SAS key for the given storage account and path, then logs
    the resulting URL.
    :param dict config: configuration dict
    :param str storage_account: storage account
    :param str path: path
    :param bool file: file sas
    :param bool create: create perm
    :param bool list_perm: list perm
    :param bool read: read perm
    :param bool write: write perm
    :param bool delete: delete perm
    """
    creds = settings.credentials_storage(config, storage_account)
    sas = storage.create_saskey(
        creds, path, file, create, list_perm, read, write, delete)
    # host name component depends on whether this is a file or blob sas
    service = 'file' if file else 'blob'
    logger.info('generated SAS URL: https://{}.{}.{}/{}?{}'.format(
        creds.account, service, creds.endpoint, path, sas))


def action_data_files_stream(batch_client, config, filespec, disk):
    # type: (batchsc.BatchServiceClient, dict, str, bool) -> None
    """Action: Data Files Stream

    Runs the batch client check and delegates to
    batch.stream_file_and_wait_for_task.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str filespec: filespec of file to retrieve
    :param bool disk: write streamed data to disk instead
    """
    _check_batch_client(batch_client)
    batch.stream_file_and_wait_for_task(batch_client, config, filespec, disk)


def action_data_files_list(batch_client, config, jobid, taskid):
    # type: (batchsc.BatchServiceClient, dict, str, str) -> None
    """Action: Data Files List

    Validates that a task id is accompanied by a job id and delegates to
    batch.list_task_files.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str jobid: job id to list
    :param str taskid: task id to list
    :raises ValueError: if a task id is given without a job id
    """
    _check_batch_client(batch_client)
    if jobid is None and taskid is not None:
        raise ValueError(
            'cannot specify a task to list files without the corresponding '
            'job id')
    batch.list_task_files(batch_client, config, jobid, taskid)


def action_data_files_task(batch_client, config, all, filespec):
    # type: (batchsc.BatchServiceClient, dict, bool, str) -> None
    """Action: Data Files Task

    Delegates to batch.get_all_files_via_task when all is set, otherwise
    to batch.get_file_via_task.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param bool all: retrieve all files
    :param str filespec: filespec of file to retrieve
    """
    _check_batch_client(batch_client)
    retrieve = (
        batch.get_all_files_via_task if all else batch.get_file_via_task
    )
    retrieve(batch_client, config, filespec)


def action_data_files_node(batch_client, config, all, nodeid):
    # type: (batchsc.BatchServiceClient, dict, bool, str) -> None
    """Action: Data Files Node

    Delegates to batch.get_all_files_via_node when all is set, otherwise
    to batch.get_file_via_node.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param bool all: retrieve all files
    :param str nodeid: node id to retrieve file from
    """
    _check_batch_client(batch_client)
    retrieve = (
        batch.get_all_files_via_node if all else batch.get_file_via_node
    )
    retrieve(batch_client, config, nodeid)


def action_data_ingress(
        batch_client, compute_client, network_client, config, to_fs):
    # type: (batchsc.BatchServiceClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient, dict, str) -> None
    """Action: Data Ingress

    Selects the ingress kind ('remotefs', 'all' or 'storage'), starts the
    ingress through data.ingress_data and waits on the returned storage
    threads.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    :param str to_fs: ingress to remote filesystem
    :raises RuntimeError: if to_fs is given but the compute or network
        client is None
    """
    pool_total_vm_count = None
    if not util.is_none_or_empty(to_fs):
        # remote filesystem ingress needs the ARM clients
        if compute_client is None or network_client is None:
            raise RuntimeError(
                'required ARM clients are invalid, please provide management '
                'AAD credentials')
        rls = None
        kind = 'remotefs'
    else:
        try:
            # total node count is dedicated plus low priority
            pool = batch_client.pool.get(settings.pool_id(config))
            pool_total_vm_count = (
                pool.current_dedicated_nodes + pool.current_low_priority_nodes
            )
            del pool
            # ensure there are remote login settings
            rls = batch.get_remote_login_settings(
                batch_client, config, nodes=None, suppress_output=True)
            # downgrade to storage-only ingress if the node check fails
            if batch.check_pool_nodes_runnable(batch_client, config):
                kind = 'all'
            else:
                kind = 'storage'
        except batchmodels.BatchErrorException as ex:
            if 'The specified pool does not exist' not in ex.message.value:
                raise
            # no pool: only storage ingress is possible
            rls = None
            kind = 'storage'
    storage_threads = data.ingress_data(
        batch_client, compute_client, network_client, config, rls=rls,
        kind=kind, total_vm_count=pool_total_vm_count, to_fs=to_fs)
    data.wait_for_storage_threads(storage_threads)


def action_misc_tensorboard(
        batch_client, config, jobid, taskid, logdir, image):
    # type: (batchsc.BatchServiceClient, dict, str, str, str, str) -> None
    """Action: Misc Tensorboard

    Validates the job/task selection and delegates to
    misc.tunnel_tensorboard.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param str jobid: job id to list
    :param str taskid: task id to list
    :param str logdir: log dir
    :param str image: tensorflow image to use
    :raises ValueError: if no job id is given and the jobs config does not
        hold exactly one job, or if a task id is given without a job id
    """
    _check_batch_client(batch_client)
    if util.is_none_or_empty(jobid):
        # without an explicit job id the config must be unambiguous
        if len(settings.job_specifications(config)) != 1:
            raise ValueError(
                'The number of jobs in the specified jobs config is not '
                'one. Please specify which job with --jobid.')
        if util.is_not_empty(taskid):
            raise ValueError(
                'cannot specify a task to tunnel Tensorboard to without the '
                'corresponding job id')
    misc.tunnel_tensorboard(batch_client, config, jobid, taskid, logdir, image)


def action_misc_mirror_images(batch_client, config):
    # type: (batchsc.BatchServiceClient, dict) -> None
    """Action: Misc Mirror-images

    Runs the batch client check and delegates to
    misc.mirror_batch_shipyard_images with the second element of
    _MIRROR_SYSTEM_IMAGES_FILE.
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    """
    _check_batch_client(batch_client)
    mirror_file = _MIRROR_SYSTEM_IMAGES_FILE[1]
    misc.mirror_batch_shipyard_images(batch_client, config, mirror_file)


def action_monitor_create(
        auth_client, resource_client, compute_client, network_client,
        blob_client, table_client, config):
    # type: (azure.mgmt.authorization.AuthorizationManagementClient,
    #        azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService, dict) -> None
    """Action: Monitor Create

    Checks the management clients and credentials, then delegates to
    monitor.create_monitoring_resource.
    :param azure.mgmt.authorization.AuthorizationManagementClient auth_client:
        auth client
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    :raises ValueError: if the management subscription id or aad authority
        url is missing
    """
    _check_resource_client(resource_client)
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    # both the subscription id and the aad authority url must be populated
    mgmt_creds = settings.credentials_management(config)
    if util.is_none_or_empty(mgmt_creds.subscription_id):
        raise ValueError('management aad credentials are invalid')
    if util.is_none_or_empty(mgmt_creds.aad.authority_url):
        raise ValueError('management aad credentials are invalid')
    monitor.create_monitoring_resource(
        auth_client, resource_client, compute_client, network_client,
        blob_client, table_client, config, _RESOURCES_PATH,
        _MONITORINGPREP_FILE, _CONFIGURABLE_MONITORING_FILES)


def action_monitor_add(table_client, config, poolid, fscluster):
    # type: (azure.cosmosdb.table.TableService, dict, List[str],
    #        List[str]) -> None
    """Action: Monitor Add

    Builds a per-storage-cluster description (file server type, resource
    group, ne_port, availability set name, vm names) for the requested fs
    clusters and hands it, together with the pool ids, to
    storage.add_resources_to_monitor.
    :param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    :param list poolid: list of pool ids to monitor
    :param list fscluster: list of fs clusters to monitor
    """
    if util.is_none_or_empty(poolid) and util.is_none_or_empty(fscluster):
        logger.error('no monitoring resources specified to add')
        return
    # ensure that we are operating in AAD mode for batch
    if util.is_not_empty(poolid):
        _check_for_batch_aad(
            settings.credentials_batch(config), 'add pool monitors')
    fsmap = None
    if util.is_not_empty(fscluster):
        fsmap = {}
        for sc_id in fscluster:
            sc = settings.remotefs_settings(config, sc_id).storage_cluster
            fs_type = sc.file_server.type
            if fs_type == 'nfs':
                # nfs contributes only the vm at index 0
                vms = [settings.generate_virtual_machine_name(sc, 0)]
            elif fs_type == 'glusterfs':
                # glusterfs contributes one name per vm in the cluster
                vms = [
                    settings.generate_virtual_machine_name(sc, i)
                    for i in range(sc.vm_count)
                ]
            else:
                vms = []
            fsmap[sc_id] = {
                'type': fs_type,
                'rg': sc.resource_group,
                'ne_port': sc.prometheus.ne_port,
                'as': settings.generate_availability_set_name(sc),
                'vms': vms,
            }
    storage.add_resources_to_monitor(table_client, config, poolid, fsmap)


def action_monitor_list(table_client, config):
    # type: (azure.cosmosdb.table.TableService, dict) -> None
    """Action: Monitor List

    Delegates to storage.list_monitored_resources.
    :param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    """
    storage.list_monitored_resources(table_client, config)


def action_monitor_remove(table_client, config, all, poolid, fscluster):
    # type: (azure.cosmosdb.table.TableService, dict, bool, List[str],
    #        List[str]) -> None
    """Action: Monitor Remove

    Validates the selection and delegates to
    storage.remove_resources_from_monitoring. Logs an error and returns
    without removing anything when neither all nor a specific resource is
    given.
    :param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    :param bool all: all resource monitors
    :param list poolid: list of pool ids to remove from monitoring
    :param list fscluster: list of fs clusters to remove from monitoring
    :raises ValueError: if all is combined with specific resources
    """
    if all:
        if util.is_not_empty(poolid) or util.is_not_empty(fscluster):
            raise ValueError(
                'cannot specify --all with specific monitoring resources to '
                'remove')
    else:
        # ensure that we are operating in AAD mode for batch
        if util.is_not_empty(poolid):
            _check_for_batch_aad(
                settings.credentials_batch(config), 'remove pool monitors')
        if (util.is_none_or_empty(poolid) and
                util.is_none_or_empty(fscluster)):
            logger.error('no monitoring resources specified to remove')
            return
    storage.remove_resources_from_monitoring(
        table_client, config, all, poolid, fscluster)


def action_monitor_ssh(
        compute_client, network_client, config, tty, command):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient, dict,
    #        bool, tuple) -> None
    """Action: Monitor Ssh

    Delegates to resource.ssh_to_virtual_machine_resource using the
    monitoring settings and the monitoring ssh key prefix.
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    :param bool tty: allocate pseudo-tty
    :param tuple command: command
    """
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    monitoring_vm = settings.monitoring_settings(config)
    key_prefix = crypto.get_monitoring_ssh_key_prefix()
    resource.ssh_to_virtual_machine_resource(
        compute_client, network_client, monitoring_vm, key_prefix, tty,
        command)


def action_monitor_suspend(compute_client, config, wait):
    # type: (azure.mgmt.compute.ComputeManagementClient, dict, bool) -> None
    """Action: Monitor Suspend

    Delegates to resource.suspend_virtual_machine_resource using the
    monitoring settings, with offset 0.
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param bool wait: wait for suspension to complete
    """
    _check_compute_client(compute_client)
    monitoring_vm = settings.monitoring_settings(config)
    resource.suspend_virtual_machine_resource(
        compute_client, config, monitoring_vm, offset=0, wait=wait)


def action_monitor_start(compute_client, config, wait):
    # type: (azure.mgmt.compute.ComputeManagementClient, dict, bool) -> None
    """Action: Monitor Start

    Delegates to resource.start_virtual_machine_resource using the
    monitoring settings, with offset 0.
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param bool wait: wait for restart to complete
    """
    _check_compute_client(compute_client)
    monitoring_vm = settings.monitoring_settings(config)
    resource.start_virtual_machine_resource(
        compute_client, config, monitoring_vm, offset=0, wait=wait)


def action_monitor_status(compute_client, network_client, config):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient, dict) -> None
    """Action: Monitor Status

    Delegates to resource.stat_virtual_machine_resource using the
    monitoring settings. Only the compute client is checked here.
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    """
    _check_compute_client(compute_client)
    monitoring_vm = settings.monitoring_settings(config)
    resource.stat_virtual_machine_resource(
        compute_client, network_client, config, monitoring_vm)


def action_monitor_destroy(
        resource_client, compute_client, network_client, blob_client,
        table_client, config, delete_all_resources, delete_virtual_network,
        generate_from_prefix, wait):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService, dict, bool, bool,
    #        bool, bool) -> None
    """Action: Monitor Destroy

    Checks the management clients and option compatibility, then delegates
    to monitor.delete_monitoring_resource.
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    :param bool delete_all_resources: delete all resources
    :param bool delete_virtual_network: delete virtual network
    :param bool generate_from_prefix: generate resources from hostname prefix
    :param bool wait: wait for deletion to complete
    :raises ValueError: if generate_from_prefix is combined with either
        delete option
    """
    _check_resource_client(resource_client)
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    if generate_from_prefix:
        # prefix generation is mutually exclusive with the delete_* options
        if delete_all_resources or delete_virtual_network:
            raise ValueError(
                'Cannot specify generate_from_prefix and a delete_* option')
    monitor.delete_monitoring_resource(
        resource_client, compute_client, network_client, blob_client,
        table_client, config, delete_virtual_network=delete_virtual_network,
        delete_resource_group=delete_all_resources,
        generate_from_prefix=generate_from_prefix, wait=wait)


def action_fed_proxy_create(
        auth_client, resource_client, compute_client, network_client,
        blob_client, table_client, queue_client, config):
    # type: (azure.mgmt.authorization.AuthorizationManagementClient,
    #        azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService,
    #        azure.storage.queue.QueueService, dict) -> None
    """Action: Fed Proxy Create

    Checks the management clients and credentials, then delegates to
    federation.create_federation_proxy.
    :param azure.mgmt.authorization.AuthorizationManagementClient auth_client:
        auth client
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param azure.storage.queue.QueueService queue_client: queue client
    :param dict config: configuration dict
    :raises ValueError: if the management subscription id or aad authority
        url is missing
    """
    _check_resource_client(resource_client)
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    # both the subscription id and the aad authority url must be populated
    mgmt_creds = settings.credentials_management(config)
    if util.is_none_or_empty(mgmt_creds.subscription_id):
        raise ValueError('management aad credentials are invalid')
    if util.is_none_or_empty(mgmt_creds.aad.authority_url):
        raise ValueError('management aad credentials are invalid')
    federation.create_federation_proxy(
        auth_client, resource_client, compute_client, network_client,
        blob_client, table_client, queue_client, config, _RESOURCES_PATH,
        _FEDERATIONPREP_FILE, _ALL_FEDERATION_FILES)


def action_fed_proxy_ssh(
        compute_client, network_client, config, tty, command):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient, dict,
    #        bool, tuple) -> None
    """Action: Fed Proxy Ssh

    Checks the compute and network clients, then delegates to
    resource.ssh_to_virtual_machine_resource using the federation settings
    and the federation ssh key prefix.

    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    :param bool tty: allocate pseudo-tty
    :param tuple command: command
    """
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    proxy_settings = settings.federation_settings(config)
    key_prefix = crypto.get_federation_ssh_key_prefix()
    resource.ssh_to_virtual_machine_resource(
        compute_client, network_client, proxy_settings, key_prefix, tty,
        command)


def action_fed_proxy_suspend(compute_client, config, wait):
    # type: (azure.mgmt.compute.ComputeManagementClient, dict, bool) -> None
    """Action: Fed Proxy Suspend

    Checks the compute client, then delegates to
    resource.suspend_virtual_machine_resource for the federation settings
    at offset 0.

    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param bool wait: wait for suspension to complete
    """
    _check_compute_client(compute_client)
    proxy_settings = settings.federation_settings(config)
    resource.suspend_virtual_machine_resource(
        compute_client, config, proxy_settings, wait=wait, offset=0)


def action_fed_proxy_start(compute_client, config, wait):
    # type: (azure.mgmt.compute.ComputeManagementClient, dict, bool) -> None
    """Action: Fed Proxy Start

    Checks the compute client, then delegates to
    resource.start_virtual_machine_resource for the federation settings
    at offset 0.

    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param bool wait: wait for restart to complete
    """
    _check_compute_client(compute_client)
    proxy_settings = settings.federation_settings(config)
    resource.start_virtual_machine_resource(
        compute_client, config, proxy_settings, wait=wait, offset=0)


def action_fed_proxy_status(compute_client, network_client, config):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient, dict) -> None
    """Action: Fed Proxy Status

    Checks the compute client (the network client is passed through
    without a check here), then delegates to
    resource.stat_virtual_machine_resource for the federation settings.

    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    """
    _check_compute_client(compute_client)
    proxy_settings = settings.federation_settings(config)
    resource.stat_virtual_machine_resource(
        compute_client, network_client, config, proxy_settings)


def action_fed_proxy_destroy(
        resource_client, compute_client, network_client, blob_client,
        table_client, queue_client, config, delete_all_resources,
        delete_virtual_network, generate_from_prefix, wait):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService,
    #        azure.storage.queue.QueueService, dict, bool, bool,
    #        bool, bool) -> None
    """Action: Fed Proxy Destroy

    Checks the management clients and option compatibility, then
    delegates to federation.delete_federation_proxy.

    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param azure.storage.queue.QueueService queue_client: queue client
    :param dict config: configuration dict
    :param bool delete_all_resources: delete all resources, forwarded as
        the delete_resource_group option
    :param bool delete_virtual_network: delete virtual network
    :param bool generate_from_prefix: generate resources from hostname prefix
    :param bool wait: wait for deletion to complete
    :raises ValueError: if generate_from_prefix is combined with either
        delete option
    """
    _check_resource_client(resource_client)
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    # generate_from_prefix is mutually exclusive with the delete_* options
    if generate_from_prefix:
        if delete_all_resources or delete_virtual_network:
            raise ValueError(
                'Cannot specify generate_from_prefix and a delete_* option')
    federation.delete_federation_proxy(
        resource_client, compute_client, network_client, blob_client,
        table_client, queue_client, config,
        delete_resource_group=delete_all_resources,
        delete_virtual_network=delete_virtual_network,
        generate_from_prefix=generate_from_prefix,
        wait=wait)


def action_fed_create(
        blob_client, table_client, queue_client, config, federation_id,
        force, unique_jobs):
    # type: (azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService,
    #        azure.storage.queue.QueueService, dict, str, bool, bool) -> None
    """Action: Fed Create

    Logs the federation id as given, then delegates to
    storage.create_federation_id with the lower-cased id.

    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param azure.storage.queue.QueueService queue_client: queue client
    :param dict config: configuration dict
    :param str federation_id: federation id
    :param bool force: force creation
    :param bool unique_jobs: unique job ids required
    :raises ValueError: if federation_id is none or empty
    """
    if util.is_none_or_empty(federation_id):
        raise ValueError('federation id is invalid')
    logger.info('creating federation id: {}'.format(federation_id))
    # the id is handed to storage in lower case
    fed_id = federation_id.lower()
    storage.create_federation_id(
        blob_client, table_client, queue_client, config, fed_id, force,
        unique_jobs)


def action_fed_list(table_client, config, federation_id):
    # type: (azure.cosmosdb.table.TableService, dict, List[str]) -> None
    """Action: Fed List

    Delegates directly to storage.list_federations; the federation ids
    are passed through unmodified.

    :param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    :param List[str] federation_id: federation ids
    """
    storage.list_federations(
        table_client, config, federation_id)


def action_fed_destroy(
        blob_client, table_client, queue_client, config, federation_id):
    # type: (azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService,
    #        azure.storage.queue.QueueService, dict, str) -> None
    """Action: Fed Destroy

    Asks for confirmation via util.confirm_action and, if confirmed,
    delegates to storage.destroy_federation_id with the lower-cased id.

    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param azure.storage.queue.QueueService queue_client: queue client
    :param dict config: configuration dict
    :param str federation_id: federation id
    :raises ValueError: if federation_id is none or empty
    """
    if util.is_none_or_empty(federation_id):
        raise ValueError('federation id is invalid')
    prompt = (
        'destroy federation id {}, all queued jobs for the '
        'federation will be deleted'
    ).format(federation_id)
    if util.confirm_action(config, msg=prompt):
        logger.info('destroying federation id: {}'.format(federation_id))
        storage.destroy_federation_id(
            blob_client, table_client, queue_client, config,
            federation_id.lower())


def action_fed_pool_add(
        batch_client, table_client, config, federation_id, batch_service_url,
        pools):
    # type: (batchsc.BatchServiceClient,
    #        azure.cosmosdb.table.TableService, dict, str, str,
    #        List[str]) -> None
    """Action: Fed Pool Add

    When a batch client is available, the pool list defaults to the
    configured pool id and every pool is verified to exist before it is
    added. Without a batch client, no existence check is performed and an
    explicit pool list is required.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client, may be None
    :param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    :param str federation_id: federation id
    :param str batch_service_url: Batch service url to use instead
    :param list pools: list of pool ids to add to federation
    :raises ValueError: if federation_id is none or empty, or if a pool
        does not exist in the Batch account
    """
    if util.is_none_or_empty(federation_id):
        raise ValueError('federation id is invalid')
    # ensure that we are operating in AAD mode for batch
    if batch_client is not None:
        bc = settings.credentials_batch(config)
        _check_for_batch_aad(bc, 'add pool(s) to federation')
        if util.is_none_or_empty(pools):
            pools = [settings.pool_id(config)]
        # check for pool existence
        for poolid in pools:
            try:
                batch_client.pool.get(poolid)
            except batchmodels.BatchErrorException as ex:
                if 'The specified pool does not exist' in ex.message.value:
                    # both placeholders must be supplied, otherwise
                    # str.format itself raises IndexError and masks the
                    # intended ValueError
                    raise ValueError(
                        'pool {} does not exist for account {}'.format(
                            poolid, bc.account_service_url))
                else:
                    raise
    else:
        # no fallback to the configured pool id without a batch client
        if util.is_none_or_empty(pools):
            logger.error('no pools specified to add to federation')
            return
    storage.add_pool_to_federation(
        table_client, config, federation_id.lower(), batch_service_url, pools)


def action_fed_pool_remove(
        batch_client, table_client, config, federation_id, all,
        batch_service_url, pools):
    # type: (batchsc.BatchServiceClient,
    #        azure.cosmosdb.table.TableService, dict, str, bool, str,
    #        List[str]) -> None
    """Action: Fed Pool Remove

    When a batch client is available and no pools are given, the pool
    list defaults to the configured pool id. Either a pool list or the
    all flag must then be in effect, but not both.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client, may be None
    :param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    :param str federation_id: federation id
    :param bool all: all pools
    :param str batch_service_url: Batch service url to use instead
    :param list pools: list of pool ids to remove from federation
    :raises ValueError: if federation_id is none or empty, or if both
        pools and all are specified
    """
    if util.is_none_or_empty(federation_id):
        raise ValueError('federation id is invalid')
    # ensure that we are operating in AAD mode for batch
    if batch_client is not None:
        bc = settings.credentials_batch(config)
        # describe the action actually being performed (this previously
        # reused the text of the "add" action)
        _check_for_batch_aad(bc, 'remove pool(s) from federation')
        if util.is_none_or_empty(pools):
            pools = [settings.pool_id(config)]
    if util.is_none_or_empty(pools) and not all:
        logger.error('no pools specified to remove from federation')
        return
    elif util.is_not_empty(pools) and all:
        raise ValueError('cannot specify both --all and --poolid')
    storage.remove_pool_from_federation(
        table_client, config, federation_id.lower(), all, batch_service_url,
        pools)


def action_fed_jobs_add(
        batch_client, keyvault_client, blob_client, table_client,
        queue_client, config, federation_id):
    # type: (batchsc.BatchServiceClient,
    #        azure.keyvault.KeyVaultClient,
    #        azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService,
    #        azure.storage.queue.QueueService, dict, str) -> None
    """Action: Fed Jobs Add

    Picks the blobxfer file according to settings.is_windows_pool, then
    delegates to batch.add_jobs with recreate disabled, no tail, and the
    given federation id.

    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param azure.keyvault.KeyVaultClient keyvault_client: keyvault client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param azure.storage.queue.QueueService queue_client: queue client
    :param dict config: configuration dict
    :param str federation_id: federation id
    :raises ValueError: if federation_id is none or empty
    """
    if util.is_none_or_empty(federation_id):
        raise ValueError('federation id is invalid')
    if settings.is_windows_pool(config):
        blobxfer_file = _BLOBXFER_WINDOWS_FILE
    else:
        blobxfer_file = _BLOBXFER_FILE
    batch.add_jobs(
        batch_client, blob_client, table_client, queue_client, keyvault_client,
        config, None, _IMAGE_BLOCK_FILE, blobxfer_file, _AUTOSCRATCH_FILE,
        recreate=False, tail=None, federation_id=federation_id)


def action_fed_jobs_list(
        table_client, config, federation_id, jobid, jobscheduleid, blocked,
        queued):
    # type: (azure.cosmosdb.table.TableService,
    #        dict, str, str, str, bool, bool) -> None
    """Action: Fed Jobs List

    Lists blocked actions, queued actions or (by default) active jobs in
    the federation by delegating to the matching storage.list_* function.

    :param azure.cosmosdb.table.TableService table_client: table client
    :param dict config: configuration dict
    :param str federation_id: federation id
    :param str jobid: job id
    :param str jobscheduleid: job schedule id
    :param bool blocked: blocked actions only
    :param bool queued: queued actions only
    :raises ValueError: if both jobid and jobscheduleid are not None, or
        if both blocked and queued are set
    """
    if not (jobid is None or jobscheduleid is None):
        raise ValueError('cannot specify both --jobid and --jobscheduleid')
    if blocked and queued:
        raise ValueError('cannot specify both --blocked and --queued')
    # choose the listing function, then invoke it with the common arguments
    if blocked:
        list_func = storage.list_blocked_actions_in_federation
    elif queued:
        list_func = storage.list_queued_actions_in_federation
    else:
        list_func = storage.list_active_jobs_in_federation
    list_func(table_client, config, federation_id, jobid, jobscheduleid)


def action_fed_jobs_del_or_term(
        blob_client, table_client, queue_client, config, delete, federation_id,
        jobid, jobscheduleid, all_jobs, all_jobschedules, force):
    # type: (azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService,
    #        azure.storage.queue.QueueService, dict, bool, str, str, str,
    #        bool, bool, bool) -> None
    """Action: Fed Jobs Del or Term

    Validates the option combination and delegates to
    storage.delete_or_terminate_job_from_federation. If none of jobid,
    jobscheduleid, all_jobs or all_jobschedules is given, the job
    specifications in the config are used instead: those for which
    settings.job_recurrence is truthy are submitted as job schedule ids,
    the remainder as job ids.

    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param azure.storage.queue.QueueService queue_client: queue client
    :param dict config: configuration dict
    :param bool delete: delete instead of terminate
    :param str federation_id: federation id
    :param str jobid: job id
    :param str jobscheduleid: job schedule id
    :param bool all_jobs: all jobs
    :param bool all_jobschedules: all job schedules
    :param bool force: force
    :raises ValueError: if mutually exclusive options are combined
    """
    if all_jobs and all_jobschedules:
        raise ValueError(
            'cannot specify both --all-jobs and --all-jobschedules')
    elif all_jobs:
        if util.is_not_empty(jobid) or util.is_not_empty(jobscheduleid):
            raise ValueError(
                'cannot specify both --all-jobs and --jobid or '
                '--jobscheduleid')
    elif all_jobschedules:
        if util.is_not_empty(jobid) or util.is_not_empty(jobscheduleid):
            raise ValueError(
                'cannot specify both --all-jobschedules and --jobscheduleid '
                'or --jobid')
    else:
        if util.is_not_empty(jobid) and util.is_not_empty(jobscheduleid):
            raise ValueError('cannot specify both --jobid and --jobscheduleid')
        elif (util.is_none_or_empty(jobid) and
              util.is_none_or_empty(jobscheduleid)):
            # nothing specified: fall back to the job specifications in
            # the config, split into job schedules and regular jobs
            js = []
            jobs = []
            for job in settings.job_specifications(config):
                if settings.job_recurrence(job):
                    js.append(settings.job_id(job))
                else:
                    jobs.append(settings.job_id(job))
            # job schedules are submitted before regular jobs
            if util.is_not_empty(js):
                storage.delete_or_terminate_job_from_federation(
                    blob_client, table_client, queue_client, config, delete,
                    federation_id, None, js, False, False, force)
            if util.is_not_empty(jobs):
                storage.delete_or_terminate_job_from_federation(
                    blob_client, table_client, queue_client, config, delete,
                    federation_id, jobs, None, False, False, force)
            return
    storage.delete_or_terminate_job_from_federation(
        blob_client, table_client, queue_client, config, delete, federation_id,
        jobid, jobscheduleid, all_jobs, all_jobschedules, force)


def action_fed_jobs_zap(blob_client, config, federation_id, unique_id):
    # type: (azure.storage.blob.BlockBlobService,
    #        dict, str, str) -> None
    """Action: Fed Jobs Zap

    Asks for confirmation via util.confirm_action and, if confirmed,
    delegates to storage.zap_unique_id_from_federation.

    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param dict config: configuration dict
    :param str federation_id: federation id
    :param str unique_id: unique id
    :raises ValueError: if federation_id or unique_id is none or empty
    """
    if util.is_none_or_empty(federation_id):
        raise ValueError('federation id is invalid')
    if util.is_none_or_empty(unique_id):
        raise ValueError('unique id is invalid')
    prompt = (
        'zap unique id {} in federation {}, this may result in '
        'orphaned jobs and/or data'
    ).format(unique_id, federation_id)
    if util.confirm_action(config, msg=prompt):
        storage.zap_unique_id_from_federation(
            blob_client, config, federation_id, unique_id)


def action_slurm_ssh(
        compute_client, network_client, table_client, batch_client, config,
        tty, command, kind, offset, node_name):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.cosmosdb.table.TableService,
    #        azure.batch.batch_service_client.BatchServiceClient, dict,
    #        bool, tuple, str, int, str) -> None
    """Action: Slurm Ssh

    Without a node name, connects to the Slurm VM of the given kind at
    the given offset. With a node name, looks up the batch node id mapped
    to that Slurm host and connects through action_pool_ssh using the
    login ssh settings.

    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :param bool tty: allocate pseudo-tty
    :param tuple command: command
    :param str kind: kind
    :param int offset: offset
    :param str node_name: node name
    :raises RuntimeError: if no batch node id is associated with node_name
    """
    if util.is_none_or_empty(node_name):
        _check_compute_client(compute_client)
        _check_network_client(network_client)
        vm_res = settings.slurm_settings(config, kind)
        offset = 0 if offset is None else int(offset)
        if kind == 'login':
            # login offsets are shifted past the controller VM count
            offset = settings.slurm_vm_count(config, 'controller') + offset
        resource.ssh_to_virtual_machine_resource(
            compute_client, network_client, vm_res,
            crypto.get_slurm_ssh_key_prefix(kind), tty, command, offset=offset)
        return
    cluster_id = settings.slurm_options_settings(config).cluster_id
    # get host name to node id mapping
    node_id = storage.get_slurm_host_node_id(
        table_client, cluster_id, node_name)
    if util.is_none_or_empty(node_id):
        raise RuntimeError(
            'No batch node id associated with Slurm node: {}'.format(
                node_name))
    login_ssh = settings.slurm_settings(config, 'login').ssh
    private_key = login_ssh.ssh_private_key
    if private_key is None:
        # fall back to the key in the generated file export path
        private_key = pathlib.Path(
            login_ssh.generated_file_export_path,
            crypto.get_slurm_ssh_key_prefix('login'))
    action_pool_ssh(
        batch_client, config, None, node_id, tty, command,
        ssh_username=login_ssh.username,
        ssh_private_key=private_key)


def action_slurm_cluster_create(
        auth_client, resource_client, compute_client, network_client,
        blob_client, table_client, queue_client, batch_client, config):
    # type: (azure.mgmt.authorization.AuthorizationManagementClient,
    #        azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService,
    #        azure.storage.queue.QueueService,
    #        azure.batch.batch_service_client.BatchServiceClient, dict) -> None
    """Action: Slurm Cluster Create

    Checks the resource, compute, network and batch clients and the
    management AAD credentials, then delegates to
    slurm.create_slurm_controller.

    :param azure.mgmt.authorization.AuthorizationManagementClient auth_client:
        auth client
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param azure.storage.queue.QueueService queue_client: queue client
    :param azure.batch.batch_service_client.BatchServiceClient batch_client:
        batch client
    :param dict config: configuration dict
    :raises ValueError: if the management subscription id or the AAD
        authority url is none or empty
    """
    _check_resource_client(resource_client)
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    _check_batch_client(batch_client)
    # the subscription id is examined first; the authority url is only
    # looked at when the subscription id is populated
    mgmt_creds = settings.credentials_management(config)
    if util.is_none_or_empty(mgmt_creds.subscription_id):
        creds_invalid = True
    else:
        creds_invalid = util.is_none_or_empty(mgmt_creds.aad.authority_url)
    if creds_invalid:
        raise ValueError('management aad credentials are invalid')
    slurm.create_slurm_controller(
        auth_client, resource_client, compute_client, network_client,
        blob_client, table_client, queue_client, batch_client, config,
        _RESOURCES_PATH, _SLURMMASTERPREP_FILE, _SLURMCOMPUTENODEPREP_FILE,
        _SLURMPY_FILE, _SLURMREQ_FILE, _CONFIGURABLE_SLURM_FILES)


def action_slurm_cluster_suspend(
        compute_client, config, controller_nodes, login_nodes, wait):
    # type: (azure.mgmt.compute.ComputeManagementClient, dict, bool,
    #        bool, bool) -> None
    """Action: Slurm Cluster Suspend

    After a warning and confirmation, submits one
    resource.suspend_virtual_machine_resource call per selected VM to a
    thread pool sized to the total controller plus login VM count.

    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param bool controller_nodes: controller nodes
    :param bool login_nodes: login nodes
    :param bool wait: wait for suspension to complete; when set, the
        result of every submitted call is also retrieved
    """
    _check_compute_client(compute_client)
    cluster_id = settings.slurm_options_settings(config).cluster_id
    # flatten controller VMs followed by login VMs; the position of an
    # entry in this list is used as its VM offset
    vm_entries = []
    total_vms = 0
    for kind in ('controller', 'login'):
        kind_settings = settings.slurm_settings(config, kind)
        vm_count = settings.slurm_vm_count(config, kind)
        total_vms += vm_count
        vm_entries.extend((kind, kind_settings) for _ in range(vm_count))
    logger.warning(
        '**WARNING** cluster suspend is an experimental feature and may lead '
        'to data loss, unavailability or an unrecoverable state for '
        'the slurm cluster {}.'.format(cluster_id))
    prompt = 'suspending Slurm cluster {}'.format(cluster_id)
    if not util.confirm_action(config, msg=prompt):
        return
    # auto confirm is enabled on the config before the per-VM calls run
    settings.set_auto_confirm(config, True)
    selected = {'controller': controller_nodes, 'login': login_nodes}
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=total_vms) as executor:
        futures = [
            executor.submit(
                resource.suspend_virtual_machine_resource,
                compute_client, config, vm_res, offset=vm_offset, wait=wait)
            for vm_offset, (kind, vm_res) in enumerate(vm_entries)
            if selected[kind]
        ]
        if wait:
            for future in futures:
                future.result()


def action_slurm_cluster_start(
        compute_client, config, controller_nodes, login_nodes, wait):
    # type: (azure.mgmt.compute.ComputeManagementClient, dict, bool,
    #        bool, bool) -> None
    """Action: Slurm Cluster Start

    After confirmation, submits one
    resource.start_virtual_machine_resource call per selected VM to a
    thread pool sized to the total controller plus login VM count.

    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param bool controller_nodes: controller nodes
    :param bool login_nodes: login nodes
    :param bool wait: wait for restart to complete; when set, the result
        of every submitted call is also retrieved
    """
    _check_compute_client(compute_client)
    cluster_id = settings.slurm_options_settings(config).cluster_id
    # flatten controller VMs followed by login VMs; the position of an
    # entry in this list is used as its VM offset
    vm_entries = []
    total_vms = 0
    for kind in ('controller', 'login'):
        kind_settings = settings.slurm_settings(config, kind)
        vm_count = settings.slurm_vm_count(config, kind)
        total_vms += vm_count
        vm_entries.extend((kind, kind_settings) for _ in range(vm_count))
    prompt = 'start Slurm cluster {}'.format(cluster_id)
    if not util.confirm_action(config, msg=prompt):
        return
    # auto confirm is enabled on the config before the per-VM calls run
    settings.set_auto_confirm(config, True)
    selected = {'controller': controller_nodes, 'login': login_nodes}
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=total_vms) as executor:
        futures = [
            executor.submit(
                resource.start_virtual_machine_resource,
                compute_client, config, vm_res, offset=vm_offset, wait=wait)
            for vm_offset, (kind, vm_res) in enumerate(vm_entries)
            if selected[kind]
        ]
        if wait:
            for future in futures:
                future.result()


def action_slurm_cluster_status(compute_client, network_client, config):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient, dict) -> None
    """Action: Slurm Cluster Status

    Calls resource.stat_virtual_machine_resource once per controller VM
    and then once per login VM.

    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    """
    _check_compute_client(compute_client)
    controller_res = settings.slurm_settings(config, 'controller')
    cont_vm_count = settings.slurm_vm_count(config, 'controller')
    for idx in range(cont_vm_count):
        resource.stat_virtual_machine_resource(
            compute_client, network_client, config, controller_res,
            offset=idx)
    login_res = settings.slurm_settings(config, 'login')
    login_vm_count = settings.slurm_vm_count(config, 'login')
    for idx in range(login_vm_count):
        # login offsets continue after the controller VMs
        resource.stat_virtual_machine_resource(
            compute_client, network_client, config, login_res,
            offset=cont_vm_count + idx)


def action_slurm_cluster_destroy(
        resource_client, compute_client, network_client, blob_client,
        table_client, queue_client, config, delete_all_resources,
        delete_virtual_network, generate_from_prefix, wait):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.storage.blob.BlockBlobService,
    #        azure.cosmosdb.table.TableService,
    #        azure.storage.queue.QueueService, dict, bool, bool,
    #        bool, bool) -> None
    """Action: Slurm Cluster Destroy

    Checks the management clients and option compatibility, then
    delegates to slurm.delete_slurm_controller.

    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param azure.cosmosdb.table.TableService table_client: table client
    :param azure.storage.queue.QueueService queue_client: queue client
    :param dict config: configuration dict
    :param bool delete_all_resources: delete all resources, forwarded as
        the delete_resource_group option
    :param bool delete_virtual_network: delete virtual network
    :param bool generate_from_prefix: generate resources from hostname prefix
    :param bool wait: wait for deletion to complete
    :raises ValueError: if generate_from_prefix is combined with either
        delete option
    """
    _check_resource_client(resource_client)
    _check_compute_client(compute_client)
    _check_network_client(network_client)
    # generate_from_prefix is mutually exclusive with the delete_* options
    if generate_from_prefix:
        if delete_all_resources or delete_virtual_network:
            raise ValueError(
                'Cannot specify generate_from_prefix and a delete_* option')
    slurm.delete_slurm_controller(
        resource_client, compute_client, network_client, blob_client,
        table_client, queue_client, config,
        delete_resource_group=delete_all_resources,
        delete_virtual_network=delete_virtual_network,
        generate_from_prefix=generate_from_prefix,
        wait=wait)