# Copyright (c) Microsoft Corporation
#
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# compat imports
from __future__ import (
    absolute_import, division, print_function
)
from builtins import (  # noqa
    bytes, dict, int, list, object, range, str, ascii, chr, hex, input,
    next, oct, open, pow, round, super, filter, map, zip)
# stdlib imports
import functools
import json
import logging
import os
try:
    import pathlib2 as pathlib
except ImportError:
    import pathlib
# non-stdlib imports
import msrestazure.azure_exceptions
# local imports
from . import crypto
from . import resource
from . import settings
from . import storage
from . import util

# create logger
logger = logging.getLogger(__name__)
util.setup_logger(logger)


def create_storage_cluster_mount_args(
        compute_client, network_client, config, sc_id, host_mount_path):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        dict, str, str) -> Tuple[str, str]
    """Create storage cluster mount arguments
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    :param str sc_id: storage cluster id
    :param str host_mount_path: host mount path
    :rtype: tuple
    :return: (fstab mount, storage cluster arg)
    """
    fstab_mount = None
    sc_arg = None
    # get remotefs settings
    rfs = settings.remotefs_settings(config, sc_id)
    sc = rfs.storage_cluster
    # iterate through shared data volumes and find storage clusters
    sdv = settings.global_resources_shared_data_volumes(config)
    if (sc_id not in sdv or
            not settings.is_shared_data_volume_storage_cluster(
                sdv, sc_id)):
        raise RuntimeError(
            'No storage cluster {} found in configuration'.format(sc_id))
    # get vm count
    if sc.vm_count < 1:
        raise RuntimeError(
            'storage cluster {} vm_count {} is invalid'.format(
                sc_id, sc.vm_count))
    # get fileserver type
    if sc.file_server.type == 'nfs':
        # query first vm for info
        vm_name = settings.generate_virtual_machine_name(sc, 0)
        vm = compute_client.virtual_machines.get(
            resource_group_name=sc.resource_group,
            vm_name=vm_name,
        )
        nic = resource.get_nic_from_virtual_machine(
            network_client, sc.resource_group, vm)
        # get private ip of vm
        remote_ip = nic.ip_configurations[0].private_ip_address
        # construct mount options
        mo = '_netdev,noauto,nfsvers=4,intr'
        amo = settings.shared_data_volume_mount_options(sdv, sc_id)
        if util.is_not_empty(amo):
            if 'udp' in amo:
                raise RuntimeError(
                    ('udp cannot be specified as a mount option for '
                     'storage cluster {}').format(sc_id))
            if 'auto' in amo:
                raise RuntimeError(
                    ('auto cannot be specified as a mount option for '
                     'storage cluster {}').format(sc_id))
            if any([x.startswith('nfsvers=') for x in amo]):
                raise RuntimeError(
                    ('nfsvers cannot be specified as a mount option for '
                     'storage cluster {}').format(sc_id))
            if any([x.startswith('port=') for x in amo]):
                raise RuntimeError(
                    ('port cannot be specified as a mount option for '
                     'storage cluster {}').format(sc_id))
            mo = ','.join((mo, ','.join(amo)))
        # construct mount string for fstab
        fstab_mount = (
            '{remoteip}:{srcpath} {hmp} '
            '{fstype} {mo} 0 0').format(
                remoteip=remote_ip,
                srcpath=sc.file_server.mountpoint,
                hmp=host_mount_path,
                fstype=sc.file_server.type,
                mo=mo)
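        # Illustrative result only (all values below are placeholders, not
        # taken from any real configuration): with a server private ip of
        # 10.0.0.4, a file server mountpoint of /data and a host mount path
        # of /mnt/sc, the generated fstab entry would resemble:
        #   10.0.0.4:/data /mnt/sc nfs _netdev,noauto,nfsvers=4,intr 0 0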
    elif sc.file_server.type == 'glusterfs':
        # walk vms and find non-overlapping ud/fds
        primary_ip = None
        primary_ud = None
        primary_fd = None
        backup_ip = None
        backup_ud = None
        backup_fd = None
        vms = {}
        # first pass, attempt to populate all ip, ud/fd
        for i in range(sc.vm_count):
            vm_name = settings.generate_virtual_machine_name(sc, i)
            vm = compute_client.virtual_machines.get(
                resource_group_name=sc.resource_group,
                vm_name=vm_name,
                expand=compute_client.virtual_machines.models.
                InstanceViewTypes.instance_view,
            )
            nic = resource.get_nic_from_virtual_machine(
                network_client, sc.resource_group, vm)
            vms[i] = (vm, nic)
            # get private ip and ud/fd of vm
            remote_ip = nic.ip_configurations[0].private_ip_address
            ud = vm.instance_view.platform_update_domain
            fd = vm.instance_view.platform_fault_domain
            if primary_ip is None:
                primary_ip = remote_ip
                primary_ud = ud
                primary_fd = fd
            if backup_ip is None:
                if (primary_ip == backup_ip or primary_ud == ud or
                        primary_fd == fd):
                    continue
                backup_ip = remote_ip
                backup_ud = ud
                backup_fd = fd
        # second pass, fill in with at least non-overlapping update domains
        if backup_ip is None:
            for i in range(sc.vm_count):
                vm, nic = vms[i]
                remote_ip = nic.ip_configurations[0].private_ip_address
                ud = vm.instance_view.platform_update_domain
                fd = vm.instance_view.platform_fault_domain
                if primary_ud != ud:
                    backup_ip = remote_ip
                    backup_ud = ud
                    backup_fd = fd
                    break
        if primary_ip is None or backup_ip is None:
            raise RuntimeError(
                'Could not find either a primary ip {} or backup ip {} for '
                'glusterfs client mount'.format(primary_ip, backup_ip))
        logger.debug('primary ip/ud/fd={} backup ip/ud/fd={}'.format(
            (primary_ip, primary_ud, primary_fd),
            (backup_ip, backup_ud, backup_fd)))
        # construct mount options
        mo = '_netdev,noauto,transport=tcp,backupvolfile-server={}'.format(
            backup_ip)
        amo = settings.shared_data_volume_mount_options(sdv, sc_id)
        if util.is_not_empty(amo):
            if 'auto' in amo:
                raise RuntimeError(
                    ('auto cannot be specified as a mount option for '
                     'storage cluster {}').format(sc_id))
            if any([x.startswith('backupvolfile-server=') for x in amo]):
                raise RuntimeError(
                    ('backupvolfile-server cannot be specified as a mount '
                     'option for storage cluster {}').format(sc_id))
            if any([x.startswith('transport=') for x in amo]):
                raise RuntimeError(
                    ('transport cannot be specified as a mount option for '
                     'storage cluster {}').format(sc_id))
            mo = ','.join((mo, ','.join(amo)))
        # construct mount string for fstab, srcpath is the gluster volume
        fstab_mount = (
            '{remoteip}:/{srcpath} {hmp} '
            '{fstype} {mo} 0 0').format(
                remoteip=primary_ip,
                srcpath=settings.get_file_server_glusterfs_volume_name(sc),
                hmp=host_mount_path,
                fstype=sc.file_server.type,
                mo=mo)
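        # Illustrative result only (placeholder values): with a primary ip
        # of 10.0.0.4, a backup ip of 10.0.0.5, a gluster volume named gv0
        # and a host mount path of /mnt/sc, the generated entry would
        # resemble (shown wrapped here, but emitted as a single line):
        #   10.0.0.4:/gv0 /mnt/sc glusterfs
        #     _netdev,noauto,transport=tcp,backupvolfile-server=10.0.0.5 0 0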
    else:
        raise NotImplementedError(
            ('cannot handle file_server type {} for storage '
             'cluster {}').format(sc.file_server.type, sc_id))
    if util.is_none_or_empty(fstab_mount):
        raise RuntimeError(
            ('Could not construct an fstab mount entry for storage '
             'cluster {}').format(sc_id))
    # construct sc_arg
    sc_arg = '{}:{}'.format(sc.file_server.type, sc_id)
    # log config
    if settings.verbose(config):
        logger.debug('storage cluster {} fstab mount: {}'.format(
            sc_id, fstab_mount))
    return (fstab_mount, sc_arg)
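# Illustrative usage sketch for the helper above (the client objects, the
# storage cluster id and the mount path are placeholders, not part of this
# module):
#   fstab_mount, sc_arg = create_storage_cluster_mount_args(
#       compute_client, network_client, config, 'mystoragecluster',
#       '/mnt/mystoragecluster')
# sc_arg takes the form '<file_server type>:<storage cluster id>', e.g.
# 'nfs:mystoragecluster', while fstab_mount is a complete /etc/fstab line.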


def _create_managed_disk(compute_client, rfs, disk_name):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        settings.RemoteFsSettings, str) ->
    #        msrestazure.azure_operation.AzureOperationPoller
    """Create a managed disk
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param settings.RemoteFsSettings rfs: remote filesystem settings
    :param str disk_name: disk name
    :rtype: msrestazure.azure_operation.AzureOperationPoller
    :return: async operation handle
    """
    iops_rw = None
    mbps_rw = None
    if rfs.managed_disks.zone is not None:
        zone = [rfs.managed_disks.zone]
    else:
        zone = None
    if rfs.managed_disks.sku == 'standard_lrs':
        sku = compute_client.disks.models.DiskStorageAccountTypes(
            'Standard_LRS')
    elif rfs.managed_disks.sku == 'premium_lrs':
        sku = compute_client.disks.models.DiskStorageAccountTypes(
            'Premium_LRS')
    elif rfs.managed_disks.sku == 'standard_ssd_lrs':
        sku = compute_client.disks.models.DiskStorageAccountTypes(
            'StandardSSD_LRS')
    elif rfs.managed_disks.sku == 'ultra_ssd_lrs':
        sku = compute_client.disks.models.DiskStorageAccountTypes(
            'UltraSSD_LRS')
        if zone is None:
            raise ValueError(
                'Ultra SSD disks require availability zones, please specify '
                'zone in configuration')
        iops_rw = rfs.managed_disks.disk_provisioned_perf_iops_rw
        mbps_rw = rfs.managed_disks.disk_provisioned_perf_mbps_rw
    logger.info(
        'creating managed disk: {} size={} GB sku={} zone={} iops_rw={} '
        'mbps_rw={}'.format(
            disk_name, rfs.managed_disks.disk_size_gb, sku, zone,
            iops_rw, mbps_rw))
    return compute_client.disks.create_or_update(
        resource_group_name=rfs.managed_disks.resource_group,
        disk_name=disk_name,
        disk=compute_client.disks.models.Disk(
            location=rfs.managed_disks.location,
            creation_data=compute_client.disks.models.CreationData(
                create_option=compute_client.disks.models.
                DiskCreateOption.empty,
            ),
            sku=compute_client.disks.models.DiskSku(
                name=sku,
            ),
            os_type=compute_client.disks.models.OperatingSystemTypes.linux,
            disk_size_gb=rfs.managed_disks.disk_size_gb,
            zones=zone,
            disk_iops_read_write=iops_rw,
            disk_mbps_read_write=mbps_rw,
        ),
    )


def create_managed_disks(resource_client, compute_client, config, wait=True):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient, dict, bool) -> None
    """Create managed disks
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param bool wait: wait for operation to complete
    """
    # retrieve remotefs settings
    rfs = settings.remotefs_settings(config)
    # create resource group if it doesn't exist
    resource.create_resource_group(
        resource_client, rfs.managed_disks.resource_group,
        rfs.managed_disks.location)
    # iterate disks and create disks if they don't exist
    existing_disk_sizes = set()
    async_ops = {}
    for disk_name in rfs.managed_disks.disk_names:
        try:
            disk = compute_client.disks.get(
                resource_group_name=rfs.managed_disks.resource_group,
                disk_name=disk_name)
            logger.debug('{} exists [created={} size={} GB]'.format(
                disk.id, disk.time_created, disk.disk_size_gb))
            existing_disk_sizes.add(disk.disk_size_gb)
        except msrestazure.azure_exceptions.CloudError as e:
            if e.status_code == 404:
                existing_disk_sizes.add(rfs.managed_disks.disk_size_gb)
                if len(existing_disk_sizes) != 1:
                    existing_disk_sizes.discard(rfs.managed_disks.disk_size_gb)
                    raise RuntimeError(
                        ('Inconsistent disk sizes for newly created disks '
                         '({} GB) to existing disks ({} GB)').format(
                            rfs.managed_disks.disk_size_gb,
                            existing_disk_sizes)
                    )
                async_ops[disk_name] = resource.AsyncOperation(
                    functools.partial(
                        _create_managed_disk, compute_client, rfs, disk_name))
            else:
                raise
    # block for all ops to complete if specified
    # note that if wait is not specified and there is no delay, the request
    # may not get acknowledged...
    if wait:
        if len(async_ops) > 0:
            logger.debug('waiting for all {} disks to provision'.format(
                len(async_ops)))
        for disk_name in async_ops:
            disk = async_ops[disk_name].result()
            logger.info('{} created with size of {} GB'.format(
                disk.id, disk.disk_size_gb))


def delete_managed_disks(
        resource_client, compute_client, config, name, resource_group=None,
        all=False, delete_resource_group=False, wait=False,
        confirm_override=False):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient, dict, str or list,
    #        bool, bool, bool, bool) -> dict
    """Delete managed disks
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param str or list name: specific disk name or list of names
    :param str resource_group: resource group of the disks
    :param bool all: delete all disks in resource group
    :param bool delete_resource_group: delete resource group
    :param bool wait: wait for operation to complete
    :param bool confirm_override: override confirmation of delete
    :rtype: dict or None
    :return: dictionary of disk names -> async ops if wait is False,
        otherwise None
    """
    # retrieve remotefs settings if necessary
    rfs = None
    if resource_group is None:
        rfs = settings.remotefs_settings(config)
        resource_group = rfs.managed_disks.resource_group
    # delete rg if specified
    if delete_resource_group:
        if (not confirm_override and not util.confirm_action(
                config, 'delete resource group {}'.format(resource_group))):
            return
        logger.info('deleting resource group {}'.format(resource_group))
        async_delete = resource_client.resource_groups.delete(
            resource_group_name=resource_group)
        if wait:
            logger.debug('waiting for resource group {} to delete'.format(
                resource_group))
            async_delete.result()
            logger.info('resource group {} deleted'.format(
                resource_group))
        return
    # set disks to delete
    if all:
        disks = [
            x[0].split('/')[-1] for x in list_disks(
                compute_client, config, resource_group=resource_group,
                restrict_scope=False)
        ]
    else:
        if util.is_none_or_empty(name):
            if rfs is None:
                rfs = settings.remotefs_settings(config)
            disks = rfs.managed_disks.disk_names
        else:
            if isinstance(name, list):
                disks = name
            else:
                disks = [name]
    # iterate disks and delete them
    async_ops = {}
    for disk_name in disks:
        if (not confirm_override and not util.confirm_action(
                config,
                'delete managed disk {} from resource group {}'.format(
                    disk_name, resource_group))):
            continue
        logger.info('deleting managed disk {} in resource group {}'.format(
            disk_name, resource_group))
        async_ops[disk_name] = resource.AsyncOperation(functools.partial(
            compute_client.disks.delete, resource_group_name=resource_group,
            disk_name=disk_name), retry_conflict=True)
    # block for all ops to complete if specified
    if wait:
        if len(async_ops) > 0:
            logger.debug('waiting for all {} disks to be deleted'.format(
                len(async_ops)))
        for disk_name in async_ops:
            async_ops[disk_name].result()
        logger.info('{} managed disks deleted in resource group {}'.format(
            len(async_ops), resource_group))
    else:
        return async_ops


def list_disks(
        compute_client, config, resource_group=None, restrict_scope=False):
    # type: (azure.mgmt.compute.ComputeManagementClient, dict, str, bool) ->
    #        List[str, computemodels.StorageAccountTypes]
    """List managed disks
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param str resource_group: resource group to list from
    :param bool restrict_scope: restrict scope to config
    :rtype: list
    :return: list of (disk ids, disk account type)
    """
    # retrieve remotefs settings
    rfs = settings.remotefs_settings(config)
    confdisks = frozenset(rfs.managed_disks.disk_names)
    resource_group = resource_group or rfs.managed_disks.resource_group
    # list disks in resource group
    logger.debug(
        ('listing all managed disks in resource group {} '
         '[restrict_scope={}]').format(resource_group, restrict_scope))
    disks = compute_client.disks.list_by_resource_group(
        resource_group_name=resource_group)
    ret = []
    i = 0
    for disk in disks:
        if restrict_scope and disk.name not in confdisks:
            continue
        logger.info(
            '{} [provisioning_state={} created={} size={} type={}]'.format(
                disk.id, disk.provisioning_state, disk.time_created,
                disk.disk_size_gb, disk.sku.name))
        ret.append((disk.id, disk.sku.name))
        i += 1
    if i == 0:
        logger.error(
            ('no managed disks found in resource group {} '
             '[restrict_scope={}]').format(resource_group, restrict_scope))
    return ret


def _create_virtual_machine_extension(
        compute_client, rfs, bootstrap_file, blob_urls, vm_name, disks,
        private_ips, offset, verbose=False):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        settings.RemoteFsSettings, str, List[str], str, dict, List[str],
    #        int) -> msrestazure.azure_operation.AzureOperationPoller
    """Create a virtual machine extension
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param settings.RemoteFsSettings rfs: remote filesystem settings
    :param str bootstrap_file: bootstrap file
    :param list blob_urls: blob urls
    :param str vm_name: vm name
    :param dict disks: data disk map
    :param list private_ips: list of static private ips
    :param int offset: vm number
    :param bool verbose: verbose logging
    :rtype: msrestazure.azure_operation.AzureOperationPoller
    :return: msrestazure.azure_operation.AzureOperationPoller
    """
    # construct vm extensions
    vm_ext_name = settings.generate_virtual_machine_extension_name(
        rfs.storage_cluster, offset)
    # get premium storage settings
    premium = False
    for diskname in rfs.storage_cluster.vm_disk_map[offset].disk_array:
        if (disks[diskname][1] ==
                compute_client.disks.models.StorageAccountTypes.premium_lrs):
            premium = True
            break
    # construct server options
    server_options = []
    st = rfs.storage_cluster.file_server.type
    so = rfs.storage_cluster.file_server.server_options
    # special processing for gluster
    if st == 'glusterfs':
        # always create the following options if they don't exist
        server_options.append(
            settings.get_file_server_glusterfs_volume_name(
                rfs.storage_cluster))
        server_options.append(
            settings.get_file_server_glusterfs_volume_type(
                rfs.storage_cluster))
        server_options.append(
            settings.get_file_server_glusterfs_transport(
                rfs.storage_cluster))
        # process key pairs
        if st in so:
            for key in so[st]:
                if (key == 'volume_name' or key == 'volume_type' or
                        key == 'transport'):
                    continue
                server_options.append('{}:{}'.format(key, so[st][key]))
        server_options = ','.join(server_options)
    elif st == 'nfs':
        try:
            nfs_hosts = so[st]
        except KeyError:
            nfs_hosts = None
        if util.is_none_or_empty(nfs_hosts):
            nfs_hosts = {'*': []}
        nfs_exports = []
        for host in nfs_hosts:
            opt = []
            for eo in nfs_hosts[host]:
                if (not eo.startswith('mountpath=') and
                        not eo.startswith('mp=')):
                    opt.append(eo)
            if util.is_none_or_empty(opt):
                opt.extend(['rw', 'sync', 'root_squash', 'no_subtree_check'])
            nfs_exports.append('{}%{}'.format(host, ','.join(opt)))
        server_options = ';'.join(nfs_exports)
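        # Illustrative values only: with no explicit nfs server_options the
        # export string above becomes
        # '*%rw,sync,root_squash,no_subtree_check'; multiple hosts are
        # joined with ';' and each host's option list with ','.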
        del nfs_hosts
        del nfs_exports
    logger.debug('server options: {}'.format(server_options))
    # create samba option
    if util.is_not_empty(rfs.storage_cluster.file_server.samba.share_name):
        samba = rfs.storage_cluster.file_server.samba
        smb = '{share}:{user}:{pw}:{uid}:{gid}:{ro}:{cm}:{dm}'.format(
            share=samba.share_name,
            user=samba.account.username,
            pw=samba.account.password,
            uid=samba.account.uid,
            gid=samba.account.gid,
            ro=samba.read_only,
            cm=samba.create_mask,
            dm=samba.directory_mask,
        )
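        # Illustrative layout only (placeholder values): the resulting
        # colon-delimited samba argument resembles
        #   share:shareuser:<password>:1000:1000:False:0700:0700
        # matching the share/user/pw/uid/gid/read_only/create_mask/
        # directory_mask order above.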
    else:
        smb = None
    # construct bootstrap command
    if rfs.storage_cluster.prometheus.ne_enabled:
        if util.is_not_empty(rfs.storage_cluster.prometheus.ne_options):
            pneo = ','.join(rfs.storage_cluster.prometheus.ne_options)
        else:
            pneo = ''
        promopt = ' -e \'{},{}\''.format(
            rfs.storage_cluster.prometheus.ne_port, pneo)
        del pneo
    else:
        promopt = ''
    cmd = './{bsf} {c}{d}{e}{f}{i}{m}{n}{o}{p}{r}{s}{t}'.format(
        bsf=bootstrap_file,
        c=' -c \'{}\''.format(smb) if util.is_not_empty(smb) else '',
        d=' -d {}'.format(rfs.storage_cluster.hostname_prefix),
        e=promopt,
        f=' -f {}'.format(rfs.storage_cluster.vm_disk_map[offset].filesystem),
        i=' -i {}'.format(
            ','.join(private_ips)) if util.is_not_empty(private_ips) else '',
        m=' -m {}'.format(rfs.storage_cluster.file_server.mountpoint),
        n=' -n' if settings.can_tune_tcp(rfs.storage_cluster.vm_size) else '',
        o=' -o \'{}\''.format(server_options) if util.is_not_empty(
            server_options) else '',
        p=' -p' if premium else '',
        r=' -r {}'.format(rfs.storage_cluster.vm_disk_map[offset].raid_level),
        s=' -s {}'.format(rfs.storage_cluster.file_server.type),
        t=' -t {}'.format(
            ','.join(rfs.storage_cluster.file_server.mount_options)
            if util.is_not_empty(rfs.storage_cluster.file_server.mount_options)
            else ''))
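    # Illustrative rendered command (the script name and all option values
    # are placeholders, not from any real configuration):
    #   ./bootstrap.sh -d mycluster -f btrfs -m /data -n \
    #       -o '*%rw,sync,root_squash,no_subtree_check' -r 0 -s nfs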
    if verbose:
        logger.debug('bootstrap command: {}'.format(cmd))
    logger.debug('creating virtual machine extension: {}'.format(vm_ext_name))
    return compute_client.virtual_machine_extensions.create_or_update(
        resource_group_name=rfs.storage_cluster.resource_group,
        vm_name=vm_name,
        vm_extension_name=vm_ext_name,
        extension_parameters=compute_client.virtual_machine_extensions.models.
        VirtualMachineExtension(
            location=rfs.storage_cluster.location,
            publisher='Microsoft.Azure.Extensions',
            virtual_machine_extension_type='CustomScript',
            type_handler_version='2.0',
            auto_upgrade_minor_version=True,
            settings={
                'fileUris': blob_urls,
            },
            protected_settings={
                'commandToExecute': cmd,
                'storageAccountName': storage.get_storageaccount(),
                'storageAccountKey': storage.get_storageaccount_key(),
            },
        ),
    )


def create_storage_cluster(
        resource_client, compute_client, network_client, blob_client, config,
        sc_id, bootstrap_file, remotefs_files):
    # type: (azure.mgmt.resource.resources.ResourceManagementClient,
    #        azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.storage.blob.BlockBlobService, dict, str, str,
    #        List[tuple]) -> None
    """Create a storage cluster
    :param azure.mgmt.resource.resources.ResourceManagementClient
        resource_client: resource client
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param str sc_id: storage cluster id
    :param str bootstrap_file: customscript bootstrap file
    :param list remotefs_files: remotefs shell scripts
    :param dict config: configuration dict
    """
    # retrieve remotefs settings
    if util.is_none_or_empty(sc_id):
        raise ValueError('storage cluster id not specified')
    rfs = settings.remotefs_settings(config, sc_id)
    # check if cluster already exists
    logger.debug('checking if storage cluster {} exists'.format(sc_id))
    # construct disk map
    disk_map = {}
    try:
        disk_names = list_disks(compute_client, config, restrict_scope=True)
        for disk_id, sat in disk_names:
            disk_map[disk_id.split('/')[-1]] = (disk_id, sat)
        del disk_names
    except msrestazure.azure_exceptions.CloudError:
        logger.error(
            'could not enumerate required disks for storage cluster {}'.format(
                sc_id))
        raise
    # check vms
    for i in range(rfs.storage_cluster.vm_count):
        vm_name = settings.generate_virtual_machine_name(
            rfs.storage_cluster, i)
        try:
            vm = compute_client.virtual_machines.get(
                resource_group_name=rfs.storage_cluster.resource_group,
                vm_name=vm_name,
            )
            raise RuntimeError(
                'Existing virtual machine {} found, cannot add this '
                'storage cluster'.format(vm.id))
        except msrestazure.azure_exceptions.CloudError as e:
            if e.status_code == 404:
                pass
            else:
                raise
        # check if all referenced managed disks exist and premium sku
        # is specified if premium disk
        for disk in rfs.storage_cluster.vm_disk_map[i].disk_array:
            if disk not in disk_map:
                raise RuntimeError(
                    ('Referenced managed disk {} unavailable in set {} for '
                     'vm offset {}').format(disk, disk_map, i))
            if (disk_map[disk][1] ==
                    compute_client.disks.models.
                    StorageAccountTypes.premium_lrs and
                    not settings.is_premium_storage_vm_size(
                        rfs.storage_cluster.vm_size)):
                raise RuntimeError(
                    ('Premium storage requires premium storage capable '
                     'vm_size instead of {}'.format(
                         rfs.storage_cluster.vm_size)))
    # confirm before proceeding
    if not util.confirm_action(
            config, 'create storage cluster {}'.format(sc_id)):
        return
    # create resource group if it doesn't exist
    resource.create_resource_group(
        resource_client, rfs.storage_cluster.resource_group,
        rfs.storage_cluster.location)
    # create storage container
    storage.create_storage_containers_nonbatch(
        blob_client, None, None, 'remotefs')
    # upload scripts to blob storage for customscript vm extension
    blob_urls = storage.upload_for_nonbatch(
        blob_client, remotefs_files, 'remotefs')
    # async operation dictionary
    async_ops = {}
    # create nsg
    async_ops['nsg'] = resource.AsyncOperation(functools.partial(
        resource.create_network_security_group, network_client,
        rfs.storage_cluster))
    # create static private ip block
    if rfs.storage_cluster.file_server.type == 'nfs':
        private_ips = None
        logger.debug('using dynamic private ip address allocation')
    else:
        # follow Azure numbering scheme: start offset at 4
        private_ips = [
            x for x in util.ip_from_address_prefix(
                rfs.storage_cluster.virtual_network.subnet_address_prefix,
                start_offset=4,
                max=rfs.storage_cluster.vm_count)
        ]
        logger.debug('static private ip addresses to assign: {}'.format(
            private_ips))
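        # Illustrative example only: for a subnet_address_prefix of
        # 10.0.0.0/24 and a vm_count of 3, the assigned addresses would be
        # 10.0.0.4, 10.0.0.5 and 10.0.0.6 (offsets start at 4 to skip the
        # addresses Azure reserves at the start of a subnet).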
    # create virtual network and subnet if specified
    vnet, subnet = resource.create_virtual_network_and_subnet(
        resource_client, network_client,
        rfs.storage_cluster.virtual_network.resource_group,
        rfs.storage_cluster.location,
        rfs.storage_cluster.virtual_network)
    # create public ips
    pips = None
    if rfs.storage_cluster.public_ip.enabled:
        async_ops['pips'] = {}
        for i in range(rfs.storage_cluster.vm_count):
            async_ops['pips'][i] = resource.AsyncOperation(functools.partial(
                resource.create_public_ip, network_client,
                rfs.storage_cluster, i))
        logger.debug('waiting for public ips to provision')
        pips = {}
        for offset in async_ops['pips']:
            pip = async_ops['pips'][offset].result()
            logger.info(
                ('public ip: {} [provisioning_state={} ip_address={} '
                 'public_ip_allocation={}]').format(
                     pip.id, pip.provisioning_state,
                     pip.ip_address, pip.public_ip_allocation_method))
            pips[offset] = pip
    else:
        logger.info('public ip is disabled for storage cluster: {}'.format(
            sc_id))
    # get nsg
    logger.debug('waiting for network security group to provision')
    nsg = async_ops['nsg'].result()
    # create nics
    async_ops['nics'] = {}
    for i in range(rfs.storage_cluster.vm_count):
        async_ops['nics'][i] = resource.AsyncOperation(functools.partial(
            resource.create_network_interface, network_client,
            rfs.storage_cluster, subnet, nsg, private_ips, pips, i))
    # create availability set if vm_count > 1, this call is not async
    availset = resource.create_availability_set(
        compute_client, rfs.storage_cluster, rfs.storage_cluster.vm_count,
        fault_domains=rfs.storage_cluster.fault_domains)
    # wait for nics to be created
    logger.debug('waiting for network interfaces to provision')
    nics = {}
    for offset in async_ops['nics']:
        nic = async_ops['nics'][offset].result()
        logger.info(
            ('network interface: {} [provisioning_state={} private_ip={} '
             'private_ip_allocation_method={} network_security_group={} '
             'accelerated_networking={}]').format(
                 nic.id, nic.provisioning_state,
                 nic.ip_configurations[0].private_ip_address,
                 nic.ip_configurations[0].private_ip_allocation_method,
                 nsg.name if nsg is not None else None,
                 nic.enable_accelerated_networking))
        nics[offset] = nic
    # read or generate ssh keys
    if util.is_not_empty(rfs.storage_cluster.ssh.ssh_public_key_data):
        key_data = rfs.storage_cluster.ssh.ssh_public_key_data
    else:
        # create universal ssh key for all vms if not specified
        ssh_pub_key = rfs.storage_cluster.ssh.ssh_public_key
        if ssh_pub_key is None:
            _, ssh_pub_key = crypto.generate_ssh_keypair(
                rfs.storage_cluster.ssh.generated_file_export_path,
                crypto.get_remotefs_ssh_key_prefix())
        # read public key data
        with ssh_pub_key.open('rb') as fd:
            key_data = fd.read().decode('utf8')
    ssh_pub_key = compute_client.virtual_machines.models.SshPublicKey(
        path='/home/{}/.ssh/authorized_keys'.format(
            rfs.storage_cluster.ssh.username),
        key_data=key_data,
    )
    # create vms
    async_ops['vms'] = {}
    for i in range(rfs.storage_cluster.vm_count):
        async_ops['vms'][i] = resource.AsyncOperation(functools.partial(
            resource.create_virtual_machine, compute_client,
            rfs.storage_cluster, availset, nics, disk_map, ssh_pub_key, i))
    # wait for vms to be created
    logger.info(
        'waiting for {} virtual machines to provision'.format(
            len(async_ops['vms'])))
    vms = {}
    for offset in async_ops['vms']:
        vms[offset] = async_ops['vms'][offset].result()
    logger.debug('{} virtual machines created'.format(len(vms)))
    # wait for all vms to be created before installing extensions to prevent
    # variability in wait times and timeouts during customscript
    async_ops['vmext'] = {}
    for i in range(rfs.storage_cluster.vm_count):
        # install vm extension
        async_ops['vmext'][i] = resource.AsyncOperation(
            functools.partial(
                _create_virtual_machine_extension, compute_client, rfs,
                bootstrap_file, blob_urls, vms[i].name, disk_map,
                private_ips, i, settings.verbose(config)),
            max_retries=0,
        )
    logger.debug('waiting for virtual machine extensions to provision')
    for offset in async_ops['vmext']:
        # get ip info for vm
        if util.is_none_or_empty(pips):
            ipinfo = 'private_ip_address={}'.format(
                nics[offset].ip_configurations[0].private_ip_address)
        else:
            # refresh public ip for vm
            pip = network_client.public_ip_addresses.get(
                resource_group_name=rfs.storage_cluster.resource_group,
                public_ip_address_name=pips[offset].name,
            )
            ipinfo = 'fqdn={} public_ip_address={}'.format(
                pip.dns_settings.fqdn, pip.ip_address)
        # get vm extension result
        vm_ext = async_ops['vmext'][offset].result()
        vm = vms[offset]
        logger.info(
            ('virtual machine: {} [provisioning_state={}/{} '
             'vm_size={} {}]').format(
                 vm.id, vm.provisioning_state, vm_ext.provisioning_state,
                 vm.hardware_profile.vm_size, ipinfo))


def resize_storage_cluster(
        compute_client, network_client, blob_client, config, sc_id,
        bootstrap_file, addbrick_file, remotefs_files):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient,
    #        azure.storage.blob.BlockBlobService, dict, str, str, str,
    #        list) -> bool
    """Resize a storage cluster (increase size only for now)
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param azure.storage.blob.BlockBlobService blob_client: blob client
    :param dict config: configuration dict
    :param str sc_id: storage cluster id
    :param str bootstrap_file: bootstrap file
    :param str addbrick_file: glusterfs addbrick file
    :param list remotefs_files: remotefs files to upload
    :rtype: bool
    :return: if cluster was resized
    """
    # retrieve remotefs settings
    if util.is_none_or_empty(sc_id):
        raise ValueError('storage cluster id not specified')
    rfs = settings.remotefs_settings(config, sc_id)
    # if storage cluster is not glusterfs, exit
    if rfs.storage_cluster.file_server.type != 'glusterfs':
        raise ValueError(
            'Resize is only supported on glusterfs storage clusters')
    # only allow certain types of resizes to proceed
    # for now disallow resize on all stripe volumes, can be relaxed in
    # the future
    voltype = settings.get_file_server_glusterfs_volume_type(
        rfs.storage_cluster).lower()
    if 'stripe' in voltype:
        raise RuntimeError('Cannot resize glusterfs striped volumes')
    # construct disk map
    disk_map = {}
    disk_names = list_disks(compute_client, config, restrict_scope=True)
    for disk_id, sat in disk_names:
        disk_map[disk_id.split('/')[-1]] = (disk_id, sat)
    del disk_names
    # get existing vms
    new_vms = []
    pe_vms = {}
    all_pe_disks = set()
    vnet_name = None
    subnet_name = None
    nsg_name = None
    for i in range(rfs.storage_cluster.vm_count):
        vm_name = settings.generate_virtual_machine_name(
            rfs.storage_cluster, i)
        try:
            vm = compute_client.virtual_machines.get(
                resource_group_name=rfs.storage_cluster.resource_group,
                vm_name=vm_name,
            )
        except msrestazure.azure_exceptions.CloudError as e:
            if e.status_code == 404:
                new_vms.append(i)
                continue
            else:
                raise
        entry = {
            'vm': vm,
            'disks': set(),
        }
        for dd in vm.storage_profile.data_disks:
            entry['disks'].add(dd.name)
            all_pe_disks.add(dd.name.lower())
        # get vnet, subnet, nsg names
        if vnet_name is None or subnet_name is None or nsg_name is None:
            _, _, subnet_name, vnet_name, nsg_name = \
                resource.get_resource_names_from_virtual_machine(
                    compute_client, network_client, rfs.storage_cluster, vm)
        # add vm to map
        pe_vms[i] = entry
    # check early return conditions
    if len(new_vms) == 0:
        logger.warning(
            'no new virtual machines to add in storage cluster {}'.format(
                sc_id))
        return False
    # ensure that new disks to add are not already attached and
    # are provisioned
    for i in new_vms:
        for disk in rfs.storage_cluster.vm_disk_map[i].disk_array:
            if disk.lower() in all_pe_disks:
                raise RuntimeError(
                    'Disk {} for new VM {} is already attached'.format(
                        disk, i))
            # check disks for new vms are provisioned
            if disk not in disk_map:
                raise RuntimeError(
                    ('Disk {} for new VM {} is not provisioned in '
                     'resource group {}').format(
                         disk, i, rfs.storage_cluster.resource_group))
    logger.warning(
        ('**WARNING** cluster resize is an experimental feature and may lead '
         'to data loss, unavailability or an unrecoverable state for '
         'the storage cluster {}.'.format(sc_id)))
    # confirm before proceeding
    if not util.confirm_action(
            config, 'resize storage cluster {}'.format(sc_id)):
        return False
    # re-create storage container in case it got deleted
    storage.create_storage_containers_nonbatch(
        blob_client, None, None, 'remotefs')
    # upload scripts to blob storage for customscript vm extension
    blob_urls = storage.upload_for_nonbatch(
        blob_client, remotefs_files, 'remotefs')
    # create static private ip block, start offset at 4
    private_ips = [
        x for x in util.ip_from_address_prefix(
            rfs.storage_cluster.virtual_network.subnet_address_prefix,
            start_offset=4,
            max=rfs.storage_cluster.vm_count)
    ]
    logger.debug('static private ip block: {}'.format(private_ips))
    async_ops = {}
    # create public ips
    if rfs.storage_cluster.public_ip.enabled:
        async_ops['pips'] = {}
        for i in new_vms:
            async_ops['pips'][i] = resource.AsyncOperation(functools.partial(
                resource.create_public_ip, network_client,
                rfs.storage_cluster, i))
    else:
        logger.info('public ip is disabled for storage cluster: {}'.format(
            sc_id))
    # get subnet and nsg objects
    subnet = network_client.subnets.get(
        resource_group_name=rfs.storage_cluster.resource_group,
        virtual_network_name=vnet_name,
        subnet_name=subnet_name,
    )
    nsg = network_client.network_security_groups.get(
        resource_group_name=rfs.storage_cluster.resource_group,
        network_security_group_name=nsg_name,
    )
    # get ssh login info of prober vm
    ssh_info = None
    for i in pe_vms:
        vm = pe_vms[i]['vm']
        ssh_info = _get_ssh_info(
            compute_client, network_client, config, sc_id, None, vm.name)
        break
    if settings.verbose(config):
        logger.debug('prober vm: {}'.format(ssh_info))
    # wait for public ips
    pips = None
    if 'pips' in async_ops:
        logger.debug('waiting for public ips to provision')
        pips = {}
        for offset in async_ops['pips']:
            pip = async_ops['pips'][offset].result()
            logger.info(
                ('public ip: {} [provisioning_state={} ip_address={} '
                 'public_ip_allocation={}]').format(
                     pip.id, pip.provisioning_state,
                     pip.ip_address, pip.public_ip_allocation_method))
            pips[offset] = pip
    # create nics
    nics = {}
    async_ops['nics'] = {}
    for i in new_vms:
        async_ops['nics'][i] = resource.AsyncOperation(functools.partial(
            resource.create_network_interface, network_client,
            rfs.storage_cluster, subnet, nsg, private_ips, pips, i))
    # get availability set
    availset = compute_client.availability_sets.get(
        resource_group_name=rfs.storage_cluster.resource_group,
        availability_set_name=settings.generate_availability_set_name(
            rfs.storage_cluster),
    )
    # wait for nics to be created
    logger.debug('waiting for network interfaces to provision')
    for offset in async_ops['nics']:
        nic = async_ops['nics'][offset].result()
        logger.info(
            ('network interface: {} [provisioning_state={} private_ip={} '
             'private_ip_allocation_method={} network_security_group={} '
             'accelerated={}]').format(
                 nic.id, nic.provisioning_state,
                 nic.ip_configurations[0].private_ip_address,
                 nic.ip_configurations[0].private_ip_allocation_method,
                 nsg.name if nsg is not None else None,
                 nic.enable_accelerated_networking))
        nics[offset] = nic
    # read or generate ssh keys
    if util.is_not_empty(rfs.storage_cluster.ssh.ssh_public_key_data):
        key_data = rfs.storage_cluster.ssh.ssh_public_key_data
    else:
        # create universal ssh key for all vms if not specified
        ssh_pub_key = rfs.storage_cluster.ssh.ssh_public_key
        if ssh_pub_key is None:
            # check if ssh key exists first in default location
            ssh_pub_key = pathlib.Path(
                rfs.storage_cluster.ssh.generated_file_export_path,
                crypto.get_remotefs_ssh_key_prefix() + '.pub')
            if not ssh_pub_key.exists():
                _, ssh_pub_key = crypto.generate_ssh_keypair(
                    rfs.storage_cluster.ssh.generated_file_export_path,
                    crypto.get_remotefs_ssh_key_prefix())
        # read public key data
        with ssh_pub_key.open('rb') as fd:
            key_data = fd.read().decode('utf8')
    ssh_pub_key = compute_client.virtual_machines.models.SshPublicKey(
        path='/home/{}/.ssh/authorized_keys'.format(
            rfs.storage_cluster.ssh.username),
        key_data=key_data,
    )
    # create vms
    async_ops['vms'] = {}
    for i in new_vms:
        async_ops['vms'][i] = resource.AsyncOperation(functools.partial(
            resource.create_virtual_machine, compute_client,
            rfs.storage_cluster, availset, nics, disk_map, ssh_pub_key, i))
    # gather all new private ips
    new_private_ips = {}
    for offset in nics:
        new_private_ips[offset] = nics[
            offset].ip_configurations[0].private_ip_address
    if settings.verbose(config):
        logger.debug('new private ips: {}'.format(new_private_ips))
    # wait for vms to be created
    logger.info(
        'waiting for {} virtual machines to provision'.format(
            len(async_ops['vms'])))
    vm_hostnames = []
    vms = {}
    for offset in async_ops['vms']:
        vms[offset] = async_ops['vms'][offset].result()
        # generate vm names in list
        vm_hostnames.append(settings.generate_virtual_machine_name(
            rfs.storage_cluster, offset))
    logger.debug('{} virtual machines created: {}'.format(
        len(vms), vm_hostnames))
    # wait for all vms to be created before installing extensions to prevent
    # variability in wait times and timeouts during customscript
    async_ops['vmext'] = {}
    for i in new_vms:
        # install vm extension
        async_ops['vmext'][i] = resource.AsyncOperation(
            functools.partial(
                _create_virtual_machine_extension, compute_client, rfs,
                bootstrap_file, blob_urls, vms[i].name, disk_map, private_ips,
                i, settings.verbose(config)),
            max_retries=0,
        )
    logger.debug(
        'adding {} bricks to gluster volume, this may take a while'.format(
            len(async_ops['vmext'])))
    # execute special add brick script
    script_cmd = '/opt/batch-shipyard/{asf} {c}{d}{i}{n}{v}'.format(
        asf=addbrick_file,
        c=' -c {}'.format(rfs.storage_cluster.vm_count),
        d=' -d {}'.format(','.join(vm_hostnames)),
        i=' -i {}'.format(','.join(list(new_private_ips.values()))),
        n=' -n {}'.format(
            settings.get_file_server_glusterfs_volume_name(
                rfs.storage_cluster)),
        v=' -v \'{}\''.format(voltype),
    )
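    # Illustrative rendered command (script name and values are
    # placeholders, not from any real configuration):
    #   /opt/batch-shipyard/addbrick.sh -c 4 -d vm-3 -i 10.0.0.7 -n gv0 \
    #       -v 'distributed'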
    if settings.verbose(config):
        logger.debug('add brick command: {}'.format(script_cmd))
    ssh_priv_key, port, username, ip = ssh_info
    proc = crypto.connect_or_exec_ssh_command(
        ip, port, ssh_priv_key, username, sync=False,
        command=['sudo', script_cmd])
    stdout, stderr = proc.communicate()
    logline = 'add brick script completed with ec={}'.format(proc.returncode)
    if util.is_not_empty(stdout):
        if util.on_python2():
            stdout = stdout.decode('utf8')
        if util.on_windows():
            stdout = stdout.replace('\n', os.linesep)
    if util.is_not_empty(stderr):
        if util.on_python2():
            stderr = stderr.decode('utf8')
        if util.on_windows():
            stderr = stderr.replace('\n', os.linesep)
    if proc.returncode != 0:
        logger.error(logline)
        logger.error('add brick stdout:{}{}'.format(os.linesep, stdout))
        logger.error('add brick stderr:{}{}'.format(os.linesep, stderr))
    else:
        logger.info(logline)
        logger.debug('add brick stdout:{}{}'.format(os.linesep, stdout))
    del logline
    del stdout
    del stderr
    # wait for new vms to finish custom script extension processing
    logger.debug('waiting for virtual machine extensions to provision')
    for offset in async_ops['vmext']:
        # get ip info for vm
        if util.is_none_or_empty(pips):
            ipinfo = 'private_ip_address={}'.format(
                nics[offset].ip_configurations[0].private_ip_address)
        else:
            # refresh public ip for vm
            pip = network_client.public_ip_addresses.get(
                resource_group_name=rfs.storage_cluster.resource_group,
                public_ip_address_name=pips[offset].name,
            )
            ipinfo = 'fqdn={} public_ip_address={}'.format(
                pip.dns_settings.fqdn, pip.ip_address)
        # get vm extension result
        vm_ext = async_ops['vmext'][offset].result()
        vm = vms[offset]
        logger.info(
            ('virtual machine: {} [provisioning_state={}/{} '
             'vm_size={} {}]').format(
                 vm.id, vm.provisioning_state, vm_ext.provisioning_state,
                 vm.hardware_profile.vm_size, ipinfo))
    if proc.returncode == 0:
        logger.info('storage cluster {} resized'.format(sc_id))
    else:
        logger.critical('failed to resize cluster {}'.format(sc_id))


def expand_storage_cluster(
        compute_client, network_client, config, sc_id, bootstrap_file,
        rebalance=False):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    #        azure.mgmt.network.NetworkManagementClient, dict, str, str,
    #        bool) -> bool
    """Expand a storage cluster
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    :param str sc_id: storage cluster id
    :param str bootstrap_file: bootstrap file
    :param bool rebalance: rebalance filesystem
    :rtype: bool
    :return: if cluster was expanded
    """
    # retrieve remotefs settings
    if util.is_none_or_empty(sc_id):
        raise ValueError('storage cluster id not specified')
    rfs = settings.remotefs_settings(config, sc_id)
    # check if cluster exists
    logger.debug('checking if storage cluster {} exists'.format(sc_id))
    # construct disk map
    disk_map = {}
    disk_names = list_disks(compute_client, config, restrict_scope=True)
    for disk_id, sat in disk_names:
        disk_map[disk_id.split('/')[-1]] = (disk_id, sat)
    del disk_names
    # check vms
    vms = {}
    new_disk_count = 0
    mdadm_expand = False
    for i in range(rfs.storage_cluster.vm_count):
        # check if this vm filesystem supports expanding
        if (rfs.storage_cluster.vm_disk_map[i].filesystem != 'btrfs' and
                rfs.storage_cluster.vm_disk_map[i].raid_level == 0):
            mdadm_expand = True
        vm_name = settings.generate_virtual_machine_name(
            rfs.storage_cluster, i)
        try:
            vm = compute_client.virtual_machines.get(
                resource_group_name=rfs.storage_cluster.resource_group,
                vm_name=vm_name,
            )
        except msrestazure.azure_exceptions.CloudError as e:
            if e.status_code == 404:
                raise RuntimeError(
                    'Virtual machine {} not found, cannot expand this '
                    'storage cluster'.format(vm_name))
            else:
                raise
        # create entry
        entry = {
            'vm': vm,
            'pe_disks': {
                'names': set(),
                'luns': [],
            },
            'new_disks': [],
        }
        # get attached disks
        for dd in vm.storage_profile.data_disks:
            entry['pe_disks']['names'].add(dd.name)
            entry['pe_disks']['luns'].append(dd.lun)
        # check if all referenced managed disks exist
        for disk in rfs.storage_cluster.vm_disk_map[i].disk_array:
            if disk not in disk_map:
                raise RuntimeError(
                    ('Referenced managed disk {} unavailable in set {} for '
                     'vm offset {}. Ensure that this disk has been '
                     'provisioned first.').format(disk, disk_map, i))
            if disk not in entry['pe_disks']['names']:
                entry['new_disks'].append(disk)
                new_disk_count += 1
        # check for proper raid setting and number of disks
        pe_len = len(entry['pe_disks']['names'])
        if pe_len <= 1 or rfs.storage_cluster.vm_disk_map[i].raid_level != 0:
            raise RuntimeError(
                'Cannot expand array from {} disk(s) or RAID level {}'.format(
                    pe_len, rfs.storage_cluster.vm_disk_map[i].raid_level))
        # add vm to map
        vms[i] = entry
    # check early return conditions
    if len(vms) == 0:
        logger.warning(
            'no virtual machines to expand in storage cluster {}'.format(
                sc_id))
        return False
    if settings.verbose(config):
        logger.debug('expand settings:{}{}'.format(os.linesep, vms))
    if new_disk_count == 0:
        logger.error(
            'no new disks detected for storage cluster {}'.format(sc_id))
        return False
    if mdadm_expand:
        logger.warning(
            '**WARNING** cluster expansion is being performed on mdadm-based '
            'RAID arrays. This feature is experimental and can take an '
            'extremely long time. Any interruption or unrecoverable '
            'failure can result in data loss.')
    del mdadm_expand
    # confirm before proceeding
    if not util.confirm_action(
            config, 'expand storage cluster {}'.format(sc_id)):
        return False
    # attach new data disks to each vm
    async_ops = {}
    for key in vms:
        entry = vms[key]
        vm = entry['vm']
        if len(entry['new_disks']) == 0:
            logger.debug('no new disks to attach to virtual machine {}'.format(
                vm.id))
            continue
        premium = False
        # sort lun array and get last element
        lun = sorted(entry['pe_disks']['luns'])[-1] + 1
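        # e.g. if luns 0, 1 and 2 are currently attached, new disks are
        # attached starting at lun 3 (illustrative; actual values depend on
        # the existing vm)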
        for diskname in entry['new_disks']:
            if (disk_map[diskname][1] ==
                    compute_client.disks.models.
                    StorageAccountTypes.premium_lrs):
                premium = True
            vm.storage_profile.data_disks.append(
                compute_client.disks.models.DataDisk(
                    lun=lun,
                    name=diskname,
                    create_option=compute_client.disks.models.
                    DiskCreateOptionTypes.attach,
                    managed_disk=compute_client.disks.models.
                    ManagedDiskParameters(
                        id=disk_map[diskname][0],
                    ),
                )
            )
            lun += 1
        logger.info(
            ('attaching {} additional data disks {} to virtual '
             'machine {}').format(
                 len(entry['new_disks']), entry['new_disks'], vm.name))
        # update vm
        async_ops[key] = (
            premium,
            resource.AsyncOperation(functools.partial(
                compute_client.virtual_machines.create_or_update,
                resource_group_name=rfs.storage_cluster.resource_group,
                vm_name=vm.name, parameters=vm))
        )
    # wait for async ops to complete
    if len(async_ops) == 0:
        logger.error('no operations started for expansion')
        return False
    logger.debug(
        'waiting for disks to attach to virtual machines and expanding '
        'the volume; please be patient as this can take a very long time')
    for offset in async_ops:
        premium, op = async_ops[offset]
        vm = op.result()
        vms[offset]['vm'] = vm
        # execute bootstrap script via ssh
        script_cmd = \
            '/opt/batch-shipyard/{bsf} {a}{b}{d}{f}{m}{p}{r}{s}'.format(
                bsf=bootstrap_file,
                a=' -a',
                b=' -b' if rebalance else '',
                d=' -d {}'.format(rfs.storage_cluster.hostname_prefix),
                f=' -f {}'.format(
                    rfs.storage_cluster.vm_disk_map[offset].filesystem),
                m=' -m {}'.format(
                    rfs.storage_cluster.file_server.mountpoint),
                p=' -p' if premium else '',
                r=' -r {}'.format(
                    rfs.storage_cluster.vm_disk_map[offset].raid_level),
                s=' -s {}'.format(rfs.storage_cluster.file_server.type),
            )
        ssh_priv_key, port, username, ip = _get_ssh_info(
            compute_client, network_client, config, sc_id, None, vm.name)
        if settings.verbose(config):
            logger.debug('bootstrap command: {}'.format(script_cmd))
        proc = crypto.connect_or_exec_ssh_command(
            ip, port, ssh_priv_key, username, sync=False,
            command=['sudo', script_cmd])
        stdout, stderr = proc.communicate()
        if util.is_not_empty(stdout):
            if util.on_python2():
                stdout = stdout.decode('utf8')
            if util.on_windows():
                stdout = stdout.replace('\n', os.linesep)
        if util.is_not_empty(stderr):
            if util.on_python2():
                stderr = stderr.decode('utf8')
            if util.on_windows():
                stderr = stderr.replace('\n', os.linesep)
        vms[offset]['status'] = proc.returncode
        vms[offset]['stdout'] = '>>stdout>> {}:{}{}'.format(
            vm.name, os.linesep, stdout)
        vms[offset]['stderr'] = '>>stderr>> {}:{}{}'.format(
            vm.name, os.linesep, stderr)
    logger.info('disk attach operations completed')
    succeeded = True
    for key in vms:
        entry = vms[key]
        vm = entry['vm']
        log = 'bootstrap exit code for virtual machine {}: {}'.format(
            vm.name, entry['status'])
        if entry['status'] == 0:
            logger.info(log)
            logger.debug(entry['stdout'])
        else:
            logger.error(log)
            logger.error(entry['stdout'])
            logger.error(entry['stderr'])
            succeeded = False
    if succeeded:
        logger.info('storage cluster {} expanded'.format(sc_id))
    else:
        logger.critical('failed to expand cluster {}'.format(sc_id))
    return succeeded


def delete_storage_cluster(
|
|
resource_client, compute_client, network_client, blob_client, config,
|
|
sc_id, delete_data_disks=False, delete_virtual_network=False,
|
|
delete_resource_group=False, generate_from_prefix=False, wait=False):
|
|
# type: (azure.mgmt.resource.resources.ResourceManagementClient,
|
|
# azure.mgmt.compute.ComputeManagementClient,
|
|
# azure.mgmt.network.NetworkManagementClient,
|
|
# azure.storage.blob.BlockBlobService, dict, str, bool,
|
|
# bool, bool, bool, bool) -> None
|
|
"""Delete a storage cluster
|
|
:param azure.mgmt.resource.resources.ResourceManagementClient
|
|
resource_client: resource client
|
|
:param azure.mgmt.compute.ComputeManagementClient compute_client:
|
|
compute client
|
|
:param azure.mgmt.network.NetworkManagementClient network_client:
|
|
network client
|
|
:param azure.storage.blob.BlockBlobService blob_client: blob client
|
|
:param dict config: configuration dict
|
|
:param str sc_id: storage cluster id
|
|
:param bool delete_data_disks: delete managed data disks
|
|
:param bool delete_virtual_network: delete vnet
|
|
:param bool delete_resource_group: delete resource group
|
|
:param bool generate_from_prefix: generate resources from hostname prefix
|
|
:param bool wait: wait for completion
|
|
"""
|
|
# retrieve remotefs settings
|
|
if util.is_none_or_empty(sc_id):
|
|
raise ValueError('storage cluster id not specified')
|
|
rfs = settings.remotefs_settings(config, sc_id)
|
|
# delete rg if specified
|
|
if delete_resource_group:
|
|
if util.confirm_action(
|
|
config, 'delete resource group {}'.format(
|
|
rfs.storage_cluster.resource_group)):
|
|
logger.info('deleting resource group {}'.format(
|
|
rfs.storage_cluster.resource_group))
|
|
async_delete = resource_client.resource_groups.delete(
|
|
resource_group_name=rfs.storage_cluster.resource_group)
|
|
if wait:
|
|
logger.debug('waiting for resource group {} to delete'.format(
|
|
rfs.storage_cluster.resource_group))
|
|
async_delete.result()
|
|
logger.info('resource group {} deleted'.format(
|
|
rfs.storage_cluster.resource_group))
|
|
return
|
|
if not util.confirm_action(
|
|
config, 'delete storage cluster {}'.format(sc_id)):
|
|
return
|
|
# get vms and cache for concurent async ops
|
|
    resources = {}
    for i in range(rfs.storage_cluster.vm_count):
        vm_name = settings.generate_virtual_machine_name(
            rfs.storage_cluster, i)
        try:
            vm = compute_client.virtual_machines.get(
                resource_group_name=rfs.storage_cluster.resource_group,
                vm_name=vm_name,
            )
        except msrestazure.azure_exceptions.CloudError as e:
            if e.status_code == 404:
                logger.warning('virtual machine {} not found'.format(vm_name))
                if generate_from_prefix:
                    logger.warning(
                        'OS and data disks for this virtual machine will not '
                        'be deleted, please use "fs disks del" to delete '
                        'those resources if desired')
                    resources[i] = {
                        'vm': settings.generate_virtual_machine_name(
                            rfs.storage_cluster, i),
                        'as': None,
                        'nic': settings.generate_network_interface_name(
                            rfs.storage_cluster, i),
                        'pip': settings.generate_public_ip_name(
                            rfs.storage_cluster, i),
                        'subnet': None,
                        'nsg': settings.generate_network_security_group_name(
                            rfs.storage_cluster),
                        'vnet': None,
                        'os_disk': None,
                        'data_disks': [],
                    }
                    if rfs.storage_cluster.vm_count > 1:
                        resources[i]['as'] = \
                            settings.generate_availability_set_name(
                                rfs.storage_cluster)
                continue
            else:
                raise
        else:
            # get resources connected to vm
            nic, pip, subnet, vnet, nsg = \
                resource.get_resource_names_from_virtual_machine(
                    compute_client, network_client, rfs.storage_cluster, vm)
            resources[i] = {
                'vm': vm.name,
                'arm_id': vm.id,
                'id': vm.vm_id,
                'as': None,
                'nic': nic,
                'pip': pip,
                'subnet': subnet,
                'nsg': nsg,
                'vnet': vnet,
                'os_disk': vm.storage_profile.os_disk.name,
                'data_disks': [],
            }
            # populate availability set
            if vm.availability_set is not None:
                resources[i]['as'] = vm.availability_set.id.split('/')[-1]
            # populate data disks
            if delete_data_disks:
                for disk in vm.storage_profile.data_disks:
                    resources[i]['data_disks'].append(disk.name)
            # unset virtual network if not specified to delete
            if not delete_virtual_network:
                resources[i]['subnet'] = None
                resources[i]['vnet'] = None
    if len(resources) == 0:
        logger.warning('no resources deleted')
        return
    if settings.verbose(config):
        logger.debug('deleting the following resources:{}{}'.format(
            os.linesep, json.dumps(resources, sort_keys=True, indent=4)))
    # delete storage container
    storage.delete_storage_containers_nonbatch(
        blob_client, None, None, 'remotefs')
    # create async op holder
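    # note: resource.AsyncOperation defers each delete as an async operation;
    # retry_conflict=True is intended to retry requests that fail with
    # conflict errors while dependent resources are still being released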
    async_ops = {}
    # delete vms
    async_ops['vms'] = {}
    for key in resources:
        vm_name = resources[key]['vm']
        async_ops['vms'][vm_name] = resource.AsyncOperation(functools.partial(
            resource.delete_virtual_machine, compute_client,
            rfs.storage_cluster.resource_group, vm_name), retry_conflict=True)
    logger.info(
        'waiting for {} virtual machines to delete'.format(
            len(async_ops['vms'])))
    for vm_name in async_ops['vms']:
        async_ops['vms'][vm_name].result()
    logger.info('{} virtual machines deleted'.format(len(async_ops['vms'])))
    # delete nics
    async_ops['nics'] = {}
    for key in resources:
        nic_name = resources[key]['nic']
        async_ops['nics'][nic_name] = resource.AsyncOperation(
            functools.partial(
                resource.delete_network_interface, network_client,
                rfs.storage_cluster.resource_group, nic_name),
            retry_conflict=True
        )
    # wait for nics to delete
    logger.debug('waiting for {} network interfaces to delete'.format(
        len(async_ops['nics'])))
    for nic_name in async_ops['nics']:
        async_ops['nics'][nic_name].result()
    logger.info('{} network interfaces deleted'.format(len(async_ops['nics'])))
    # delete data disks if specified
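    # note: data disks are deleted from the managed disks resource group,
    # while os disks (below) reside in the storage cluster resource group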
    async_ops['data_disks'] = []
    for key in resources:
        data_disks = resources[key]['data_disks']
        if util.is_none_or_empty(data_disks):
            continue
        if len(data_disks) > 0:
            async_ops['data_disks'].append(delete_managed_disks(
                resource_client, compute_client, config, data_disks,
                resource_group=rfs.managed_disks.resource_group, wait=False))
    # delete os disks
    async_ops['os_disk'] = []
    for key in resources:
        os_disk = resources[key]['os_disk']
        if util.is_none_or_empty(os_disk):
            continue
        async_ops['os_disk'].append(delete_managed_disks(
            resource_client, compute_client, config, os_disk,
            resource_group=rfs.storage_cluster.resource_group, wait=False,
            confirm_override=True))
    # delete nsg
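    # nsg, subnet, vnet and availability set names are shared across the
    # cluster vms, so the deleted set ensures each is only submitted once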
    deleted = set()
    async_ops['nsg'] = {}
    for key in resources:
        nsg_name = resources[key]['nsg']
        if nsg_name in deleted:
            continue
        deleted.add(nsg_name)
        async_ops['nsg'][nsg_name] = resource.AsyncOperation(functools.partial(
            resource.delete_network_security_group, network_client,
            rfs.storage_cluster.resource_group, nsg_name), retry_conflict=True)
    deleted.clear()
    # delete public ips
    async_ops['pips'] = {}
    for key in resources:
        pip_name = resources[key]['pip']
        if util.is_none_or_empty(pip_name):
            continue
        async_ops['pips'][pip_name] = resource.AsyncOperation(
            functools.partial(
                resource.delete_public_ip, network_client,
                rfs.storage_cluster.resource_group, pip_name),
            retry_conflict=True
        )
    logger.debug('waiting for {} public ips to delete'.format(
        len(async_ops['pips'])))
    for pip_name in async_ops['pips']:
        async_ops['pips'][pip_name].result()
    logger.info('{} public ips deleted'.format(len(async_ops['pips'])))
    # delete subnets
    async_ops['subnets'] = {}
    for key in resources:
        subnet_name = resources[key]['subnet']
        vnet_name = resources[key]['vnet']
        if util.is_none_or_empty(subnet_name) or subnet_name in deleted:
            continue
        deleted.add(subnet_name)
        async_ops['subnets'][subnet_name] = resource.AsyncOperation(
            functools.partial(
                resource.delete_subnet, network_client,
                rfs.storage_cluster.virtual_network.resource_group,
                vnet_name, subnet_name),
            retry_conflict=True
        )
    logger.debug('waiting for {} subnets to delete'.format(
        len(async_ops['subnets'])))
    for subnet_name in async_ops['subnets']:
        async_ops['subnets'][subnet_name].result()
    logger.info('{} subnets deleted'.format(len(async_ops['subnets'])))
    deleted.clear()
    # delete vnet
    async_ops['vnets'] = {}
    for key in resources:
        vnet_name = resources[key]['vnet']
        if util.is_none_or_empty(vnet_name) or vnet_name in deleted:
            continue
        deleted.add(vnet_name)
        async_ops['vnets'][vnet_name] = resource.AsyncOperation(
            functools.partial(
                resource.delete_virtual_network, network_client,
                rfs.storage_cluster.virtual_network.resource_group, vnet_name),
            retry_conflict=True
        )
    deleted.clear()
    # delete availability set, this is synchronous
    for key in resources:
        as_name = resources[key]['as']
        if util.is_none_or_empty(as_name) or as_name in deleted:
            continue
        deleted.add(as_name)
        resource.delete_availability_set(
            compute_client, rfs.storage_cluster.resource_group, as_name)
        logger.info('availability set {} deleted'.format(as_name))
    deleted.clear()
    # delete boot diagnostics storage containers
    for key in resources:
        try:
            vm_name = resources[key]['vm']
            vm_id = resources[key]['id']
        except KeyError:
            pass
        else:
            storage.delete_storage_containers_boot_diagnostics(
                blob_client, vm_name, vm_id)
    # wait for all async ops to complete
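    # nsg and vnet deletions generally cannot complete until their dependent
    # nics and subnets are gone, which is why they are awaited last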
    if wait:
        logger.debug('waiting for network security groups to delete')
        for nsg_name in async_ops['nsg']:
            async_ops['nsg'][nsg_name].result()
        logger.info('{} network security groups deleted'.format(
            len(async_ops['nsg'])))
        logger.debug('waiting for virtual networks to delete')
        for vnet_name in async_ops['vnets']:
            async_ops['vnets'][vnet_name].result()
        logger.info('{} virtual networks deleted'.format(
            len(async_ops['vnets'])))
        logger.debug('waiting for managed os disks to delete')
        count = 0
        for os_disk_set in async_ops['os_disk']:
            for os_disk in os_disk_set:
                os_disk_set[os_disk].result()
                count += 1
        logger.info('{} managed os disks deleted'.format(count))
        if len(async_ops['data_disks']) > 0:
            logger.debug('waiting for managed data disks to delete')
            count = 0
            for data_disk_set in async_ops['data_disks']:
                for data_disk in data_disk_set:
                    data_disk_set[data_disk].result()
                    count += 1
            logger.info('{} managed data disks deleted'.format(count))


def suspend_storage_cluster(compute_client, config, sc_id, wait=False):
    # type: (azure.mgmt.compute.ComputeManagementClient, dict, str,
    # bool) -> None
    """Suspend a storage cluster
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param str sc_id: storage cluster id
    :param bool wait: wait for suspension to complete
    """
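    # note: suspension deallocates the virtual machines, which stops compute
    # billing but retains managed disks; dynamically allocated ip addresses
    # may change when the cluster is started again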
    # retrieve remotefs settings
    if util.is_none_or_empty(sc_id):
        raise ValueError('storage cluster id not specified')
    rfs = settings.remotefs_settings(config, sc_id)
    vms = []
    for i in range(rfs.storage_cluster.vm_count):
        vm_name = settings.generate_virtual_machine_name(
            rfs.storage_cluster, i)
        try:
            vm = compute_client.virtual_machines.get(
                resource_group_name=rfs.storage_cluster.resource_group,
                vm_name=vm_name,
            )
        except msrestazure.azure_exceptions.CloudError as e:
            if e.status_code == 404:
                logger.error('virtual machine {} not found'.format(vm_name))
                continue
            else:
                raise
        else:
            vms.append(vm)
    if len(vms) == 0:
        logger.warning('no virtual machines to suspend')
        return
    # check if glusterfs and warn
    if rfs.storage_cluster.file_server.type == 'glusterfs':
        logger.warning(
            '**WARNING** Suspending a glusterfs cluster is risky. Depending '
            'upon the volume type and state of the bricks at the time of '
            'suspension, a variety of issues can occur such as: unsuccessful '
            'restart of the cluster, split-brain states, or even data loss.')
    if not util.confirm_action(
            config, 'suspend storage cluster {}'.format(sc_id)):
        return
    # deallocate each vm
    async_ops = {}
    for vm in vms:
        async_ops[vm.name] = resource.AsyncOperation(functools.partial(
            resource.deallocate_virtual_machine, compute_client,
            rfs.storage_cluster.resource_group, vm.name), retry_conflict=True)
    if wait:
        logger.info(
            'waiting for {} virtual machines to deallocate'.format(
                len(async_ops)))
        for vm_name in async_ops:
            async_ops[vm_name].result()
        logger.info('{} virtual machines deallocated'.format(len(async_ops)))


def start_storage_cluster(compute_client, config, sc_id, wait=False):
    # type: (azure.mgmt.compute.ComputeManagementClient, dict, str,
    # bool) -> None
    """Start a suspended storage cluster
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param dict config: configuration dict
    :param str sc_id: storage cluster id
    :param bool wait: wait for restart to complete
    """
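    # example (illustrative; 'mynfs' is a hypothetical storage cluster id
    # defined in the loaded configuration):
    #   start_storage_cluster(compute_client, config, 'mynfs', wait=True)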
    # retrieve remotefs settings
    if util.is_none_or_empty(sc_id):
        raise ValueError('storage cluster id not specified')
    rfs = settings.remotefs_settings(config, sc_id)
    vms = []
    for i in range(rfs.storage_cluster.vm_count):
        vm_name = settings.generate_virtual_machine_name(
            rfs.storage_cluster, i)
        try:
            vm = compute_client.virtual_machines.get(
                resource_group_name=rfs.storage_cluster.resource_group,
                vm_name=vm_name,
            )
        except msrestazure.azure_exceptions.CloudError as e:
            if e.status_code == 404:
                raise RuntimeError(
                    'virtual machine {} not found'.format(vm_name))
            else:
                raise
        else:
            vms.append(vm)
    if len(vms) == 0:
        logger.error('no virtual machines to restart')
        return
    if not util.confirm_action(
            config, 'start suspended storage cluster {}'.format(sc_id)):
        return
    # start each vm
    async_ops = {}
    for vm in vms:
        async_ops[vm.name] = resource.AsyncOperation(functools.partial(
            resource.start_virtual_machine, compute_client,
            rfs.storage_cluster.resource_group, vm.name))
    if wait:
        logger.info(
            'waiting for {} virtual machines to start'.format(len(async_ops)))
        for vm_name in async_ops:
            async_ops[vm_name].result()
        logger.info('{} virtual machines started'.format(len(async_ops)))


def stat_storage_cluster(
        compute_client, network_client, config, sc_id, status_script,
        detail=False, hosts=False):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    # azure.mgmt.network.NetworkManagementClient, dict, str, str,
    # bool, bool) -> None
    """Retrieve status of a storage cluster
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    :param str sc_id: storage cluster id
    :param str status_script: status script
    :param bool detail: detailed status
    :param bool hosts: dump info for /etc/hosts
    """
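    # status is collected from the instance view of each vm; if detail is
    # requested, the status script is additionally executed on each node via
    # ssh (with sudo) and its output is appended to the report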
    # retrieve remotefs settings
    if util.is_none_or_empty(sc_id):
        raise ValueError('storage cluster id not specified')
    rfs = settings.remotefs_settings(config, sc_id)
    # retrieve all vms
    vms = []
    for i in range(rfs.storage_cluster.vm_count):
        vm_name = settings.generate_virtual_machine_name(
            rfs.storage_cluster, i)
        try:
            vm = compute_client.virtual_machines.get(
                resource_group_name=rfs.storage_cluster.resource_group,
                vm_name=vm_name,
                expand=compute_client.virtual_machines.models.
                InstanceViewTypes.instance_view,
            )
        except msrestazure.azure_exceptions.CloudError as e:
            if e.status_code == 404:
                logger.error('virtual machine {} not found'.format(vm_name))
            else:
                raise
        else:
            vms.append((vm, i))
    if len(vms) == 0:
        logger.error(
            'no virtual machines to query for storage cluster {}'.format(
                sc_id))
        return
    # fetch vm status
    fsstatus = []
    vmstatus = {}
    for vm, offset in vms:
        powerstate = None
        for status in vm.instance_view.statuses:
            if status.code.startswith('PowerState'):
                powerstate = status.code
        diskstates = []
        if util.is_not_empty(vm.instance_view.disks):
            for disk in vm.instance_view.disks:
                for status in disk.statuses:
                    diskstates.append(status.code)
        # get nic/pip connected to vm
        nic, pip = resource.get_nic_and_pip_from_virtual_machine(
            network_client, rfs.storage_cluster.resource_group, vm)
        # get resource names (pass cached data to prevent another lookup)
        _, _, subnet, vnet, nsg = \
            resource.get_resource_names_from_virtual_machine(
                compute_client, network_client, rfs.storage_cluster, vm,
                nic=nic, pip=pip)
        # stat data disks
        disks = {}
        total_size_gb = 0
        for dd in vm.storage_profile.data_disks:
            total_size_gb += dd.disk_size_gb
            disks[dd.name] = {
                'lun': dd.lun,
                'caching': str(dd.caching),
                'disk_size_gb': dd.disk_size_gb,
                'type': str(dd.managed_disk.storage_account_type),
            }
        disks['disk_array_size_gb'] = total_size_gb
        # detailed settings: run stat script via ssh
        if detail:
            ssh_priv_key, port, username, ip = _get_ssh_info(
                compute_client, network_client, config, sc_id, None, vm.name,
                nic=nic, pip=pip)
            offset = settings.get_offset_from_virtual_machine_name(vm.name)
            script_cmd = '/opt/batch-shipyard/{sf} {c}{f}{m}{n}{r}{s}'.format(
                sf=status_script,
                c=' -c' if util.is_not_empty(
                    rfs.storage_cluster.file_server.samba.share_name) else '',
                f=' -f {}'.format(
                    rfs.storage_cluster.vm_disk_map[offset].filesystem),
                m=' -m {}'.format(
                    rfs.storage_cluster.file_server.mountpoint),
                n=' -n {}'.format(
                    settings.get_file_server_glusterfs_volume_name(
                        rfs.storage_cluster)),
                r=' -r {}'.format(
                    rfs.storage_cluster.vm_disk_map[offset].raid_level),
                s=' -s {}'.format(rfs.storage_cluster.file_server.type),
            )
            proc = crypto.connect_or_exec_ssh_command(
                ip, port, ssh_priv_key, username, sync=False,
                command=['sudo', script_cmd])
            stdout = proc.communicate()[0]
            if util.is_not_empty(stdout):
                if util.on_python2():
                    stdout = stdout.decode('utf8')
                if util.on_windows():
                    stdout = stdout.replace('\n', os.linesep)
            fsstatfmt = '>> File Server Status for {} ec={}:{}{}'
            if util.on_python2():
                fsstatfmt = unicode(fsstatfmt)  # noqa
            fsstatus.append(
                fsstatfmt.format(vm.name, proc.returncode, os.linesep, stdout))
        vmstatus[vm.name] = {
            'vm_size': vm.hardware_profile.vm_size,
            'powerstate': powerstate,
            'provisioning_state': vm.provisioning_state,
            'availability_set':
                vm.availability_set.id.split('/')[-1]
                if vm.availability_set is not None else None,
            'update_domain/fault_domain': '{}/{}'.format(
                vm.instance_view.platform_update_domain,
                vm.instance_view.platform_fault_domain),
            'fqdn': pip.dns_settings.fqdn if pip is not None else None,
            'public_ip_address': pip.ip_address if pip is not None else None,
            'public_ip_allocation':
                pip.public_ip_allocation_method if pip is not None else None,
            'private_ip_address': nic.ip_configurations[0].private_ip_address,
            'private_ip_allocation':
                nic.ip_configurations[0].private_ip_allocation_method,
            'admin_username': vm.os_profile.admin_username,
            'accelerated_networking': nic.enable_accelerated_networking,
            'virtual_network': vnet,
            'subnet': subnet,
            'network_security_group': nsg,
            'data_disks': disks,
        }
    if detail:
        log = '{}{}{}{}'.format(
            json.dumps(vmstatus, sort_keys=True, indent=4),
            os.linesep, os.linesep,
            '{}{}'.format(os.linesep, os.linesep).join(
                fsstatus) if detail else '')
    else:
        log = '{}'.format(json.dumps(vmstatus, sort_keys=True, indent=4))
    logger.info('storage cluster {} virtual machine status:{}{}'.format(
        sc_id, os.linesep, log))
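    # hosts mode emits public ip to hostname mappings suitable for /etc/hosts
    # so that a client can resolve the gluster hosts when mounting the volume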
    if hosts:
        if rfs.storage_cluster.file_server.type != 'glusterfs':
            raise ValueError(
                'hosts option is only supported for glusterfs storage '
                'clusters')
        print(('{}>> Ensure that you have enabled the "glusterfs" network '
               'security rule.{}>> Add the following entries to your '
               '/etc/hosts to mount the gluster volume.{}>> Mount the '
               'source as -t glusterfs from {}:/{}{}'.format(
                   os.linesep, os.linesep, os.linesep, next(iter(vmstatus)),
                   settings.get_file_server_glusterfs_volume_name(
                       rfs.storage_cluster), os.linesep)))
        for vmname in vmstatus:
            print('{} {}'.format(
                vmstatus[vmname]['public_ip_address'], vmname))


def _get_ssh_info(
        compute_client, network_client, config, sc_id, cardinal, hostname,
        nic=None, pip=None):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    # azure.mgmt.network.NetworkManagementClient, dict, str, int,
    # str, networkmodels.NetworkInterface,
    # networkmodels.PublicIPAddress) ->
    # Tuple[pathlib.Path, int, str, str]
    """Get SSH connection information for a node in a storage cluster
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    :param str sc_id: storage cluster id
    :param int cardinal: cardinal number
    :param str hostname: hostname
    :param networkmodels.NetworkInterface nic: network interface
    :param networkmodels.PublicIPAddress pip: public ip
    :rtype: tuple
    :return: (ssh private key, port, username, ip)
    """
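    # the connection address prefers the public ip when the cluster was
    # provisioned with public ips enabled; otherwise the nic's private
    # address is used (the caller must have network connectivity to it,
    # e.g. from a peered virtual network)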
    # retrieve remotefs settings
    if util.is_none_or_empty(sc_id):
        raise ValueError('storage cluster id not specified')
    rfs = settings.remotefs_settings(config, sc_id)
    # retrieve specific vm
    if cardinal is not None:
        vm_name = settings.generate_virtual_machine_name(
            rfs.storage_cluster, cardinal)
    else:
        vm_name = hostname
    try:
        vm = compute_client.virtual_machines.get(
            resource_group_name=rfs.storage_cluster.resource_group,
            vm_name=vm_name,
        )
    except msrestazure.azure_exceptions.CloudError as e:
        if e.status_code == 404:
            raise RuntimeError('virtual machine {} not found'.format(vm_name))
        else:
            raise
    # get connection ip
    if rfs.storage_cluster.public_ip.enabled:
        # get pip connected to vm
        if pip is None:
            _, pip = resource.get_nic_and_pip_from_virtual_machine(
                network_client, rfs.storage_cluster.resource_group, vm)
        ip_address = pip.ip_address
    else:
        if nic is None:
            nic, _ = resource.get_nic_and_pip_from_virtual_machine(
                network_client, rfs.storage_cluster.resource_group, vm)
        ip_address = nic.ip_configurations[0].private_ip_address
    # return connection info for vm
    if rfs.storage_cluster.ssh.ssh_private_key is not None:
        ssh_priv_key = rfs.storage_cluster.ssh.ssh_private_key
    else:
        ssh_priv_key = pathlib.Path(
            rfs.storage_cluster.ssh.generated_file_export_path,
            crypto.get_remotefs_ssh_key_prefix())
    if not ssh_priv_key.exists():
        raise RuntimeError('SSH private key file not found at: {}'.format(
            ssh_priv_key))
    return ssh_priv_key, 22, vm.os_profile.admin_username, ip_address


def ssh_storage_cluster(
        compute_client, network_client, config, sc_id, cardinal, hostname,
        tty, command):
    # type: (azure.mgmt.compute.ComputeManagementClient,
    # azure.mgmt.network.NetworkManagementClient, dict, str, int,
    # str, bool, tuple) -> None
    """SSH to a node in storage cluster
    :param azure.mgmt.compute.ComputeManagementClient compute_client:
        compute client
    :param azure.mgmt.network.NetworkManagementClient network_client:
        network client
    :param dict config: configuration dict
    :param str sc_id: storage cluster id
    :param int cardinal: cardinal number
    :param str hostname: hostname
    :param bool tty: allocate pseudo-tty
    :param tuple command: command to execute
    """
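    # example (illustrative; 'mynfs' and the command are hypothetical):
    #   ssh_storage_cluster(
    #       compute_client, network_client, config, 'mynfs', 0, None,
    #       tty=False, command=('uptime',))
    # passing cardinal=None with hostname set targets a vm by name instead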
    ssh_priv_key, port, username, ip = _get_ssh_info(
        compute_client, network_client, config, sc_id, cardinal, hostname)
    crypto.connect_or_exec_ssh_command(
        ip, port, ssh_priv_key, username, tty=tty, command=command)