Support missing image tasks, pool check

- Break out configs into separate pages
- Update all configs using 16.04.0-LTS to 16.04-LTS
- Remove Batch `account` from recipe credentials
Fred Park 2017-03-09 14:38:16 -08:00
Parent e349a004cd
Commit 33291504c2
81 changed files: 1294 additions and 1147 deletions

View file

@@ -2,19 +2,34 @@
## [Unreleased]
### Added
- Support for provisioning storage clusters via the `fs cluster` command
- Support for provisioning managed disks via the `fs disks` command
- Support for UserSubscription Batch accounts
- Azure Active Directory authentication support for Batch accounts
- `allow_run_on_missing` option to jobs that allows tasks to execute under
jobs with Docker images that have not been pre-loaded via the
`global_resources`:`docker_images` setting in config.json. Note that, if
possible, you should specify all Docker images that you intend to run in
the `global_resources`:`docker_images` property of the global configuration
to minimize the latency between task scheduling and task execution.
- Support for Canonical/UbuntuServer/16.04-LTS. This sku should be used over
the old 16.04.0-LTS sku due to
[issue #31](https://github.com/Azure/batch-shipyard/issues/31).
### Changed
- **Breaking Change:** the `glusterfs` `volume_driver` for `shared_data_volumes`
should now be named `glusterfs_on_compute`. This distinguishes GlusterFS
volumes co-located on compute nodes from a possible standalone GlusterFS
`storage_cluster` that may be remotely mounted in the future.
- Pool existence is now checked prior to job submission; job addition can
now proceed without an active pool if confirmed.
- Batch `account` (name) is now an optional property in the credentials config
- Configuration doc broken up into multiple pages
- Update all recipes using Canonical/UbuntuServer/16.04.0-LTS to use
Canonical/UbuntuServer/16.04-LTS instead
- Precompile python files for Docker images
- All dependencies updated to latest versions
- Update Batch API call compatibility for `azure-batch 2.0.0`
## [2.5.4] - 2017-03-08
### Changed

View file

@@ -37,10 +37,6 @@
},
"batch": {
"account_service_url": "",
"account_key": "",
"account_key_keyvault_secret_id": "https://myvault.vault.azure.net/secrets/batchkey",
"user_subscription": false,
"resource_group": "",
"aad": {
"endpoint": "https://batch.core.windows.net/",
"directory_id": "",
@@ -54,7 +50,10 @@
"enabled": true,
"filename": ""
}
}
},
"resource_group": "",
"account_key": "",
"account_key_keyvault_secret_id": "https://myvault.vault.azure.net/secrets/batchkey"
},
"storage": {
"mystorageaccount": {

View file

@@ -18,6 +18,7 @@
"static_public_ip": false,
"virtual_network": {
"name": "",
"resource_group": "",
"existing_ok": false,
"address_space": "",
"subnet": {
@@ -28,7 +29,7 @@
"network_security": {
"nfs": ["1.2.3.0/24"],
"ssh": ["*"],
"custom_inbound": {
"custom_inbound_rules": {
"myrule": {
"destination_port_range": "5000-5001",
"source_address_prefix": ["1.2.3.4", "5.6.7.0/24"],

View file

@@ -8,6 +8,7 @@
},
"environment_variables_keyvault_secret_id": "https://myvault.vault.azure.net/secrets/myjobenv",
"max_task_retries": 1,
"allow_run_on_missing_image": false,
"input_data": {
"azure_batch": [
{
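
To illustrate the new `allow_run_on_missing_image` job setting shown above, here is a small sketch of the pre-submission image check it enables; the flag defaults to `false`, and the helpers below are illustrative stand-ins rather than Batch Shipyard code:

```python
# illustrative sketch of the allow_run_on_missing_image behavior
def job_allow_run_on_missing(jobspec):
    allow = jobspec.get('allow_run_on_missing_image')
    return False if allow is None else allow

def check_task_image(jobspec, task, global_resources, missing_images):
    image = task['image']
    if image not in global_resources:
        if job_allow_run_on_missing(jobspec):
            # remember the image so job prep can skip pre-loading it
            missing_images.append(image)
        else:
            raise RuntimeError(
                'docker image {} for job {} is not pre-loaded on the '
                'pool'.format(image, jobspec['id']))

jobspec = {
    'id': 'dockerjob',
    'allow_run_on_missing_image': True,
    'tasks': [{'image': 'busybox', 'command': 'echo hello'}],
}
missing = []
check_task_image(jobspec, jobspec['tasks'][0], ['redis:3.2.3-alpine'], missing)
print(missing)  # ['busybox']
```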

View file

@@ -33,6 +33,7 @@
},
"virtual_network": {
"name": "",
"resource_group": "",
"create_nonexistant": false,
"address_space": "",
"subnet": {

View file

@@ -66,6 +66,27 @@ _RUN_ELEVATED = batchmodels.UserIdentity(
)
def get_batch_account(batch_mgmt_client, config):
# type: (azure.mgmt.batch.BatchManagementClient, dict) ->
# azure.mgmt.batch.models.BatchAccount
"""Get Batch account properties from ARM
:param azure.mgmt.batch.BatchManagementClient batch_mgmt_client:
batch management client
:param dict config: configuration dict
:rtype: azure.mgmt.batch.models.BatchAccount
:return: Batch account
"""
if batch_mgmt_client is None:
raise RuntimeError(
'Batch management client is invalid, please specify management '
'aad credentials')
bc = settings.credentials_batch(config)
return batch_mgmt_client.batch_account.get(
resource_group_name=bc.resource_group,
account_name=bc.account,
)
def list_node_agent_skus(batch_client):
# type: (batch.BatchServiceClient) -> None
"""List all node agent skus
@@ -1708,15 +1729,81 @@ def add_jobs(
# get the pool inter-node comm setting
bs = settings.batch_shipyard_settings(config)
pool = settings.pool_settings(config)
_pool = batch_client.pool.get(pool.id)
global_resources = []
for gr in settings.global_resources_docker_images(config):
global_resources.append(gr)
try:
cloud_pool = batch_client.pool.get(pool.id)
except batchmodels.batch_error.BatchErrorException as ex:
if 'The specified pool does not exist.' in ex.message.value:
logger.error('{} pool does not exist'.format(pool.id))
if util.confirm_action(
config, 'add jobs to nonexistant pool {}'.format(pool.id)):
cloud_pool = None
else:
logger.error(
'not submitting jobs to nonexistant pool {}'.format(
pool.id))
return
else:
raise
global_resources = settings.global_resources_docker_images(config)
lastjob = None
lasttask = None
for jobspec in settings.job_specifications(config):
jpcmd = ['$AZ_BATCH_NODE_STARTUP_DIR/wd/{} {}'.format(
jpfile[0], ' '.join(global_resources))]
job_id = settings.job_id(jobspec)
# perform checks:
# 1. check docker images in task against pre-loaded on pool
# 2. if tasks have dependencies, set it if so
# 3. if there are multi-instance tasks
mi_ac = settings.job_multi_instance_auto_complete(config)
multi_instance = False
mi_docker_container_name = None
reserved_task_id = None
uses_task_dependencies = False
missing_images = []
allow_run_on_missing = settings.job_allow_run_on_missing(jobspec)
for task in settings.job_tasks(jobspec):
# check if task docker image is set in config.json
di = settings.task_docker_image(task)
if di not in global_resources:
if allow_run_on_missing:
logger.warning(
('docker image {} not pre-loaded on pool for a '
'task specified in job {}').format(di, job_id))
missing_images.append(di)
else:
raise RuntimeError(
('not submitting job {} with missing docker image {} '
'pre-load on pool {}').format(job_id, di, pool.id))
# do not break, check to ensure ids are set on each task if
# task dependencies are set
if settings.has_depends_on_task(task):
uses_task_dependencies = True
if settings.is_multi_instance_task(task):
if multi_instance and mi_ac:
raise ValueError(
'cannot specify more than one multi-instance task '
'per job with auto completion enabled')
multi_instance = True
mi_docker_container_name = settings.task_name(task)
if util.is_none_or_empty(mi_docker_container_name):
_id = settings.task_id(task)
if util.is_none_or_empty(_id):
reserved_task_id = _generate_next_generic_task_id(
batch_client, job_id)
settings.set_task_id(task, reserved_task_id)
_id = '{}-{}'.format(job_id, reserved_task_id)
settings.set_task_name(task, _id)
mi_docker_container_name = settings.task_name(task)
del _id
# construct job prep
if util.is_not_empty(global_resources):
if len(missing_images) > 0 and allow_run_on_missing:
gr = list(set(global_resources) - set(missing_images))
else:
gr = global_resources
jpcmd = ['$AZ_BATCH_NODE_STARTUP_DIR/wd/{} {}'.format(
jpfile[0], ' '.join(gr))]
else:
jpcmd = []
# digest any input_data
addlcmds = data.process_input_data(config, bxfile, jobspec)
if addlcmds is not None:
@@ -1741,39 +1828,10 @@ def add_jobs(
user_identity=_RUN_ELEVATED,
rerun_on_node_reboot_after_success=False,
),
uses_task_dependencies=False,
uses_task_dependencies=uses_task_dependencies,
constraints=job_constraints,
)
lastjob = job.id
# perform checks:
# 1. if tasks have dependencies, set it if so
# 2. if there are multi-instance tasks
mi_ac = settings.job_multi_instance_auto_complete(config)
multi_instance = False
mi_docker_container_name = None
reserved_task_id = None
for task in settings.job_tasks(jobspec):
# do not break, check to ensure ids are set on each task if
# task dependencies are set
if settings.has_depends_on_task(task):
job.uses_task_dependencies = True
if settings.is_multi_instance_task(task):
if multi_instance and mi_ac:
raise ValueError(
'cannot specify more than one multi-instance task '
'per job with auto completion enabled')
multi_instance = True
mi_docker_container_name = settings.task_name(task)
if util.is_none_or_empty(mi_docker_container_name):
_id = settings.task_id(task)
if util.is_none_or_empty(_id):
reserved_task_id = _generate_next_generic_task_id(
batch_client, job.id)
settings.set_task_id(task, reserved_task_id)
_id = '{}-{}'.format(job.id, reserved_task_id)
settings.set_task_name(task, _id)
mi_docker_container_name = settings.task_name(task)
del _id
# add multi-instance settings
set_terminate_on_all_tasks_complete = False
if multi_instance and mi_ac:
@@ -1784,7 +1842,7 @@ def add_jobs(
'docker rm -v {}'.format(mi_docker_container_name)]),
user_identity=_RUN_ELEVATED,
)
logger.info('Adding job: {}'.format(job.id))
logger.info('Adding job {} to pool {}'.format(job.id, pool.id))
try:
batch_client.job.add(job)
except batchmodels.batch_error.BatchErrorException as ex:
@@ -1810,6 +1868,7 @@ def add_jobs(
del mi_ac
del multi_instance
del mi_docker_container_name
del uses_task_dependencies
# get base env vars from job
job_env_vars = settings.job_environment_variables(jobspec)
_job_env_vars_secid = \
@@ -1830,7 +1889,8 @@ def add_jobs(
if util.is_none_or_empty(settings.task_name(_task)):
settings.set_task_name(_task, '{}-{}'.format(job.id, _task_id))
del _task_id
task = settings.task_settings(_pool, config, _task)
task = settings.task_settings(
cloud_pool, config, pool, _task, missing_images)
# retrieve keyvault task env vars
if util.is_not_empty(
task.environment_variables_keyvault_secret_id):
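
The pool existence check added to `add_jobs` above can be summarized with the following sketch; it assumes an authenticated `azure.batch` `BatchServiceClient` (azure-batch 2.x, as targeted by this commit) and a `confirm` callable standing in for `util.confirm_action`:

```python
# sketch of the pre-submission pool existence check
import azure.batch.models as batchmodels

def get_cloud_pool_or_confirm(batch_client, pool_id, confirm):
    try:
        return batch_client.pool.get(pool_id)
    except batchmodels.batch_error.BatchErrorException as ex:
        if 'The specified pool does not exist.' in ex.message.value:
            if confirm('add jobs to nonexistent pool {}'.format(pool_id)):
                # caller falls back to the pool settings from pool.json
                return None
            raise RuntimeError(
                'not submitting jobs to nonexistent pool {}'.format(pool_id))
        raise
```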

View file

@@ -118,6 +118,31 @@ def create_network_client(ctx, credentials=None, subscription_id=None):
credentials, subscription_id)
def create_batch_mgmt_client(ctx, credentials=None, subscription_id=None):
# type: (CliContext, object, str) ->
# azure.mgmt.batch.BatchManagementClient
"""Create batch management client
:param CliContext ctx: Cli Context
:param object credentials: credentials object
:param str subscription_id: subscription id
:rtype: azure.mgmt.batch.BatchManagementClient
:return: batch management client
"""
mgmt_aad = None
if credentials is None:
mgmt_aad = settings.credentials_management(ctx.config).aad
credentials = aad.create_aad_credentials(ctx, mgmt_aad)
if util.is_none_or_empty(subscription_id):
if mgmt_aad is None:
mgmt_aad = settings.credentials_management(ctx.config).aad
subscription_id = ctx.subscription_id or mgmt_aad.subscription_id
batch_mgmt_client = azure.mgmt.batch.BatchManagementClient(
credentials, subscription_id)
batch_mgmt_client.config.add_user_agent(
'batch-shipyard/{}'.format(__version__))
return batch_mgmt_client
def create_arm_clients(ctx, batch_clients=False):
# type: (CliContext, bool) ->
# Tuple[azure.mgmt.resource.resources.ResourceManagementClient,
@@ -148,10 +173,16 @@ def create_arm_clients(ctx, batch_clients=False):
network_client = create_network_client(
ctx, credentials=credentials, subscription_id=subscription_id)
if batch_clients:
batch_mgmt_client, batch_client = create_batch_clients(ctx)
batch_client = create_batch_service_client(ctx)
try:
batch_mgmt_client = create_batch_mgmt_client(
ctx, credentials=credentials, subscription_id=subscription_id)
except Exception:
logger.warning('could not create batch management client')
batch_mgmt_client = None
else:
batch_mgmt_client = None
batch_client = None
batch_mgmt_client = None
return (
resource_client, compute_client, network_client, batch_mgmt_client,
batch_client
@@ -171,60 +202,25 @@ def create_keyvault_client(ctx):
)
def create_batch_mgmt_client(ctx, credentials=None, subscription_id=None):
# type: (CliContext, object, str) ->
# azure.mgmt.batch.BatchManagementClient
"""Create batch management client
def create_batch_service_client(ctx):
# type: (CliContext) -> azure.batch.batch_service_client.BatchServiceClient
"""Create batch service client
:param CliContext ctx: Cli Context
:param object credentials: credentials object
:param str subscription_id: subscription id
:rtype: azure.mgmt.batch.BatchManagementClient
:return: batch management client
"""
batch_aad = None
if credentials is None:
batch_aad = settings.credentials_batch(ctx.config).aad
credentials = aad.create_aad_credentials(ctx, batch_aad)
if util.is_none_or_empty(subscription_id):
if batch_aad is None:
batch_aad = settings.credentials_batch(ctx.config).aad
subscription_id = ctx.subscription_id or batch_aad.subscription_id
if util.is_none_or_empty(subscription_id):
return None
batch_mgmt_client = azure.mgmt.batch.BatchManagementClient(
credentials, subscription_id)
batch_mgmt_client.config.add_user_agent(
'batch-shipyard/{}'.format(__version__))
return batch_mgmt_client
def create_batch_clients(ctx):
# type: (CliContext) ->
# Tuple[azure.mgmt.batch.BatchManagementClient,
# azure.batch.batch_service_client.BatchServiceClient]
"""Create batch client
:param CliContext ctx: Cli Context
:rtype: tuple
:return: (
azure.mgmt.batch.BatchManagementClient,
azure.batch.batch_service_client.BatchServiceClient)
:rtype: azure.batch.batch_service_client.BatchServiceClient
:return: batch service client
"""
bc = settings.credentials_batch(ctx.config)
use_aad = bc.user_subscription or util.is_none_or_empty(bc.account_key)
batch_mgmt_client = None
if use_aad:
subscription_id = ctx.subscription_id or bc.subscription_id
if util.is_none_or_empty(bc.account_key):
logger.debug('batch account key not specified, using aad auth')
batch_aad = settings.credentials_batch(ctx.config).aad
credentials = aad.create_aad_credentials(ctx, batch_aad)
batch_mgmt_client = create_batch_mgmt_client(
ctx, credentials=credentials, subscription_id=subscription_id)
else:
credentials = batchauth.SharedKeyCredentials(
bc.account, bc.account_key)
batch_client = batchsc.BatchServiceClient(
credentials, base_url=bc.account_service_url)
batch_client.config.add_user_agent('batch-shipyard/{}'.format(__version__))
return (batch_mgmt_client, batch_client)
return batch_client
def create_storage_clients():
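
A sketch of the degraded-mode client creation introduced in `create_arm_clients` above: the Batch service client is always constructed, while the Batch management client is optional and falls back to `None` when AAD management credentials are unavailable. The factory callables below are stand-ins for `create_batch_service_client` and `create_batch_mgmt_client`:

```python
# illustrative sketch of the optional management client fallback
import logging

logger = logging.getLogger(__name__)

def create_clients_with_fallback(ctx, credentials, subscription_id,
                                 create_service, create_mgmt):
    batch_client = create_service(ctx)
    try:
        batch_mgmt_client = create_mgmt(
            ctx, credentials=credentials, subscription_id=subscription_id)
    except Exception:
        logger.warning('could not create batch management client')
        batch_mgmt_client = None
    return batch_mgmt_client, batch_client
```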

View file

@@ -43,6 +43,7 @@ except ImportError:
import uuid
# non-stdlib imports
import azure.batch.models as batchmodels
import azure.mgmt.batch.models as batchmgmtmodels
# local imports
from . import batch
from . import crypto
@@ -451,10 +452,15 @@ def _add_pool(
raise ValueError(
'Invalid subnet name on virtual network {}'.format(
pool_settings.virtual_network.name))
if util.is_not_empty(pool_settings.virtual_network.resource_group):
_vnet_rg = pool_settings.virtual_network.resource_group
else:
_vnet_rg = bc.resource_group
# create virtual network and subnet if specified
vnet, subnet = resource.create_virtual_network_and_subnet(
network_client, bc.resource_group, bc.location,
network_client, _vnet_rg, bc.location,
pool_settings.virtual_network)
del _vnet_rg
# ensure address prefix for subnet is valid
tmp = subnet.address_prefix.split('/')
if len(tmp) <= 1:
@@ -490,7 +496,9 @@ def _add_pool(
sc_arg = None
if storage_cluster_mount:
# ensure usersubscription account
if not bc.user_subscription:
ba = batch.get_batch_account(batch_mgmt_client, config)
if (not ba.pool_allocation_mode ==
batchmgmtmodels.PoolAllocationMode.user_subscription):
raise RuntimeError(
'{} account is not a UserSubscription account'.format(
bc.account))
@@ -1135,21 +1143,6 @@ def _adjust_settings_for_pool_creation(config):
# adjust inter node comm setting
if pool.vm_count < 1:
raise ValueError('invalid vm_count: {}'.format(pool.vm_count))
dr = settings.data_replication_settings(config)
max_vms = 20 if publisher == 'microsoftwindowsserver' else 40
if pool.vm_count > max_vms:
if dr.peer_to_peer.enabled:
logger.warning(
('disabling peer-to-peer transfer as pool size of {} exceeds '
'max limit of {} vms for inter-node communication').format(
pool.vm_count, max_vms))
settings.set_peer_to_peer_enabled(config, False)
if pool.inter_node_communication_enabled:
logger.warning(
('disabling inter-node communication as pool size of {} '
'exceeds max limit of {} vms for setting').format(
pool.vm_count, max_vms))
settings.set_inter_node_communication_enabled(config, False)
# re-read pool and data replication settings
pool = settings.pool_settings(config)
dr = settings.data_replication_settings(config)
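
A sketch of the new UserSubscription detection used in `_add_pool` above: instead of the removed `user_subscription` credential flag, the account's pool allocation mode is read from ARM. It assumes an authenticated `azure.mgmt.batch` `BatchManagementClient`:

```python
# illustrative sketch of the pool allocation mode check
import azure.mgmt.batch.models as batchmgmtmodels

def ensure_user_subscription_account(batch_mgmt_client, resource_group,
                                     account_name):
    if batch_mgmt_client is None:
        raise RuntimeError(
            'Batch management client is invalid, please specify management '
            'aad credentials')
    ba = batch_mgmt_client.batch_account.get(
        resource_group_name=resource_group, account_name=account_name)
    if (ba.pool_allocation_mode !=
            batchmgmtmodels.PoolAllocationMode.user_subscription):
        raise RuntimeError(
            '{} account is not a UserSubscription account'.format(
                account_name))
    return ba
```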

View file

@@ -632,9 +632,14 @@ def create_storage_cluster(
# upload scripts to blob storage for customscript
blob_urls = storage.upload_for_remotefs(blob_client, remotefs_files)
# create virtual network and subnet if specified
if util.is_not_empty(rfs.storage_cluster.virtual_network.resource_group):
_vnet_rg = rfs.storage_cluster.virtual_network.resource_group
else:
_vnet_rg = rfs.resource_group
vnet, subnet = resource.create_virtual_network_and_subnet(
network_client, rfs.resource_group, rfs.location,
network_client, _vnet_rg, rfs.location,
rfs.storage_cluster.virtual_network)
del _vnet_rg
# TODO create slb
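
The virtual network resource group fallback applied in both `_add_pool` and `create_storage_cluster` reduces to the following sketch (the function name is illustrative):

```python
# illustrative sketch: an explicit virtual_network:resource_group wins,
# otherwise the enclosing resource group (Batch account or remote fs) is used
def effective_vnet_resource_group(vnet_rg, parent_rg):
    return vnet_rg if vnet_rg else parent_rg

print(effective_vnet_resource_group('', 'my-batch-rg'))            # my-batch-rg
print(effective_vnet_resource_group('my-vnet-rg', 'my-batch-rg'))  # my-vnet-rg
```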

View file

@@ -98,7 +98,7 @@ ManagementCredentialsSettings = collections.namedtuple(
BatchCredentialsSettings = collections.namedtuple(
'BatchCredentialsSettings', [
'aad', 'account', 'account_key', 'account_service_url',
'user_subscription', 'resource_group', 'subscription_id', 'location',
'resource_group', 'subscription_id', 'location',
]
)
StorageCredentialsSettings = collections.namedtuple(
@@ -176,8 +176,8 @@ ManagedDisksSettings = collections.namedtuple(
)
VirtualNetworkSettings = collections.namedtuple(
'VirtualNetworkSettings', [
'name', 'address_space', 'subnet_name', 'subnet_address_prefix',
'existing_ok', 'create_nonexistant',
'name', 'resource_group', 'address_space', 'subnet_name',
'subnet_address_prefix', 'existing_ok', 'create_nonexistant',
]
)
FileServerSettings = collections.namedtuple(
@@ -718,7 +718,6 @@ def credentials_batch(config):
account = _kv_read_checked(conf, 'account')
account_key = _kv_read_checked(conf, 'account_key')
account_service_url = conf['account_service_url']
user_subscription = _kv_read(conf, 'user_subscription', False)
resource_group = _kv_read_checked(conf, 'resource_group')
# get subscription id from management section
try:
@@ -749,7 +748,6 @@ def credentials_batch(config):
account=account,
account_key=account_key,
account_service_url=conf['account_service_url'],
user_subscription=user_subscription,
resource_group=resource_group,
location=location,
subscription_id=subscription_id,
@@ -1804,6 +1802,22 @@ def job_max_task_retries(conf):
return max_task_retries
def job_allow_run_on_missing(conf):
# type: (dict) -> int
"""Get allow task run on missing image
:param dict conf: job configuration object
:rtype: bool
:return: allow run on missing image
"""
try:
allow = conf['allow_run_on_missing_image']
if allow is None:
raise KeyError()
except KeyError:
allow = False
return allow
def has_depends_on_task(conf):
# type: (dict) -> bool
"""Determines if task has task dependencies
@@ -1825,7 +1839,7 @@ def has_depends_on_task(conf):
def is_multi_instance_task(conf):
# type: (dict) -> bool
"""Determines if task is multi-isntance
:param dict conf: job configuration object
:param dict conf: task configuration object
:rtype: bool
:return: task is multi-instance
"""
@@ -1835,7 +1849,7 @@ def is_multi_instance_task(conf):
def task_name(conf):
# type: (dict) -> str
"""Get task name
:param dict conf: job configuration object
:param dict conf: task configuration object
:rtype: str
:return: task name
"""
@@ -1848,10 +1862,26 @@ def task_name(conf):
return name
def task_docker_image(conf):
# type: (dict) -> str
"""Get docker image used by task
:param dict conf: task configuration object
:rtype: str
:return: docker image used by task
"""
try:
di = conf['image']
if util.is_none_or_empty(di):
raise KeyError()
except KeyError:
di = None
return di
def set_task_name(conf, name):
# type: (dict, str) -> None
"""Set task name
:param dict conf: job configuration object
:param dict conf: task configuration object
:param str name: task name to set
"""
conf['name'] = name
@@ -1860,7 +1890,7 @@ def set_task_name(conf, name):
def task_id(conf):
# type: (dict) -> str
"""Get task id
:param dict conf: job configuration object
:param dict conf: task configuration object
:rtype: str
:return: task id
"""
@@ -1876,18 +1906,21 @@ def task_id(conf):
def set_task_id(conf, id):
# type: (dict, str) -> None
"""Set task id
:param dict conf: job configuration object
:param dict conf: task configuration object
:param str id: task id to set
"""
conf['id'] = id
def task_settings(pool, config, conf):
# type: (azure.batch.models.CloudPool, dict, dict) -> TaskSettings
def task_settings(cloud_pool, config, poolconf, conf, missing_images):
# type: (azure.batch.models.CloudPool, dict, PoolSettings,
# dict, list) -> TaskSettings
"""Get task settings
:param azure.batch.models.CloudPool pool: cloud pool object
:param azure.batch.models.CloudPool cloud_pool: cloud pool object
:param dict config: configuration dict
:param dict conf: job configuration object
:param PoolSettings poolconf: pool settings
:param dict conf: task configuration object
:param list missing_images: list of missing docker images on pool
:rtype: TaskSettings
:return: task settings
"""
@@ -1898,11 +1931,36 @@ def task_settings(pool, config, conf):
image = conf['image']
if util.is_none_or_empty(image):
raise ValueError('image is invalid')
# check if image is in missing image list
if image in missing_images:
# get private registry settings
preg = docker_registry_private_settings(config)
if util.is_not_empty(preg.storage_account):
registry = 'localhost:5000/'
elif util.is_not_empty(preg.server):
registry = '{}/'.format(preg.server)
else:
registry = ''
del preg
image = '{}{}'.format(registry, image)
# get some pool props
publisher = pool.virtual_machine_configuration.image_reference.\
publisher.lower()
offer = pool.virtual_machine_configuration.image_reference.offer.lower()
sku = pool.virtual_machine_configuration.image_reference.sku.lower()
if cloud_pool is None:
pool_id = poolconf.id
publisher = poolconf.publisher.lower()
offer = poolconf.offer.lower()
sku = poolconf.sku.lower()
vm_size = poolconf.vm_size
inter_node_comm = poolconf.inter_node_communication_enabled
else:
pool_id = cloud_pool.id
publisher = cloud_pool.virtual_machine_configuration.image_reference.\
publisher.lower()
offer = cloud_pool.virtual_machine_configuration.image_reference.\
offer.lower()
sku = cloud_pool.virtual_machine_configuration.image_reference.sku.\
lower()
vm_size = cloud_pool.vm_size.lower()
inter_node_comm = cloud_pool.enable_inter_node_communication
# get depends on
try:
depends_on = conf['depends_on']
@@ -2088,10 +2146,10 @@ def task_settings(pool, config, conf):
gpu = False
# adjust for gpu settings
if gpu:
if not is_gpu_pool(pool.vm_size):
if not is_gpu_pool(vm_size):
raise RuntimeError(
('cannot initialize a gpu task on nodes without '
'gpus, pool: {} vm_size: {}').format(pool.id, pool.vm_size))
'gpus, pool: {} vm_size: {}').format(pool_id, vm_size))
# TODO other images as they become available with gpu support
if (publisher != 'canonical' and offer != 'ubuntuserver' and
sku < '16.04'):
@@ -2107,16 +2165,16 @@ def task_settings(pool, config, conf):
docker_exec_cmd = 'docker exec'
# adjust for infiniband
if infiniband:
if not pool.enable_inter_node_communication:
if not inter_node_comm:
raise RuntimeError(
('cannot initialize an infiniband task on a '
'non-internode communication enabled '
'pool: {}').format(pool.id))
if not is_rdma_pool(pool.vm_size):
'pool: {}').format(pool_id))
if not is_rdma_pool(vm_size):
raise RuntimeError(
('cannot initialize an infiniband task on nodes '
'without RDMA, pool: {} vm_size: {}').format(
pool.id, pool.vm_size))
pool_id, vm_size))
# only centos-hpc and sles-hpc:12-sp1 are supported
# for infiniband
if publisher == 'openlogic' and offer == 'centos-hpc':
@@ -2147,7 +2205,7 @@ def task_settings(pool, config, conf):
run_opts.append('--env-file {}'.format(envfile))
# populate mult-instance settings
if is_multi_instance_task(conf):
if not pool.enable_inter_node_communication:
if not inter_node_comm:
raise RuntimeError(
('cannot run a multi-instance task on a '
'non-internode communication enabled '
@@ -2194,7 +2252,12 @@ def task_settings(pool, config, conf):
if num_instances == 'pool_specification_vm_count':
num_instances = pool_vm_count(config)
elif num_instances == 'pool_current_dedicated':
num_instances = pool.current_dedicated
if cloud_pool is None:
raise RuntimeError(
('Cannot retrieve current dedicated count for '
'pool: {}. Ensure pool exists.)'.format(pool_id)))
else:
num_instances = cloud_pool.current_dedicated
else:
raise ValueError(
('multi instance num instances setting '
@@ -2267,6 +2330,7 @@ def virtual_network_settings(
except KeyError:
conf = {}
name = _kv_read_checked(conf, 'name')
resource_group = _kv_read_checked(conf, 'resource_group')
address_space = _kv_read_checked(conf, 'address_space')
existing_ok = _kv_read(conf, 'existing_ok', default_existing_ok)
subnet_name = _kv_read_checked(conf['subnet'], 'name')
@@ -2275,6 +2339,7 @@ def virtual_network_settings(
conf, 'create_nonexistant', default_create_nonexistant)
return VirtualNetworkSettings(
name=name,
resource_group=resource_group,
address_space=address_space,
subnet_name=subnet_name,
subnet_address_prefix=subnet_address_prefix,
@@ -2331,9 +2396,9 @@ def remotefs_settings(config):
)
if not isinstance(sc_ns_inbound['nfs'].source_address_prefix, list):
raise ValueError('expected list for nfs network security rule')
if 'custom_inbound' in ns_conf:
if 'custom_inbound_rules' in ns_conf:
_reserved = frozenset(['ssh', 'nfs', 'glusterfs'])
for key in ns_conf['custom_inbound']:
for key in ns_conf['custom_inbound_rules']:
# ensure key is not reserved
if key.lower() in _reserved:
raise ValueError(
@@ -2341,11 +2406,13 @@ def remotefs_settings(config):
'reserved name {}').format(key, _reserved))
sc_ns_inbound[key] = InboundNetworkSecurityRule(
destination_port_range=_kv_read_checked(
ns_conf['custom_inbound'][key], 'destination_port_range'),
ns_conf['custom_inbound_rules'][key],
'destination_port_range'),
source_address_prefix=_kv_read_checked(
ns_conf['custom_inbound'][key], 'source_address_prefix'),
ns_conf['custom_inbound_rules'][key],
'source_address_prefix'),
protocol=_kv_read_checked(
ns_conf['custom_inbound'][key], 'protocol'),
ns_conf['custom_inbound_rules'][key], 'protocol'),
)
if not isinstance(sc_ns_inbound[key].source_address_prefix, list):
raise ValueError(
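
A sketch of how `task_settings` now sources pool properties when the pool does not exist (`cloud_pool` is `None`): values come from the `PoolSettings` parsed from the pool configuration rather than the live `CloudPool`. The namedtuple below is a trimmed, illustrative stand-in for the real settings objects:

```python
# illustrative sketch of the cloud pool / pool settings fallback
import collections

PoolProps = collections.namedtuple(
    'PoolProps',
    ['pool_id', 'publisher', 'offer', 'sku', 'vm_size', 'inter_node_comm'])

def pool_props_for_task(cloud_pool, poolconf):
    if cloud_pool is None:
        # pool does not exist: use values from the pool configuration
        return PoolProps(
            poolconf.id, poolconf.publisher.lower(), poolconf.offer.lower(),
            poolconf.sku.lower(), poolconf.vm_size,
            poolconf.inter_node_communication_enabled)
    # pool exists: use the live CloudPool properties
    ir = cloud_pool.virtual_machine_configuration.image_reference
    return PoolProps(
        cloud_pool.id, ir.publisher.lower(), ir.offer.lower(),
        ir.sku.lower(), cloud_pool.vm_size.lower(),
        cloud_pool.enable_inter_node_communication)
```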

View file

@@ -7,10 +7,12 @@ Batch Shipyard is driven by the following json configuration files:
1. [Credentials](11-batch-shipyard-configuration-credentials.md) -
credentials for Azure Batch, Storage, KeyVault, Management and Docker private
registries
2. [Global config](#global) - Batch Shipyard and Docker-specific configuration
settings
3. [Pool](#pool) - Azure Batch pool configuration
4. [Jobs](#jobs) - Azure Batch jobs and tasks configuration
2. [Global config](12-batch-shipyard-configuration-global.md) -
Batch Shipyard and Docker-specific configuration settings
3. [Pool](13-batch-shipyard-configuration-pool.md) -
Batch Shipyard pool configuration
4. [Jobs](14-batch-shipyard-configuration-jobs.md) -
Batch Shipyard jobs and tasks configuration
Note that all potential properties are described here and that specifying
all such properties may result in invalid configuration as some properties
@@ -26,940 +28,5 @@ may be invalid if specified as such. They must be modified for your execution
scenario. All [sample recipes](../recipes) also have a set of configuration
files that can be modified to fit your needs.
### <a name="global"></a>Global Config
The global config schema is as follows:
```json
{
"batch_shipyard": {
"storage_account_settings": "mystorageaccount",
"storage_entity_prefix": "shipyard",
"generated_sas_expiry_days": 90,
"encryption" : {
"enabled": true,
"pfx": {
"filename": "encrypt.pfx",
"passphrase": "mysupersecretpassword",
"sha1_thumbprint": "123456789..."
},
"public_key_pem": "encrypt.pem"
}
},
"docker_registry": {
"private": {
"allow_public_docker_hub_pull_on_missing": true,
"server": "myserver-myorg.azurecr.io",
"azure_storage": {
"storage_account_settings": "mystorageaccount",
"container": "mydockerregistry"
}
}
},
"data_replication": {
"peer_to_peer": {
"enabled": true,
"compression": true,
"concurrent_source_downloads": 10,
"direct_download_seed_bias": null
},
"non_peer_to_peer_concurrent_downloading": true
},
"global_resources": {
"docker_images": [
"busybox",
"redis:3.2.3-alpine",
],
"files": [
{
"source": {
"path": "/some/local/path/dir",
"include": ["*.dat"],
"exclude": ["*.bak"]
},
"destination": {
"shared_data_volume": "glustervol",
"relative_destination_path": "myfiles",
"data_transfer": {
"method": "multinode_scp",
"ssh_private_key": "id_rsa_shipyard",
"scp_ssh_extra_options": "-C -c aes256-gcm@openssh.com",
"rsync_extra_options": "",
"split_files_megabytes": 500,
"max_parallel_transfers_per_node": 2
}
}
},
{
"source": {
"path": "/some/local/path/bound/for/blob",
"include": ["*.bin"]
},
"destination": {
"storage_account_settings": "mystorageaccount",
"data_transfer": {
"container": "mycontainer",
"blobxfer_extra_options": "--no-computefilemd5"
}
}
},
{
"source": {
"path": "/another/local/path/dir",
"include": [],
"exclude": []
},
"destination": {
"relative_destination_path": "relpath/on/host",
"data_transfer": {
"method": "rsync+ssh",
"ssh_private_key": "id_rsa_shipyard",
"scp_ssh_extra_options": "-c aes256-gcm@openssh.com",
"rsync_extra_options": "-v"
}
}
}
],
"docker_volumes": {
"data_volumes": {
"abcvol": {
"host_path": null,
"container_path": "/abc"
},
"hosttempvol": {
"host_path": "/tmp",
"container_path": "/hosttmp"
}
},
"shared_data_volumes": {
"shipyardvol": {
"volume_driver": "azurefile",
"storage_account_settings": "mystorageaccount",
"azure_file_share_name": "shipyardshared",
"container_path": "$AZ_BATCH_NODE_SHARED_DIR/azfile",
"mount_options": [
"filemode=0777",
"dirmode=0777",
"nolock=true"
]
},
"glustervol": {
"volume_driver": "glusterfs_on_compute",
"container_path": "$AZ_BATCH_NODE_SHARED_DIR/gfs",
"volume_type": "replica",
"volume_options": [
"performance.cache-size 1 GB",
"performance.cache-max-file-size 10 MB",
"performance.cache-refresh-timeout 61",
]
}
}
}
}
}
```
The `batch_shipyard` property is used to set settings for the tool.
* (required) `storage_account_settings` is a link to the alias of the storage
account specified, in this case, it is `mystorageaccount`. Batch shipyard
requires a storage account for storing metadata in order to execute across a
distributed environment.
* (optional) `storage_entity_prefix` property is used as a generic qualifier
to prefix storage containers (blob containers, tables, queues) with. If not
specified, defaults to `shipyard`.
* (optional) `generated_sas_expiry_days` property is used to set the number of
days any SAS key generated by Batch Shipyard is valid for. The default is 30
days. This is useful if you have long-lived pools and want to ensure that
SAS keys are valid for longer periods of time.
* (optional) `encryption` object is used to define credential encryption which
contains the following members:
* (required) `enabled` property enables or disables this feature.
* (required) `pfx` object defines the PFX certificate
* (required) `filename` property is the full path and name to the PFX
certificate
* (required) `passphrase` property is the passphrase for the PFX
certificate. This cannot be empty.
* (optional) `sha1_thumbprint` is the SHA1 thumbprint of the
certificate. If the PFX file is created using the `cert create` command,
then the SHA1 thumbprint is output. It is recommended to populate this
property such that it does not have to be generated when needed for
encryption.
* (optional) `public_key_pem` property is the full path and name to the
RSA public key in PEM format. If the PFX file is created using the
`cert create` command, then this file is generated along with the PFX
file. It is recommended to populate this property with the PEM file path
such that it does not have to be generated when needed for encryption.
The `docker_registry` property is used to configure Docker image distribution
options from public/private Docker hub and private registries.
* (optional) `private` property controls settings for interacting with private
registries. There are three kinds of private registries that are supported:
(1) private registries hosted on Docker Hub, (2) Internet accessible
registries such as those hosted by the
[Azure Container Registry](https://azure.microsoft.com/en-us/services/container-registry/)
service and (3) [private registry instances backed to
Azure Blob Storage](https://azure.microsoft.com/en-us/documentation/articles/virtual-machines-linux-docker-registry-in-blob-storage/)
that are run on compute nodes. To use private registries hosted on Docker Hub,
no additional properties need to be specified here, instead, specify your
Docker Hub login information in the credentials json. To specify a private
registry other than on Docker Hub, a json property named `server` should be
defined. To use a private registry backed by Azure Blob Storage, define a
json object named `azure_storage`. Note that a maximum of only one of these
three types of private registries may be specified at once. The following
describes members of the non-Docker Hub private registries supported:
* (optional) `server` property is the fully-qualified host name of a
private registry server. A specific port other than 80 can be
specified using a `:` separator, e.g.,
`mydockerregistry.com:8080`. Port 80 is the default if no port is
specified. The value of this property should have an associated login
in the credentials json file.
* (optional) `azure_storage` object is to define settings for connecting
to a private registry backed by Azure Storage blobs and where the
private registry instances are hosted on the compute nodes themselves.
* (required) `storage_account_settings` is a link to the alias of the
storage account specified that stores the private registry blobs.
* (required) `container` property is the name of the Azure Blob
container holding the private registry blobs.
* (optional) `allow_public_docker_hub_pull_on_missing` property allows
pass-through of Docker image retrieval to public Docker Hub if it is
missing in the private registry. This defaults to `false` if not
specified.
The `data_replication` property is used to configure the internal image
replication mechanism between compute nodes within a compute pool. The
`non_peer_to_peer_concurrent_downloading` property specifies if it is ok
to allow unfettered concurrent downloading from the source registry among
all compute nodes. The following options apply to `peer_to_peer` data
replication options:
* (optional) `enabled` property enables or disables private peer-to-peer
transfer. Note that for compute pools with a relatively small number of VMs,
peer-to-peer transfer may not provide any benefit and is recommended to be
disabled in these cases. Compute pools with large number of VMs and especially
in the case of an Azure Storage-backed private registry can benefit from
peer-to-peer image replication.
* `compression` property enables or disables compression of image files. It
is strongly recommended to keep this enabled.
* `concurrent_source_downloads` property specifies the number of
simultaneous downloads allowed to each image.
* `direct_download_seed_bias` property sets the number of direct download
seeds to prefer per image before switching to peer-to-peer transfer.
The `global_resources` property contains information regarding required
Docker images, volume configuration and data ingress information. This
property is required.
`docker_images` is an array of docker images that should be installed on
every compute node when this configuration file is supplied while creating
a compute pool. Image tags are supported. Image names should not include
private registry server names, as these will be automatically prepended. For
instance, if you have an image `abc/mytag` on your private registry
`myregistry-myorg.azurecr.io`, your image should be named in the
`docker_images` array as `abc/mytag` and not
`myregistry-myorg.azurecr.io/abc/mytag`.
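
As an illustration of the automatic prepending described above, a sketch mirroring the `task_settings` logic added in this commit (the function name is not part of Batch Shipyard):

```python
# illustrative sketch: images stay unqualified in docker_images and the
# private registry location is prepended when the task is constructed
def qualify_image(image, private_server=None, backed_by_azure_storage=False):
    if backed_by_azure_storage:
        # registry instance running on the compute node itself
        registry = 'localhost:5000/'
    elif private_server:
        registry = '{}/'.format(private_server)
    else:
        registry = ''
    return '{}{}'.format(registry, image)

print(qualify_image('abc/mytag', private_server='myregistry-myorg.azurecr.io'))
# myregistry-myorg.azurecr.io/abc/mytag
```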
`files` is an optional property that specifies data that should be ingressed
from a location accessible by the local machine (i.e., the machine invoking
`shipyard.py`) to a shared file system location accessible by compute nodes
in the pool, or to Azure Blob or File Storage. `files` is a json list of objects,
which allows for multiple sources to destinations to be ingressed during the
same invocation. Note that no Azure Batch environment variables
(i.e., `$AZ_BATCH_`-style environment variables) are available as path
arguments since ingress actions performed within `files` are done locally
on the machine invoking `shipyard.py`. Each object within the `files` list
contains the following members:
* (required) `source` property contains the following members:
* (required) `path` is a local path. A single file or a directory
can be specified. Filters below will be ignored if `path` is a file and
not a directory.
* (optional) `include` is an array of
[Unix shell-style wildcard filters](https://docs.python.org/3.5/library/fnmatch.html)
where only files matching a filter are included in the data transfer.
Filters specified in `include` have precedence over `exclude` described
next. `include` can only have a maximum of 1 filter for ingress to Azure
Blob Storage. In this example, all files ending in `.dat` are ingressed.
* (optional) `exclude` is an array of
[Unix shell-style wildcard filters](https://docs.python.org/3.5/library/fnmatch.html)
where files matching a filter are excluded from the data transfer. Filters
specified in `include` have precedence over filters specified in
`exclude`. `exclude` cannot be specified for ingress into Azure Blob
Storage. In this example, all files ending in `.bak` are skipped for
ingress.
* (required) `destination` property contains the following members:
* (required or optional) `shared_data_volume` or `storage_account_settings`
for data ingress to a GlusterFS volume or Azure Blob or File Storage. If
you are ingressing to a pool with only one compute node, you may omit
`shared_data_volume`. Otherwise, you may specify one or the other, but
not both in the same object. Please see below in the
`shared_data_volumes` for information on how to set up a GlusterFS share.
* (required or optional) `relative_destination_path` specifies a relative
destination path to place the files, with respect to the target root.
If transferring to a `shared_data_volume` then this is relative to the
GlusterFS volume root. If transferring to a pool with one single node in
it, thus, no `shared_data_volume` is specified in the prior property, then
this is relative to
[$AZ_BATCH_NODE_ROOT_DIR](https://azure.microsoft.com/en-us/documentation/articles/batch-api-basics/#files-and-directories).
To place files directly in `$AZ_BATCH_NODE_ROOT_DIR` (not recommended),
you can specify this property as empty string when not ingressing to
a `shared_data_volume`. Note that if `scp` is selected while attempting
to transfer directly to this aforementioned path, then `scp` will fail
with exit code of 1 but the transfer will have succeeded (this is due
to some of the permission options). If this property is not specified for
a `shared_data_volume`, then files will be placed directly in the
GlusterFS volume root. This property cannot be specified for an Azure
Storage destination (i.e., `storage_account_settings`).
* (required) `data_transfer` specifies how the transfer should take place.
The following list contains members for GlusterFS ingress when a GlusterFS
volume is provided for `shared_data_volume` (see below for ingressing to
Azure Blob or File Storage):
* (required) `method` specified which method should be used to ingress
data, which should be one of: `scp`, `multinode_scp`, `rsync+ssh` or
`multinode_rsync+ssh`. `scp` will use secure copy to copy a file or a
directory (recursively) to the remote share path. `multinode_scp` will
attempt to simultaneously transfer files to many compute nodes using
`scp` at the same time to speed up data transfer. `rsync+ssh` will
perform an rsync of files through SSH. `multinode_rsync+ssh` will
attempt to simultaneously transfer files using `rsync` to many compute
nodes at the same time to speed up data transfer. Note that you may
specify the `multinode_*` methods even with only 1 compute node in a
pool which will allow you to take advantage of
`max_parallel_transfers_per_node` below.
* (optional) `ssh_private_key` location of the SSH private key for the
username specified in the `pool_specification`:`ssh` section when
connecting to compute nodes. The default is `id_rsa_shipyard`, if
omitted, which is automatically generated if no SSH key is specified
when an SSH user is added to a pool.
* (optional) `scp_ssh_extra_options` are any extra options to pass to
`scp` or `ssh` for `scp`/`multinode_scp` or
`rsync+ssh`/`multinode_rsync+ssh` methods, respectively. In the example
above, `-C` enables compression and `-c aes256-gcm@openssh.com`
is passed to `scp`, which can potentially increase the transfer speed by
selecting the `aes256-gcm@openssh.com` cipher which can exploit Intel
AES-NI.
* (optional) `rsync_extra_options` are any extra options to pass to
`rsync` for the `rsync+ssh`/`multinode_rsync+ssh` transfer methods. This
property is ignored for non-rsync transfer methods.
* (optional) `split_files_megabytes` splits files into chunks with the
specified size in MiB. This can potentially help with very large files.
This option forces the transfer `method` to `multinode_scp`.
Note that the destination file system must be able to accommodate
up to 2x the size of files which are split. Additionally, transfers
involving files which are split will incur reconstruction costs after
the transfer is complete, which will increase the total end-to-end
ingress time. However, in certain scenarios, by splitting files and
transferring chunks in parallel along with reconstruction may end up
being faster than transferring a large file without chunking.
* (optional) `max_parallel_transfers_per_node` is the maximum number of
parallel transfer to invoke per node with the
`multinode_scp`/`multinode_rsync+ssh` methods. For example, if there
are 3 compute nodes in the pool, and `2` is given for this option, then
there will be up to 2 scp sessions in parallel per compute node for a
maximum of 6 concurrent scp sessions to the pool. The default is 1 if
not specified or omitted.
* (required) `data_transfer` specifies how the transfer should take place.
When Azure Blob or File Storage is selected as the destination for data
ingress, [blobxfer](https://github.com/Azure/blobxfer) is invoked. The
following list contains members for Azure Blob or File Storage ingress
when a storage account link is provided for `storage_account_settings`:
* (required) `container` or `file_share` is required when uploading to
Azure Blob Storage or Azure File Storage, respectively. `container`
specifies which container to upload to for Azure Blob Storage while
`file_share` specifies which file share to upload to for Azure File
Storage. Only one of these properties can be specified per
`data_transfer` object. The container or file share need not be created
beforehand.
* (optional) `blobxfer_extra_options` are any extra options to pass to
`blobxfer`. In the example above, `--no-computefilemd5` will force
`blobxfer` to skip MD5 calculation on files ingressed.
`docker_volumes` is an optional property that can consist of two
different types of volumes: `data_volumes` and `shared_data_volumes`.
`data_volumes` can be of two flavors depending upon if `host_path` is set to
null or not. In the former, this is typically used with the `VOLUME` keyword
in Dockerfiles to initialize a data volume with existing data inside the
image. If `host_path` is set, then the path on the host is mounted in the
container at the path specified with `container_path`.
`shared_data_volumes` is an optional property for initializing persistent
shared storage volumes. In the first shared volume, `shipyardvol` is the alias
of this volume:
* `volume_driver` property specifies the Docker Volume Driver to use.
Currently Batch Shipyard only supports the `volume_driver` as `azurefile` or
`glusterfs_on_compute`. Note that `glusterfs_on_compute` is not a true Docker
Volume Driver. For this volume (`shipyardvol`), as this is an Azure File
shared volume, the `volume_driver` should be set as `azurefile`.
* `storage_account_settings` is a link to the alias of the storage account
specified that holds this Azure File Share.
* `azure_file_share_name` is the name of the share name on Azure Files. Note
that the Azure File share must be created beforehand, the toolkit does not
create Azure File shares, it only mounts them to the compute nodes.
* `container_path` is the path in the container to mount.
* `mount_options` are the mount options to pass to the mount command. Supported
options are documented
[here](https://github.com/Azure/azurefile-dockervolumedriver). It is
recommended to use `0777` for both `filemode` and `dirmode` as the `uid` and
`gid` cannot be reliably determined before the compute pool is allocated and
this volume will be mounted as the root user.
Note that when using `azurefile` for a shared data volume, the storage account
that holds the file share must reside within the same Azure region as the
Azure Batch compute pool. Attempting to mount an Azure File share that is
cross-region will result in failure as current Linux Samba clients do not
support share level encryption at this time.
The second shared volume, `glustervol`, is a
[GlusterFS](https://www.gluster.org/) network file system. Please note that
`glusterfs_on_compute` volumes are GlusterFS volumes co-located on the VM's
temporary local disk space, which is a shared resource. Sizes of the local temp disk for
each VM size can be found
[here](https://azure.microsoft.com/en-us/documentation/articles/virtual-machines-windows-sizes/).
If specifying a `glusterfs_on_compute` volume, you must enable internode
communication in the pool configuration file. These volumes have the following
properties:
* (required) `volume_driver` property should be set as `glusterfs_on_compute`.
* (required) `container_path` is the path in the container to mount.
* (optional) `volume_type` property defines the GlusterFS volume type.
Currently, `replica` is the only supported type.
* (optional) `volume_options` property defines additional GlusterFS volume
options to set.
`glusterfs_on_compute` volumes are mounted on the host at
`$AZ_BATCH_NODE_SHARED_DIR/.gluster/gv0`. Batch Shipyard will automatically
replace container path references in direct and storage-based data
ingress/egress with their host path equivalents.
Note that when resizing a pool with a `glusterfs_on_compute` shared file
system, you must resize with the `pool resize` command in `shipyard.py`
and not with Azure Portal, Batch Explorer or any other tool.
Finally, note that all `docker_volumes` can be omitted completely along with
one or all of `data_volumes` and `shared_data_volumes` if you do not require
this functionality.
An example global config json template can be found
[here](../config\_templates/config.json).
### <a name="pool"></a>Pool
The pool schema is as follows:
```json
{
"pool_specification": {
"id": "dockerpool",
"vm_size": "STANDARD_A9",
"vm_count": 10,
"max_tasks_per_node": 1,
"inter_node_communication_enabled": true,
"publisher": "OpenLogic",
"offer": "CentOS-HPC",
"sku": "7.1",
"reboot_on_start_task_failed": true,
"block_until_all_global_resources_loaded": true,
"transfer_files_on_pool_creation": false,
"input_data": {
"azure_batch": [
{
"job_id": "jobonanotherpool",
"task_id": "mytask",
"include": ["wd/*.dat"],
"exclude": ["*.txt"],
"destination": "$AZ_BATCH_NODE_SHARED_DIR/jobonanotherpool"
}
],
"azure_storage": [
{
"storage_account_settings": "mystorageaccount",
"container": "poolcontainer",
"include": ["pooldata*.bin"],
"destination": "$AZ_BATCH_NODE_SHARED_DIR/pooldata",
"blobxfer_extra_options": null
}
]
},
"ssh": {
"username": "docker",
"expiry_days": 7,
"ssh_public_key": null,
"generate_docker_tunnel_script": true,
"generated_file_export_path": null,
"hpn_server_swap": false
},
"gpu": {
"nvidia_driver": {
"source": "https://some.url"
}
},
"additional_node_prep_commands": [
]
}
}
```
The `pool_specification` property has the following members:
* (required) `id` is the compute pool ID.
* (required) `vm_size` is the
[Azure Virtual Machine Instance Size](https://azure.microsoft.com/en-us/pricing/details/virtual-machines/).
Please note that not all regions have every VM size available.
* (required) `vm_count` is the number of compute nodes to allocate.
* (optional) `max_tasks_per_node` is the maximum number of concurrent tasks
that can be running at any one time on a compute node. This defaults to a
value of 1 if not specified.
* (optional) `inter_node_communication_enabled` designates if this pool is set
up for inter-node communication. This must be set to `true` for any containers
that must communicate with each other such as MPI applications. This property
will be force enabled if peer-to-peer replication is enabled.
* (required) `publisher` is the publisher name of the Marketplace VM image.
* (required) `offer` is the offer name of the Marketplace VM image.
* (required) `sku` is the sku name of the Marketplace VM image.
* (optional) `reboot_on_start_task_failed` allows Batch Shipyard to reboot the
compute node in case there is a transient failure in node preparation (e.g.,
network timeout, resolution failure or download problem). This defaults to
`false`.
* (optional) `block_until_all_global_resources_loaded` will block the node
from entering ready state until all Docker images are loaded. This defaults
to `true`.
* (optional) `transfer_files_on_pool_creation` will ingress all `files`
specified in the `global_resources` section of the configuration json when
the pool is created. If files are to be ingressed to Azure Blob or File
Storage, then data movement operations are overlapped with the creation of the
pool. If files are to be ingressed to a shared file system on the compute
nodes, then the files are ingressed after the pool is created and the shared
file system is ready. Files can be ingressed to both Azure Blob Storage and a
shared file system during the same pool creation invocation. If this property
is set to `true` then `block_until_all_global_resources_loaded` will be force
disabled. If omitted, this property defaults to `false`.
* (optional) `input_data` is an object containing data that should be
ingressed to all compute nodes as part of node preparation. It is
important to note that if you are combining this action with `files` and
are ingressing data to Azure Blob or File storage as part of pool creation,
that the blob containers or file shares defined here will be downloaded as
soon as the compute node is ready to do so. This may result in the blob
container/blobs or file share/files not being ready in time for the
`input_data` transfer. It is up to you to ensure that these two operations do
not overlap. If there is a possibility of overlap, then you should ingress
data defined in `files` prior to pool creation and disable the option above
`transfer_files_on_pool_creation`. This object currently supports
`azure_batch` and `azure_storage` as members.
* `azure_batch` contains the following members:
* (required) `job_id` the job id of the task
* (required) `task_id` the id of the task to fetch files from
* (optional) `include` is an array of include filters
* (optional) `exclude` is an array of exclude filters
* (required) `destination` is the destination path to place the files
* `azure_storage` contains the following members:
* (required) `storage_account_settings` contains a storage account link
as defined in the credentials json.
* (required) `container` or `file_share` is required when downloading
from Azure Blob Storage or Azure File Storage, respectively.
`container` specifies which container to download from for Azure Blob
Storage while `file_share` specifies which file share to download from
for Azure File Storage. Only one of these properties can be specified
per `data_transfer` object.
* (optional) `include` property defines an optional include filter.
Although this property is an array, it is only allowed to have 1
maximum filter.
* (required) `destination` property defines where to place the
downloaded files on the host file system. Please note that you should
not specify a destination that is on a shared file system. If you
require ingressing to a shared file system location like a GlusterFS
volume, then use the global configuration `files` property and the
`data ingress` command.
* (optional) `blobxfer_extra_options` are any extra options to pass to
`blobxfer`.
* (optional) `ssh` is the property for creating a user to accommodate SSH
sessions to compute nodes. If this property is absent, then an SSH user is not
created with pool creation.
* (required) `username` is the user to create on the compute nodes.
* (optional) `expiry_days` is the number of days from now for the account on
the compute nodes to expire. The default is 30 days from invocation time.
* (optional) `ssh_public_key` is the path to an existing SSH public key to
use. If not specified, an RSA public/private keypair will be automatically
generated only on Linux. If this is `null` or not specified on Windows,
the SSH user is not created.
* (optional) `generate_docker_tunnel_script` property directs script to
generate an SSH tunnel script that can be used to connect to the remote
Docker engine running on a compute node.
* (optional) `generated_file_export_path` is the path to export the
generated RSA keypair and docker tunnel script to. If omitted, the
current directory is used.
* (experimental) `hpn_server_swap` property enables an OpenSSH server with
[HPN patches](https://www.psc.edu/index.php/using-joomla/extensions/templates/atomic/636-hpn-ssh)
to be swapped with the standard distribution OpenSSH server. This is not
supported on all Linux distributions and may be force disabled.
* (required for `STANDARD_NV` instances, optional for `STANDARD_NC` instances)
`gpu` property defines additional information for NVIDIA GPU-enabled VMs:
* `nvidia_driver` property contains the following required members:
* `source` is the source url to download the driver.
* (optional) `additional_node_prep_commands` is an array of additional commands
to execute on the compute node host as part of node preparation. This can
be empty or omitted.
An example pool json template can be found
[here](../config\_templates/pool.json).
### <a name="jobs"></a>Jobs
The jobs schema is as follows:
```json
{
"job_specifications": [
{
"id": "dockerjob",
"multi_instance_auto_complete": true,
"environment_variables": {
"abc": "xyz"
},
"environment_variables_keyvault_secret_id": "https://myvault.vault.azure.net/secrets/myjobenv",
"max_task_retries": 3,
"input_data": {
"azure_batch": [
{
"job_id": "someotherjob",
"task_id": "task-a",
"include": ["wd/*.dat"],
"exclude": ["*.txt"],
"destination": null
}
],
"azure_storage": [
{
"storage_account_settings": "mystorageaccount",
"container": "jobcontainer",
"include": ["jobdata*.bin"],
"destination": "$AZ_BATCH_NODE_SHARED_DIR/jobdata",
"blobxfer_extra_options": null
}
]
},
"tasks": [
{
"id": null,
"depends_on": [
"taskid-a", "taskid-b", "taskid-c"
],
"depends_on_range": [
1, 10
],
"image": "busybox",
"name": null,
"labels": [],
"environment_variables": {
"def": "123"
},
"environment_variables_keyvault_secret_id": "https://myvault.vault.azure.net/secrets/mytaskenv",
"ports": [],
"data_volumes": [
"contdatavol",
"hosttempvol"
],
"shared_data_volumes": [
"azurefilevol"
],
"resource_files": [
{
"file_path": "",
"blob_source": "",
"file_mode": ""
}
],
"input_data": {
"azure_batch": [
{
"job_id": "previousjob",
"task_id": "mytask1",
"include": ["wd/output/*.bin"],
"exclude": ["*.txt"],
"destination": null
}
],
"azure_storage": [
{
"storage_account_settings": "mystorageaccount",
"container": "taskcontainer",
"include": ["taskdata*.bin"],
"destination": "$AZ_BATCH_NODE_SHARED_DIR/taskdata",
"blobxfer_extra_options": null
}
]
},
"output_data": {
"azure_storage": [
{
"storage_account_settings": "mystorageaccount",
"container": "output",
"source": null,
"include": ["**/out*.dat"],
"blobxfer_extra_options": null
}
]
},
"remove_container_after_exit": true,
"shm_size": "256m",
"additional_docker_run_options": [
],
"infiniband": false,
"gpu": false,
"max_task_retries": 3,
"retention_time": "1.12:00:00",
"multi_instance": {
"num_instances": "pool_current_dedicated",
"coordination_command": null,
"resource_files": [
{
"file_path": "",
"blob_source": "",
"file_mode": ""
}
]
},
"entrypoint": null,
"command": ""
}
]
}
]
}
```
`job_specifications` array consists of jobs to create.
* (required) `id` is the job id to create. If the job already exists, the
specified `tasks` under the job will be added to the existing job.
* (optional) `multi_instance_auto_complete` enables auto-completion of the job
for which a multi-instance task is run. This allows automatic cleanup of the
Docker container in multi-instance tasks. This is defaulted to `true` when
multi-instance tasks are specified.
* (optional) `environment_variables` under the job are environment variables
which will be applied to all tasks operating under the job. Note that
environment variables are not expanded and are passed as-is. You will need
to source the environment file `$AZ_BATCH_TASK_WORKING_DIR/.shipyard.envlist`
in a shell within the docker `command` or `entrypoint` if you want any
environment variables to be expanded.
* (optional) `environment_variables_keyvault_secret_id` under the job are
environment variables stored in KeyVault that should be applied to all tasks
operating under the job. The secret stored in KeyVault must be a valid json
string, e.g., `{ "env_var_name": "env_var_value" }`.
* (optional) `max_task_retries` sets the maximum number of times that
Azure Batch should retry all tasks in this job for. By default, Azure Batch
does not retry tasks that fail (i.e. `max_task_retries` is 0).
* (optional) `input_data` is an object containing data that should be
ingressed for the job. Any `input_data` defined at this level will be
downloaded for this job which can be run on any number of compute nodes
depending upon the number of constituent tasks and repeat invocations. However,
`input_data` is only downloaded once per job invocation on a compute node.
For example, if `job-1`:`task-1` is run on compute node A and then
`job-1`:`task-2` is run on compute node B, then this `input_data` is ingressed
to both compute node A and B. However, if `job-1`:`task-3` is then run on
compute node A after `job-1`:`task-1`, then the `input_data` is not
transferred again. This object currently supports `azure_batch` and
`azure_storage` as members.
* `azure_batch` contains the following members:
* (required) `job_id` is the id of the job containing the task to fetch files from
* (required) `task_id` is the id of the task to fetch files from
* (optional) `include` is an array of include filters
* (optional) `exclude` is an array of exclude filters
* (required) `destination` is the destination path to place the files
* `azure_storage` contains the following members:
* (required) `storage_account_settings` contains a storage account link
as defined in the credentials json.
* (required) `container` or `file_share` is required when downloading
from Azure Blob Storage or Azure File Storage, respectively.
`container` specifies which container to download from for Azure Blob
Storage while `file_share` specifies which file share to download from
for Azure File Storage. Only one of these properties can be specified
per `data_transfer` object.
* (optional) `include` property defines an optional include filter.
Although this property is an array, it is only allowed to have 1
maximum filter.
* (required) `destination` property defines where to place the
downloaded files on the host file system. Please note that you should
not specify a destination that is on a shared file system. If you
require ingressing to a shared file system location like a GlusterFS
volume, then use the global configuration `files` property and the
`data ingress` command.
* (optional) `blobxfer_extra_options` are any extra options to pass to
`blobxfer`.
* (required) `tasks` is an array of tasks to add to the job.
* (optional) `id` is the task id. Note that if the task `id` is null or
empty then a generic task id will be assigned. The generic task id is
formatted as `dockertask-NNNNN` where `NNNNN` starts from `00000` and is
increased by 1 for each task added to the same job. If there are more
than `99999` autonamed tasks in a job then the numbering is not
padded for tasks exceeding 5 digits.
* (optional) `depends_on` is an array of task ids for which this container
invocation (task) depends on and must run to successful completion prior
to this task executing.
* (optional) `depends_on_range` is an array with exactly two integral
elements containing a task `id` range for which this task is dependent
upon, i.e., the start `id` and the end `id` for which this task depends
on. Although task `id`s are always strings, the dependent task `id`s for
ranges must be expressed by their integral representation for this
property. This also implies that task `id`s for which this task depends
on must be integral in nature. For example, if `depends_on_range` is set
to `[1, 10]` (note the integral members), then there should be task
`id`s of `"1"`, `"2"`, ... `"10"` within the job. Once these dependent
tasks complete successfully, then this specified task will execute.
* (required) `image` is the Docker image to use for this task
* (optional) `name` is the name to assign to the container. If not
specified, the value of the `id` property will be used for `name`.
* (optional) `labels` is an array of labels to apply to the container.
* (optional) `environment_variables` are any additional task-specific
environment variables that should be applied to the container. Note that
environment variables are not expanded and are passed as-is. You will
need to source the environment file
`$AZ_BATCH_TASK_WORKING_DIR/.shipyard.envlist` in a shell within the
docker `command` or `entrypoint` if you want any environment variables
to be expanded.
* (optional) `environment_variables_keyvault_secret_id` are any additional
task-specific environment variables that should be applied to the
container but are stored in KeyVault. The secret stored in KeyVault must
be a valid json string, e.g., `{ "env_var_name": "env_var_value" }`.
* (optional) `ports` is an array of port specifications that should be
exposed to the host.
* (optional) `data_volumes` is an array of `data_volume` aliases as defined
in the global configuration file. These volumes will be mounted in the
container.
* (optional) `shared_data_volumes` is an array of `shared_data_volume`
aliases as defined in the global configuration file. These volumes will be
mounted in the container.
* (optional) `resource_files` is an array of resource files that should be
downloaded as part of the task. Each array entry contains the following
information:
* `file_path` is the path within the task working directory to place the
file on the compute node.
* `blob_source` is an accessible HTTP/HTTPS URL. This need not be an Azure
Blob Storage URL.
* `file_mode` is the file mode to set for the file on the compute node.
This is optional.
* (optional) `input_data` is an object containing data that should be
ingressed for this specific task. This object currently supports
`azure_batch` and `azure_storage` as members. Note for multi-instance
tasks, transfer of `input_data` is only applied to the task running the
application command.
* `azure_batch` contains the following members:
* (required) `job_id` is the id of the job containing the task to fetch files from
* (required) `task_id` is the id of the task to fetch files from
* (optional) `include` is an array of include filters
* (optional) `exclude` is an array of exclude filters
* (optional) `destination` is the destination path to place the files.
If `destination` is not specified at this level, then files are
defaulted to download into `$AZ_BATCH_TASK_WORKING_DIR`.
* `azure_storage` contains the following members:
* (required) `storage_account_settings` contains a storage account link
as defined in the credentials json.
* (required) `container` or `file_share` is required when downloading
from Azure Blob Storage or Azure File Storage, respectively.
`container` specifies which container to download from for Azure Blob
Storage while `file_share` specifies which file share to download from
for Azure File Storage. Only one of these properties can be specified
per `data_transfer` object.
* (optional) `include` property defines an optional include filter.
Although this property is an array, it is only allowed to have 1
maximum filter.
* (optional) `destination` property defines where to place the
downloaded files on the host file system. Unlike the job-level
version of `input_data`, this `destination` property can be omitted.
If `destination` is not specified at this level, then files are
defaulted to download into `$AZ_BATCH_TASK_WORKING_DIR`. Please note
that you should not specify a destination that is on a shared file
system. If you require ingressing to a shared file system location
like a GlusterFS volume, then use the global configuration `files`
property and the `data ingress` command.
* (optional) `blobxfer_extra_options` are any extra options to pass to
`blobxfer`.
* (optional) `output_data` is an object containing data that should be
egressed for this specific task if and only if the task completes
successfully. This object currently only supports `azure_storage` as a
member. Note for multi-instance tasks, transfer of `output_data` is only
applied to the task running the application command.
* `azure_storage` contains the following members:
* (required) `storage_account_settings` contains a storage account link
as defined in the credentials json.
* (required) `container` or `file_share` is required when uploading to
Azure Blob Storage or Azure File Storage, respectively. `container`
specifies which container to upload to for Azure Blob Storage while
`file_share` specifies which file share to upload to for Azure File
Storage. Only one of these properties can be specified per
`data_transfer` object.
* (optional) `source` property defines which directory to upload to
Azure storage. If `source` is not specified, then `source` is
defaulted to `$AZ_BATCH_TASK_DIR`.
* (optional) `include` property defines an optional include filter.
Although this property is an array, it is only allowed to have 1
maximum filter.
* (optional) `blobxfer_extra_options` are any extra options to pass to
`blobxfer`.
* (optional) `remove_container_after_exit` property specifies if the
container should be automatically removed/cleaned up after it exits. This
defaults to `false`.
* (optional) `shm_size` property specifies the size of `/dev/shm` in
the container. The default is `64m`. The postfix unit can be designated
as `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). This
value may need to be increased from the default of `64m` for certain
Docker applications, including multi-instance tasks using Intel MPI
(see [issue #8](https://github.com/Azure/batch-shipyard/issues/8)).
* (optional) `additional_docker_run_options` is an array of additional Docker
run options that should be passed to the Docker daemon when starting this
container.
* (optional) `infiniband` designates if this container requires access to the
Infiniband/RDMA devices on the host. Note that this will automatically
force the container to use the host network stack. If this property is
set to `true`, ensure that the `pool_specification` property
`inter_node_communication_enabled` is set to `true`.
* (optional) `gpu` designates if this container requires access to the GPU
devices on the host. If this property is set to `true`, Docker containers
are instantiated via `nvidia-docker`. This requires N-series VM instances.
* (optional) `max_task_retries` sets the maximum number of times that
Azure Batch should retry this task for. This overrides the job-level task
retry count. By default, Azure Batch does not retry tasks that fail
(i.e. `max_task_retries` is 0).
* (optional) `retention_time` sets the timedelta to retain the task
directory on the compute node where it ran after the task completes.
The format for this property is a timedelta with a string representation
of "d.HH:mm:ss". For example, "1.12:00:00" would allow the compute node
to clean up this task's directory 36 hours after the task completed. The
default, if unspecified, is effectively infinite - i.e., task data is
retained forever on the compute node that ran the task.
* (optional) `multi_instance` is a property indicating that this task is a
multi-instance task. This is required if the Docker image is an MPI
program. Additional information about multi-instance tasks and Batch
Shipyard can be found
[here](80-batch-shipyard-multi-instance-tasks.md). Do not define this
property for tasks that are not multi-instance. Additional members of this
property are:
* `num_instances` is a property setting the number of compute node
instances required for this multi-instance task. This can be any one
of the following:
1. An integral number
2. `pool_current_dedicated` which is the instantaneous reading of the
target pool's current dedicated count during this function invocation.
3. `pool_specification_vm_count` which is the `vm_count` specified in the
pool configuration.
* `coordination_command` is the coordination command that is run by each
instance (compute node) of this multi-instance task prior to the
application command. This command must not block and must exit
successfully for the multi-instance task to proceed. This is the command
passed to the container in `docker run` for multi-instance tasks. This
docker container instance will automatically be daemonized. This is
optional and may be null.
* `resource_files` is an array of resource files that should be downloaded
as part of the multi-instance task. Each array entry contains the
following information:
* `file_path` is the path within the task working directory to place
the file on the compute node.
* `blob_source` is an accessible HTTP/HTTPS URL. This need not be an
Azure Blob Storage URL.
* `file_mode` is the file mode to set for the file on the compute node.
This is optional.
* (optional) `entrypoint` is the property that can override the Docker image
defined `ENTRYPOINT`.
* (optional) `command` is the command to execute in the Docker container
context. If this task is a regular non-multi-instance task, then this is
the command passed to the container context during `docker run`. If this
task is a multi-instance task, then this `command` is the application
command and is executed with `docker exec` in the running Docker container
context from the `coordination_command` in the `multi_instance` property.
This property may be null.
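As a concrete, hypothetical sketch of the task dependency behavior described
above (in particular `depends_on_range` and its requirement for integral task
ids), the following job runs a final merge task only after tasks `1` and `2`
complete successfully; the image and commands are placeholders:
```json
{
  "job_specifications": [
    {
      "id": "rangejob",
      "tasks": [
        {
          "id": "1",
          "image": "busybox",
          "remove_container_after_exit": true,
          "command": "/bin/sh -c \"echo part 1\""
        },
        {
          "id": "2",
          "image": "busybox",
          "remove_container_after_exit": true,
          "command": "/bin/sh -c \"echo part 2\""
        },
        {
          "id": "merge",
          "depends_on_range": [1, 2],
          "image": "busybox",
          "remove_container_after_exit": true,
          "command": "/bin/sh -c \"echo merge\""
        }
      ]
    }
  ]
}
```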
An example jobs json template can be found
[here](../config\_templates/jobs.json).
## Batch Shipyard Usage
Continue on to [Batch Shipyard Usage](20-batch-shipyard-usage.md).


@ -36,7 +36,7 @@ The credentials schema is as follows:
"rsa_private_key_pem": "/path/to/privkey.pem",
"x509_cert_sha1_thumbprint": "01AB02CD...",
"user": "me@domain.com",
"password": "password"
"password": "password",
"token_cache": {
"enabled": true,
"filename": ""
@ -59,6 +59,7 @@ The credentials schema is as follows:
"filename": ""
}
},
"resource_group": "",
"account_key": "batchaccountkey",
"account_key_keyvault_secret_id": "https://myvault.vault.azure.net/secrets/batchkey"
},
@ -149,9 +150,12 @@ under the `batch` property can be found in the
* (required) `account_service_url` is the Batch account service URL.
* (required for UserSubscription accounts, optional otherwise) `aad` AAD
authentication parameters for Azure Batch.
* (optional) `resource_group` is the resource group containing the Batch
account. This is only required if using a UserSubscription Batch account
with `aad` authentication.
* (required unless `aad` is specified) `account_key` is the shared
key. This is required for non-AAD logins. This is ignored if the `aad`
property is specified.
key. This is required for non-AAD logins. This option takes precedence
over the `aad` property if specified.
* (optional) `account_key_keyvault_secret_id` property can be used to
reference an Azure KeyVault secret id. Batch Shipyard will contact the
specified KeyVault and replace the `account_key` value as returned by


@ -0,0 +1,418 @@
# Batch Shipyard Global Configuration
This page contains in-depth details on how to configure the global
json file for Batch Shipyard.
## Schema
The global config schema is as follows:
```json
{
"batch_shipyard": {
"storage_account_settings": "mystorageaccount",
"storage_entity_prefix": "shipyard",
"generated_sas_expiry_days": 90,
"encryption" : {
"enabled": true,
"pfx": {
"filename": "encrypt.pfx",
"passphrase": "mysupersecretpassword",
"sha1_thumbprint": "123456789..."
},
"public_key_pem": "encrypt.pem"
}
},
"docker_registry": {
"private": {
"allow_public_docker_hub_pull_on_missing": true,
"server": "myserver-myorg.azurecr.io",
"azure_storage": {
"storage_account_settings": "mystorageaccount",
"container": "mydockerregistry"
}
}
},
"data_replication": {
"peer_to_peer": {
"enabled": true,
"compression": true,
"concurrent_source_downloads": 10,
"direct_download_seed_bias": null
},
"non_peer_to_peer_concurrent_downloading": true
},
"global_resources": {
"docker_images": [
"busybox",
"redis:3.2.3-alpine",
],
"files": [
{
"source": {
"path": "/some/local/path/dir",
"include": ["*.dat"],
"exclude": ["*.bak"]
},
"destination": {
"shared_data_volume": "glustervol",
"relative_destination_path": "myfiles",
"data_transfer": {
"method": "multinode_scp",
"ssh_private_key": "id_rsa_shipyard",
"scp_ssh_extra_options": "-C -c aes256-gcm@openssh.com",
"rsync_extra_options": "",
"split_files_megabytes": 500,
"max_parallel_transfers_per_node": 2
}
}
},
{
"source": {
"path": "/some/local/path/bound/for/blob",
"include": ["*.bin"]
},
"destination": {
"storage_account_settings": "mystorageaccount",
"data_transfer": {
"container": "mycontainer",
"blobxfer_extra_options": "--no-computefilemd5"
}
}
},
{
"source": {
"path": "/another/local/path/dir",
"include": [],
"exclude": []
},
"destination": {
"relative_destination_path": "relpath/on/host",
"data_transfer": {
"method": "rsync+ssh",
"ssh_private_key": "id_rsa_shipyard",
"scp_ssh_extra_options": "-c aes256-gcm@openssh.com",
"rsync_extra_options": "-v"
}
}
}
],
"docker_volumes": {
"data_volumes": {
"abcvol": {
"host_path": null,
"container_path": "/abc"
},
"hosttempvol": {
"host_path": "/tmp",
"container_path": "/hosttmp"
}
},
"shared_data_volumes": {
"shipyardvol": {
"volume_driver": "azurefile",
"storage_account_settings": "mystorageaccount",
"azure_file_share_name": "shipyardshared",
"container_path": "$AZ_BATCH_NODE_SHARED_DIR/azfile",
"mount_options": [
"filemode=0777",
"dirmode=0777",
"nolock=true"
]
},
"glustervol": {
"volume_driver": "glusterfs_on_compute",
"container_path": "$AZ_BATCH_NODE_SHARED_DIR/gfs",
"volume_type": "replica",
"volume_options": [
"performance.cache-size 1 GB",
"performance.cache-max-file-size 10 MB",
"performance.cache-refresh-timeout 61",
]
}
}
}
}
}
```
The `batch_shipyard` property is used to configure settings for the tool.
* (required) `storage_account_settings` is a link to the alias of the storage
account specified, in this case, it is `mystorageaccount`. Batch Shipyard
requires a storage account for storing metadata in order to execute across a
distributed environment.
* (optional) `storage_entity_prefix` property is used as a generic qualifier
to prefix storage containers (blob containers, tables, queues) with. If not
specified, defaults to `shipyard`.
* (optional) `generated_sas_expiry_days` property is used to set the number of
days any generated SAS key by Batch Shipyard is valid for. The default is 30
days. This is useful if you have long-lived pools and want to ensure that
SAS keys are valid for longer periods of time.
* (optional) `encryption` object is used to define credential encryption which
contains the following members:
* (required) `enabled` property enables or disables this feature.
* (required) `pfx` object defines the PFX certificate
* (required) `filename` property is the full path and name to the PFX
certificate
* (required) `passphrase` property is the passphrase for the PFX
certificate. This cannot be empty.
* (optional) `sha1_thumbprint` is the SHA1 thumbprint of the
certificate. If the PFX file is created using the `cert create` command,
then the SHA1 thumbprint is output. It is recommended to populate this
property such that it does not have to be generated when needed for
encryption.
* (optional) `public_key_pem` property is the full path and name to the
RSA public key in PEM format. If the PFX file is created using the
`cert create` command, then this file is generated along with the PFX
file. It is recommended to populate this property with the PEM file path
such that it does not have to be generated when needed for encryption.
The `docker_registry` property is used to configure Docker image distribution
options from public/private Docker Hub and private registries.
* (optional) `private` property controls settings for interacting with private
registries. There are three kinds of private registries that are supported:
(1) private registries hosted on Docker Hub, (2) Internet accessible
registries such as those hosted by the
[Azure Container Registry](https://azure.microsoft.com/en-us/services/container-registry/)
service and (3) [private registry instances backed to
Azure Blob Storage](https://azure.microsoft.com/en-us/documentation/articles/virtual-machines-linux-docker-registry-in-blob-storage/)
and are run on compute nodes. To use private registries hosted on Docker Hub,
no additional properties need to be specified here, instead, specify your
Docker Hub login information in the credentials json. To specify a private
registry other than on Docker Hub, a json property named `server` should be
defined. To use a private registry backed by Azure Blob Storage, define a
json object named `azure_storage`. Note that only one of these three types
of private registries may be specified at once. The following
describes members of the non-Docker Hub private registries supported:
* (optional) `server` property is the fully-qualified host name of a
private registry server. A specific port other than 80 can be
specified using a `:` separator, e.g.,
`mydockerregistry.com:8080`. Port 80 is the default if no port is
specified. The value of this property should have an associated login
in the credentials json file.
* (optional) `azure_storage` object defines settings for connecting to a
private registry backed by Azure Storage blobs, where the private registry
instances are hosted on the compute nodes themselves.
* (required) `storage_account_settings` is a link to the alias of the
storage account specified that stores the private registry blobs.
* (required) `container` property is the name of the Azure Blob
container holding the private registry blobs.
* (optional) `allow_public_docker_hub_pull_on_missing` property allows
pass-through of Docker image retrieval to public Docker Hub if it is
missing in the private registry. This defaults to `false` if not
specified. Note that this setting does not apply to a missing Docker
image that is allowed to run via the job property
`allow_run_on_missing_image`.
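As a short sketch of the second type of private registry described above, a
configuration pointed at an Internet accessible registry (the server name
below is a placeholder) that still permits public Docker Hub pass-through
might look like the following; the corresponding login for this server would
be supplied in the credentials json:
```json
{
  "docker_registry": {
    "private": {
      "server": "myregistry-myorg.azurecr.io",
      "allow_public_docker_hub_pull_on_missing": true
    }
  }
}
```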
The `data_replication` property is used to configure the internal image
replication mechanism between compute nodes within a compute pool. The
`non_peer_to_peer_concurrent_downloading` property specifies if it is ok
to allow unfettered concurrent downloading from the source registry among
all compute nodes. The following options apply to `peer_to_peer` data
replication options:
* (optional) `enabled` property enables or disables private peer-to-peer
transfer. Note that for compute pools with a relatively small number of VMs,
peer-to-peer transfer may not provide any benefit and is recommended to be
disabled in these cases. Compute pools with large number of VMs and especially
in the case of an Azure Storage-backed private registry can benefit from
peer-to-peer image replication.
* `compression` property enables or disables compression of image files. It
is strongly recommended to keep this enabled.
* `concurrent_source_downloads` property specifies the number of
simultaneous downloads allowed to each image.
* `direct_download_seed_bias` property sets the number of direct download
seeds to prefer per image before switching to peer-to-peer transfer.
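For example, for a relatively small pool where peer-to-peer transfer is
unlikely to provide benefit as noted above, a sketch of this property might
simply disable it while keeping concurrent downloading from the source
registry; the remaining member values mirror the schema above:
```json
{
  "data_replication": {
    "peer_to_peer": {
      "enabled": false,
      "compression": true,
      "concurrent_source_downloads": 10,
      "direct_download_seed_bias": null
    },
    "non_peer_to_peer_concurrent_downloading": true
  }
}
```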
The `global_resources` property contains information regarding required
Docker images, volume configuration and data ingress information. This
property is required.
`docker_images` is an array of docker images that should be installed on
every compute node when this configuration file is supplied while creating
a compute pool. Image tags are supported. Image names should not include
private registry server names, as these will be automatically prepended. For
instance, if you have an image `abc/mytag` on your private registry
`myregistry-myorg.azurecr.io`, your image should be named in the
`docker_images` array as `abc/mytag` and not
`myregistry-myorg.azurecr.io/abc/mytag`.
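For example, assuming the private registry `myregistry-myorg.azurecr.io`
above hosts the image `abc/mytag`, the entries would be listed without the
registry server prefix:
```json
{
  "global_resources": {
    "docker_images": [
      "busybox",
      "abc/mytag"
    ]
  }
}
```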
`files` is an optional property that specifies data that should be ingressed
from a location accessible by the local machine (i.e., the machine invoking
`shipyard.py`) to a shared file system location accessible by compute nodes
in the pool or to Azure Blob or File Storage. `files` is a json list of objects,
which allows for multiple sources to destinations to be ingressed during the
same invocation. Note that no Azure Batch environment variables
(i.e., `$AZ_BATCH_`-style environment variables) are available as path
arguments since ingress actions performed within `files` are done locally
on the machine invoking `shipyard.py`. Each object within the `files` list
contains the following members:
* (required) `source` property contains the following members:
* (required) `path` is a local path. A single file or a directory
can be specified. Filters below will be ignored if `path` is a file and
not a directory.
* (optional) `include` is an array of
[Unix shell-style wildcard filters](https://docs.python.org/3.5/library/fnmatch.html)
where only files matching a filter are included in the data transfer.
Filters specified in `include` have precedence over `exclude` described
next. `include` can only have a maximum of 1 filter for ingress to Azure
Blob Storage. In this example, all files ending in `.dat` are ingressed.
* (optional) `exclude` is an array of
[Unix shell-style wildcard filters](https://docs.python.org/3.5/library/fnmatch.html)
where files matching a filter are excluded from the data transfer. Filters
specified in `include` have precedence over filters specified in
`exclude`. `exclude` cannot be specified for ingress into Azure Blob
Storage. In this example, all files ending in `.bak` are skipped for
ingress.
* (required) `destination` property contains the following members:
* (required or optional) `shared_data_volume` or `storage_account_settings`
for data ingress to a GlusterFS volume or Azure Blob or File Storage. If
you are ingressing to a pool with only one compute node, you may omit
`shared_data_volume`. Otherwise, you may specify one or the other, but
not both in the same object. Please see below in the
`shared_data_volumes` for information on how to set up a GlusterFS share.
* (required or optional) `relative_destination_path` specifies a relative
destination path to place the files, with respect to the target root.
If transferring to a `shared_data_volume` then this is relative to the
GlusterFS volume root. If transferring to a single-node pool, and thus no
`shared_data_volume` is specified in the prior property, then this is
relative to
[$AZ_BATCH_NODE_ROOT_DIR](https://azure.microsoft.com/en-us/documentation/articles/batch-api-basics/#files-and-directories).
To place files directly in `$AZ_BATCH_NODE_ROOT_DIR` (not recommended),
you can specify this property as empty string when not ingressing to
a `shared_data_volume`. Note that if `scp` is selected while attempting
to transfer directly to this aforementioned path, then `scp` will fail
with an exit code of 1 even though the transfer will have succeeded (this
is due to some of the permission options). If this property is not specified for
a `shared_data_volume`, then files will be placed directly in the
GlusterFS volume root. This property cannot be specified for an Azure
Storage destination (i.e., `storage_account_settings`).
* (required) `data_transfer` specifies how the transfer should take place.
The following list contains members for GlusterFS ingress when a GlusterFS
volume is provided for `shared_data_volume` (see below for ingressing to
Azure Blob or File Storage):
* (required) `method` specified which method should be used to ingress
data, which should be one of: `scp`, `multinode_scp`, `rsync+ssh` or
`multinode_rsync+ssh`. `scp` will use secure copy to copy a file or a
directory (recursively) to the remote share path. `multinode_scp` will
attempt to simultaneously transfer files to many compute nodes using
`scp` at the same time to speed up data transfer. `rsync+ssh` will
perform an rsync of files through SSH. `multinode_rsync+ssh` will
attempt to simultaneously transfer files using `rsync` to many compute
nodes at the same time to speed up data transfer. Note that you may
specify the `multinode_*` methods even with only 1 compute node in a
pool which will allow you to take advantage of
`max_parallel_transfers_per_node` below.
* (optional) `ssh_private_key` location of the SSH private key for the
username specified in the `pool_specification`:`ssh` section when
connecting to compute nodes. The default is `id_rsa_shipyard`, if
omitted, which is automatically generated if no SSH key is specified
when an SSH user is added to a pool.
* (optional) `scp_ssh_extra_options` are any extra options to pass to
`scp` or `ssh` for `scp`/`multinode_scp` or
`rsync+ssh`/`multinode_rsync+ssh` methods, respectively. In the example
above, `-C` enables compression and `-c aes256-gcm@openssh.com`
is passed to `scp`, which can potentially increase the transfer speed by
selecting the `aes256-gcm@openssh.com` cipher which can exploit Intel
AES-NI.
* (optional) `rsync_extra_options` are any extra options to pass to
`rsync` for the `rsync+ssh`/`multinode_rsync+ssh` transfer methods. This
property is ignored for non-rsync transfer methods.
* (optional) `split_files_megabytes` splits files into chunks with the
specified size in MiB. This can potentially help with very large files.
This option forces the transfer `method` to `multinode_scp`.
Note that the destination file system must be able to accommodate
up to 2x the size of files which are split. Additionally, transfers
involving files which are split will incur reconstruction costs after
the transfer is complete, which will increase the total end-to-end
ingress time. However, in certain scenarios, by splitting files and
transferring chunks in parallel along with reconstruction may end up
being faster than transferring a large file without chunking.
* (optional) `max_parallel_transfers_per_node` is the maximum number of
parallel transfers to invoke per node with the
`multinode_scp`/`multinode_rsync+ssh` methods. For example, if there
are 3 compute nodes in the pool, and `2` is given for this option, then
there will be up to 2 scp sessions in parallel per compute node for a
maximum of 6 concurrent scp sessions to the pool. The default is 1 if
not specified or omitted.
* (required) `data_transfer` specifies how the transfer should take place.
When Azure Blob or File Storage is selected as the destination for data
ingress, [blobxfer](https://github.com/Azure/blobxfer) is invoked. The
following list contains members for Azure Blob or File Storage ingress
when a storage account link is provided for `storage_account_settings`:
* (required) `container` or `file_share` is required when uploading to
Azure Blob Storage or Azure File Storage, respectively. `container`
specifies which container to upload to for Azure Blob Storage while
`file_share` specifies which file share to upload to for Azure File
Storage. Only one of these properties can be specified per
`data_transfer` object. The container or file share need not be created
beforehand.
* (optional) `blobxfer_extra_options` are any extra options to pass to
`blobxfer`. In the example above, `--no-computefilemd5` will force
`blobxfer` to skip MD5 calculation on files ingressed.
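As an additional sketch complementing the examples embedded in the schema
above, a `files` entry that ingresses a local directory to an Azure File
share (rather than a blob container) would substitute `file_share` for
`container`; the local path, share name and filter below are placeholders:
```json
{
  "source": {
    "path": "/some/local/path/bound/for/files",
    "include": ["*.csv"]
  },
  "destination": {
    "storage_account_settings": "mystorageaccount",
    "data_transfer": {
      "file_share": "myfileshare",
      "blobxfer_extra_options": null
    }
  }
}
```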
`docker_volumes` is an optional property that can consist of two
different types of volumes: `data_volumes` and `shared_data_volumes`.
`data_volumes` can be of two flavors depending upon if `host_path` is set to
null or not. In the former, this is typically used with the `VOLUME` keyword
in Dockerfiles to initialize a data volume with existing data inside the
image. If `host_path` is set, then the path on the host is mounted in the
container at the path specified with `container_path`.
`shared_data_volumes` is an optional property for initializing persistent
shared storage volumes. In the first shared volume, `shipyardvol` is the alias
of this volume:
* `volume_driver` property specifies the Docker Volume Driver to use.
Currently Batch Shipyard only supports the `volume_driver` as `azurefile` or
`glusterfs_on_compute`. Note that `glusterfs_on_compute` is not a true Docker
Volume Driver. For this volume (`shipyardvol`), as this is an Azure File
shared volume, the `volume_driver` should be set as `azurefile`.
* `storage_account_settings` is a link to the alias of the storage account
specified that holds this Azure File Share.
* `azure_file_share_name` is the name of the file share on Azure Files. Note
that the Azure File share must be created beforehand, the toolkit does not
create Azure File shares, it only mounts them to the compute nodes.
* `container_path` is the path in the container to mount.
* `mount_options` are the mount options to pass to the mount command. Supported
options are documented
[here](https://github.com/Azure/azurefile-dockervolumedriver). It is
recommended to use `0777` for both `filemode` and `dirmode` as the `uid` and
`gid` cannot be reliably determined before the compute pool is allocated and
this volume will be mounted as the root user.
Note that when using `azurefile` for a shared data volume, the storage account
that holds the file share must reside within the same Azure region as the
Azure Batch compute pool. Attempting to mount an Azure File share that is
cross-region will result in failure as current Linux Samba clients do not
support share level encryption at this time.
The second shared volume, `glustervol`, is a
[GlusterFS](https://www.gluster.org/) network file system. Please note that
`glusterfs_on_compute` are GlusterFS volumes co-located on the VM's temporary
local disk space which is a shared resource. Sizes of the local temp disk for
each VM size can be found
[here](https://azure.microsoft.com/en-us/documentation/articles/virtual-machines-windows-sizes/).
If specifying a `glusterfs_on_compute` volume, you must enable internode
communication in the pool configuration file. These volumes have the following
properties:
* (required) `volume_driver` property should be set as `glusterfs_on_compute`.
* (required) `container_path` is the path in the container to mount.
* (optional) `volume_type` property defines the GlusterFS volume type.
Currently, `replica` is the only supported type.
* (optional) `volume_options` property defines additional GlusterFS volume
options to set.
`glusterfs_on_compute` volumes are mounted on the host at
`$AZ_BATCH_NODE_SHARED_DIR/.gluster/gv0`. Batch Shipyard will automatically
replace container path references in direct and storage-based data
ingress/egress with their host path equivalents.
Note that when resizing a pool with a `glusterfs_on_compute` shared file
system, you must resize with the `pool resize` command in `shipyard.py`
and not with the Azure Portal, Batch Explorer or any other tool.
Finally, note that all `docker_volumes` can be omitted completely along with
one or all of `data_volumes` and `shared_data_volumes` if you do not require
this functionality.
## Full template
A full template of the global configuration file can be found
[here](../config\_templates/config.json). Note that this template cannot
be used as-is and must be modified to fit your scenario.


@ -0,0 +1,163 @@
# Batch Shipyard Pool Configuration
This page contains in-depth details on how to configure the pool
json file for Batch Shipyard.
## Schema
The pool schema is as follows:
```json
{
"pool_specification": {
"id": "dockerpool",
"vm_size": "STANDARD_A9",
"vm_count": 10,
"max_tasks_per_node": 1,
"inter_node_communication_enabled": true,
"publisher": "OpenLogic",
"offer": "CentOS-HPC",
"sku": "7.1",
"reboot_on_start_task_failed": true,
"block_until_all_global_resources_loaded": true,
"transfer_files_on_pool_creation": false,
"input_data": {
"azure_batch": [
{
"job_id": "jobonanotherpool",
"task_id": "mytask",
"include": ["wd/*.dat"],
"exclude": ["*.txt"],
"destination": "$AZ_BATCH_NODE_SHARED_DIR/jobonanotherpool"
}
],
"azure_storage": [
{
"storage_account_settings": "mystorageaccount",
"container": "poolcontainer",
"include": ["pooldata*.bin"],
"destination": "$AZ_BATCH_NODE_SHARED_DIR/pooldata",
"blobxfer_extra_options": null
}
]
},
"ssh": {
"username": "docker",
"expiry_days": 7,
"ssh_public_key": null,
"generate_docker_tunnel_script": true,
"generated_file_export_path": null,
"hpn_server_swap": false
},
"gpu": {
"nvidia_driver": {
"source": "https://some.url"
}
},
"additional_node_prep_commands": [
]
}
}
```
The `pool_specification` property has the following members:
* (required) `id` is the compute pool ID.
* (required) `vm_size` is the
[Azure Virtual Machine Instance Size](https://azure.microsoft.com/en-us/pricing/details/virtual-machines/).
Please note that not all regions have every VM size available.
* (required) `vm_count` is the number of compute nodes to allocate.
* (optional) `max_tasks_per_node` is the maximum number of concurrent tasks
that can be running at any one time on a compute node. This defaults to a
value of 1 if not specified.
* (optional) `inter_node_communication_enabled` designates if this pool is set
up for inter-node communication. This must be set to `true` for any containers
that must communicate with each other such as MPI applications. This property
will be force enabled if peer-to-peer replication is enabled.
* (required) `publisher` is the publisher name of the Marketplace VM image.
* (required) `offer` is the offer name of the Marketplace VM image.
* (required) `sku` is the sku name of the Marketplace VM image.
* (optional) `reboot_on_start_task_failed` allows Batch Shipyard to reboot the
compute node in case there is a transient failure in node preparation (e.g.,
network timeout, resolution failure or download problem). This defaults to
`false`.
* (optional) `block_until_all_global_resources_loaded` will block the node
from entering ready state until all Docker images are loaded. This defaults
to `true`.
* (optional) `transfer_files_on_pool_creation` will ingress all `files`
specified in the `global_resources` section of the configuration json when
the pool is created. If files are to be ingressed to Azure Blob or File
Storage, then data movement operations are overlapped with the creation of the
pool. If files are to be ingressed to a shared file system on the compute
nodes, then the files are ingressed after the pool is created and the shared
file system is ready. Files can be ingressed to both Azure Blob Storage and a
shared file system during the same pool creation invocation. If this property
is set to `true` then `block_until_all_global_resources_loaded` will be force
disabled. If omitted, this property defaults to `false`.
* (optional) `input_data` is an object containing data that should be
ingressed to all compute nodes as part of node preparation. It is
important to note that if you are combining this action with `files` and
are ingressing data to Azure Blob or File storage as part of pool creation,
the blob containers or file shares defined here will be downloaded as
soon as the compute node is ready to do so. This may result in the blob
container/blobs or file share/files not being ready in time for the
`input_data` transfer. It is up to you to ensure that these two operations do
not overlap. If there is a possibility of overlap, then you should ingress
data defined in `files` prior to pool creation and disable the option above
`transfer_files_on_pool_creation`. This object currently supports
`azure_batch` and `azure_storage` as members.
* `azure_batch` contains the following members:
* (required) `job_id` is the id of the job containing the task to fetch files from
* (required) `task_id` is the id of the task to fetch files from
* (optional) `include` is an array of include filters
* (optional) `exclude` is an array of exclude filters
* (required) `destination` is the destination path to place the files
* `azure_storage` contains the following members:
* (required) `storage_account_settings` contains a storage account link
as defined in the credentials json.
* (required) `container` or `file_share` is required when downloading
from Azure Blob Storage or Azure File Storage, respectively.
`container` specifies which container to download from for Azure Blob
Storage while `file_share` specifies which file share to download from
for Azure File Storage. Only one of these properties can be specified
per `data_transfer` object.
* (optional) `include` property defines an optional include filter.
Although this property is an array, it is only allowed to have 1
maximum filter.
* (required) `destination` property defines where to place the
downloaded files on the host file system. Please note that you should
not specify a destination that is on a shared file system. If you
require ingressing to a shared file system location like a GlusterFS
volume, then use the global configuration `files` property and the
`data ingress` command.
* (optional) `blobxfer_extra_options` are any extra options to pass to
`blobxfer`.
* (optional) `ssh` is the property for creating a user to accommodate SSH
sessions to compute nodes. If this property is absent, then an SSH user is not
created with pool creation.
* (required) `username` is the user to create on the compute nodes.
* (optional) `expiry_days` is the number of days from now for the account on
the compute nodes to expire. The default is 30 days from invocation time.
* (optional) `ssh_public_key` is the path to an existing SSH public key to
use. If not specified, an RSA public/private keypair will be automatically
generated only on Linux. If this is `null` or not specified on Windows,
the SSH user is not created.
* (optional) `generate_docker_tunnel_script` property directs script to
generate an SSH tunnel script that can be used to connect to the remote
Docker engine running on a compute node.
* (optional) `generated_file_export_path` is the path to export the
generated RSA keypair and docker tunnel script to. If omitted, the
current directory is used.
* (experimental) `hpn_server_swap` property enables an OpenSSH server with
[HPN patches](https://www.psc.edu/index.php/using-joomla/extensions/templates/atomic/636-hpn-ssh)
to be swapped with the standard distribution OpenSSH server. This is not
supported on all Linux distributions and may be force disabled.
* (required for `STANDARD_NV` instances, optional for `STANDARD_NC` instances)
`gpu` property defines additional information for NVIDIA GPU-enabled VMs:
* `nvidia_driver` property contains the following required members:
* `source` is the source url to download the driver.
* (optional) `additional_node_prep_commands` is an array of additional commands
to execute on the compute node host as part of node preparation. This can
be empty or omitted.
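Putting a subset of the properties above together, a minimal, hypothetical
pool specification for a small test pool might look like the following; the
values simply mirror those shown in the schema at the top of this page:
```json
{
  "pool_specification": {
    "id": "testpool",
    "vm_size": "STANDARD_A9",
    "vm_count": 2,
    "inter_node_communication_enabled": true,
    "publisher": "OpenLogic",
    "offer": "CentOS-HPC",
    "sku": "7.1",
    "reboot_on_start_task_failed": true,
    "block_until_all_global_resources_loaded": true,
    "ssh": {
      "username": "docker"
    }
  }
}
```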
## Full template
A full template of the pool configuration file can be found
[here](../config\_templates/pool.json). Note that this template cannot
be used as-is and must be modified to fit your scenario.


@ -0,0 +1,378 @@
# Batch Shipyard Jobs Configuration
This page contains in-depth details on how to configure the jobs
json file for Batch Shipyard.
## Schema
The jobs schema is as follows:
```json
{
"job_specifications": [
{
"id": "dockerjob",
"multi_instance_auto_complete": true,
"environment_variables": {
"abc": "xyz"
},
"environment_variables_keyvault_secret_id": "https://myvault.vault.azure.net/secrets/myjobenv",
"max_task_retries": 3,
"allow_run_on_missing_image": false,
"input_data": {
"azure_batch": [
{
"job_id": "someotherjob",
"task_id": "task-a",
"include": ["wd/*.dat"],
"exclude": ["*.txt"],
"destination": null
}
],
"azure_storage": [
{
"storage_account_settings": "mystorageaccount",
"container": "jobcontainer",
"include": ["jobdata*.bin"],
"destination": "$AZ_BATCH_NODE_SHARED_DIR/jobdata",
"blobxfer_extra_options": null
}
]
},
"tasks": [
{
"id": null,
"depends_on": [
"taskid-a", "taskid-b", "taskid-c"
],
"depends_on_range": [
1, 10
],
"image": "busybox",
"name": null,
"labels": [],
"environment_variables": {
"def": "123"
},
"environment_variables_keyvault_secret_id": "https://myvault.vault.azure.net/secrets/mytaskenv",
"ports": [],
"data_volumes": [
"contdatavol",
"hosttempvol"
],
"shared_data_volumes": [
"azurefilevol"
],
"resource_files": [
{
"file_path": "",
"blob_source": "",
"file_mode": ""
}
],
"input_data": {
"azure_batch": [
{
"job_id": "previousjob",
"task_id": "mytask1",
"include": ["wd/output/*.bin"],
"exclude": ["*.txt"],
"destination": null
}
],
"azure_storage": [
{
"storage_account_settings": "mystorageaccount",
"container": "taskcontainer",
"include": ["taskdata*.bin"],
"destination": "$AZ_BATCH_NODE_SHARED_DIR/taskdata",
"blobxfer_extra_options": null
}
]
},
"output_data": {
"azure_storage": [
{
"storage_account_settings": "mystorageaccount",
"container": "output",
"source": null,
"include": ["**/out*.dat"],
"blobxfer_extra_options": null
}
]
},
"remove_container_after_exit": true,
"shm_size": "256m",
"additional_docker_run_options": [
],
"infiniband": false,
"gpu": false,
"max_task_retries": 3,
"retention_time": "1.12:00:00",
"multi_instance": {
"num_instances": "pool_current_dedicated",
"coordination_command": null,
"resource_files": [
{
"file_path": "",
"blob_source": "",
"file_mode": ""
}
]
},
"entrypoint": null,
"command": ""
}
]
}
]
}
```
`job_specifications` array consists of jobs to create.
* (required) `id` is the job id to create. If the job already exists, the
specified `tasks` under the job will be added to the existing job.
* (optional) `multi_instance_auto_complete` enables auto-completion of the job
for which a multi-instance task is run. This allows automatic cleanup of the
Docker container in multi-instance tasks. This is defaulted to `true` when
multi-instance tasks are specified.
* (optional) `environment_variables` under the job are environment variables
which will be applied to all tasks operating under the job. Note that
environment variables are not expanded and are passed as-is. You will need
to source the environment file `$AZ_BATCH_TASK_WORKING_DIR/.shipyard.envlist`
in a shell within the docker `command` or `entrypoint` if you want any
environment variables to be expanded.
* (optional) `environment_variables_keyvault_secret_id` under the job are
environment variables stored in KeyVault that should be applied to all tasks
operating under the job. The secret stored in KeyVault must be a valid json
string, e.g., `{ "env_var_name": "env_var_value" }`.
* (optional) `max_task_retries` sets the maximum number of times that
Azure Batch should retry all tasks in this job for. By default, Azure Batch
does not retry tasks that fail (i.e. `max_task_retries` is 0).
* (optional) `allow_run_on_missing_image` allows tasks with a Docker image reference
that was not pre-loaded on to the compute node via
`global_resources`:`docker_images` in the global configuration to be able to
run. Note that you should attempt to specify all Docker images that you intend
to run in the `global_resources`:`docker_images` property in the global
configuration to minimize scheduling to task execution latency.
* (optional) `input_data` is an object containing data that should be
ingressed for the job. Any `input_data` defined at this level will be
downloaded for this job which can be run on any number of compute nodes
depending upon the number of constituent tasks and repeat invocations. However,
`input_data` is only downloaded once per job invocation on a compute node.
For example, if `job-1`:`task-1` is run on compute node A and then
`job-1`:`task-2` is run on compute node B, then this `input_data` is ingressed
to both compute node A and B. However, if `job-1`:`task-3` is then run on
compute node A after `job-1`:`task-1`, then the `input_data` is not
transferred again. This object currently supports `azure_batch` and
`azure_storage` as members.
* `azure_batch` contains the following members:
* (required) `job_id` is the id of the job containing the task to fetch files from
* (required) `task_id` is the id of the task to fetch files from
* (optional) `include` is an array of include filters
* (optional) `exclude` is an array of exclude filters
* (required) `destination` is the destination path to place the files
* `azure_storage` contains the following members:
* (required) `storage_account_settings` contains a storage account link
as defined in the credentials json.
* (required) `container` or `file_share` is required when downloading
from Azure Blob Storage or Azure File Storage, respectively.
`container` specifies which container to download from for Azure Blob
Storage while `file_share` specifies which file share to download from
for Azure File Storage. Only one of these properties can be specified
per `data_transfer` object.
* (optional) `include` property defines an optional include filter.
Although this property is an array, it is only allowed to have 1
maximum filter.
* (required) `destination` property defines where to place the
downloaded files on the host file system. Please note that you should
not specify a destination that is on a shared file system. If you
require ingressing to a shared file system location like a GlusterFS
volume, then use the global configuration `files` property and the
`data ingress` command.
* (optional) `blobxfer_extra_options` are any extra options to pass to
`blobxfer`.
* (required) `tasks` is an array of tasks to add to the job.
* (optional) `id` is the task id. Note that if the task `id` is null or
empty then a generic task id will be assigned. The generic task id is
formatted as `dockertask-NNNNN` where `NNNNN` starts from `00000` and is
increased by 1 for each task added to the same job. If there are more
than `99999` autonamed tasks in a job then the numbering is not
padded for tasks exceeding 5 digits.
* (optional) `depends_on` is an array of task ids for which this container
invocation (task) depends on and must run to successful completion prior
to this task executing.
* (optional) `depends_on_range` is an array with exactly two integral
elements containing a task `id` range for which this task is dependent
upon, i.e., the start `id` and the end `id` for which this task depends
on. Although task `id`s are always strings, the dependent task `id`s for
ranges must be expressed by their integral representation for this
property. This also implies that task `id`s for which this task depends
on must be integral in nature. For example, if `depends_on_range` is set
to `[1, 10]` (note the integral members), then there should be task
`id`s of `"1"`, `"2"`, ... `"10"` within the job. Once these dependent
tasks complete successfully, then this specified task will execute.
* (required) `image` is the Docker image to use for this task
* (optional) `name` is the name to assign to the container. If not
specified, the value of the `id` property will be used for `name`.
* (optional) `labels` is an array of labels to apply to the container.
* (optional) `environment_variables` are any additional task-specific
environment variables that should be applied to the container. Note that
environment variables are not expanded and are passed as-is. You will
need to source the environment file
`$AZ_BATCH_TASK_WORKING_DIR/.shipyard.envlist` in a shell within the
docker `command` or `entrypoint` if you want any environment variables
to be expanded.
* (optional) `environment_variables_keyvault_secret_id` are any additional
task-specific environment variables that should be applied to the
container but are stored in KeyVault. The secret stored in KeyVault must
be a valid json string, e.g., `{ "env_var_name": "env_var_value" }`.
* (optional) `ports` is an array of port specifications that should be
exposed to the host.
* (optional) `data_volumes` is an array of `data_volume` aliases as defined
in the global configuration file. These volumes will be mounted in the
container.
* (optional) `shared_data_volumes` is an array of `shared_data_volume`
aliases as defined in the global configuration file. These volumes will be
mounted in the container.
* (optional) `resource_files` is an array of resource files that should be
downloaded as part of the task. Each array entry contains the following
information:
* `file_path` is the path within the task working directory to place the
file on the compute node.
* `blob_source` is an accessible HTTP/HTTPS URL. This need not be an Azure
Blob Storage URL.
* `file_mode` is the file mode to set for the file on the compute node.
This is optional.
* (optional) `input_data` is an object containing data that should be
ingressed for this specific task. This object currently supports
`azure_batch` and `azure_storage` as members. Note for multi-instance
tasks, transfer of `input_data` is only applied to the task running the
application command.
* `azure_batch` contains the following members:
* (required) `job_id` is the id of the job containing the task to fetch files from
* (required) `task_id` is the id of the task to fetch files from
* (optional) `include` is an array of include filters
* (optional) `exclude` is an array of exclude filters
* (optional) `destination` is the destination path to place the files.
If `destination` is not specified at this level, then files are
defaulted to download into `$AZ_BATCH_TASK_WORKING_DIR`.
* `azure_storage` contains the following members:
* (required) `storage_account_settings` contains a storage account link
as defined in the credentials json.
* (required) `container` or `file_share` is required when downloading
from Azure Blob Storage or Azure File Storage, respectively.
`container` specifies which container to download from for Azure Blob
Storage while `file_share` specifies which file share to download from
for Azure File Storage. Only one of these properties can be specified
per `data_transfer` object.
* (optional) `include` property defines an optional include filter.
Although this property is an array, it is only allowed to have 1
maximum filter.
* (optional) `destination` property defines where to place the
downloaded files on the host file system. Unlike the job-level
version of `input_data`, this `destination` property can be omitted.
If `destination` is not specified at this level, then files are
defaulted to download into `$AZ_BATCH_TASK_WORKING_DIR`. Please note
that you should not specify a destination that is on a shared file
system. If you require ingressing to a shared file system location
like a GlusterFS volume, then use the global configuration `files`
property and the `data ingress` command.
* (optional) `blobxfer_extra_options` are any extra options to pass to
`blobxfer`.
* (optional) `output_data` is an object containing data that should be
egressed for this specific task if and only if the task completes
successfully. This object currently only supports `azure_storage` as a
member. Note for multi-instance tasks, transfer of `output_data` is only
applied to the task running the application command.
* `azure_storage` contains the following members:
* (required) `storage_account_settings` contains a storage account link
as defined in the credentials json.
* (required) `container` or `file_share` is required when uploading to
Azure Blob Storage or Azure File Storage, respectively. `container`
specifies which container to upload to for Azure Blob Storage while
`file_share` specifies which file share to upload to for Azure File
Storage. Only one of these properties can be specified per
`data_transfer` object.
* (optional) `source` property defines which directory to upload to
Azure storage. If `source` is not specified, then `source` is
defaulted to `$AZ_BATCH_TASK_DIR`.
* (optional) `include` property defines an optional include filter.
Although this property is an array, it is only allowed to have 1
maximum filter.
* (optional) `blobxfer_extra_options` are any extra options to pass to
`blobxfer`.
* (optional) `remove_container_after_exit` property specifies if the
container should be automatically removed/cleaned up after it exits. This
defaults to `false`.
* (optional) `shm_size` property specifies the size of `/dev/shm` in
the container. The default is `64m`. The postfix unit can be designated
as `b` (bytes), `k` (kilobytes), `m` (megabytes), or `g` (gigabytes). This
value may need to be increased from the default of `64m` for certain
Docker applications, including multi-instance tasks using Intel MPI
(see [issue #8](https://github.com/Azure/batch-shipyard/issues/8)).
* (optional) `additional_docker_run_options` is an array of additional
Docker run options that should be passed to the Docker daemon when
starting this container.
* (optional) `infiniband` designates if this container requires access to the
Infiniband/RDMA devices on the host. Note that this will automatically
force the container to use the host network stack. If this property is
set to `true`, ensure that the `pool_specification` property
`inter_node_communication_enabled` is set to `true`.
* (optional) `gpu` designates if this container requires access to the GPU
devices on the host. If this property is set to `true`, Docker containers
are instantiated via `nvidia-docker`. This requires N-series VM instances.
* (optional) `max_task_retries` sets the maximum number of times that
Azure Batch should retry this task. This overrides the job-level task
retry count. By default, Azure Batch does not retry tasks that fail
(i.e., `max_task_retries` is 0).
* (optional) `retention_time` sets the timedelta to retain the task
directory on the compute node where it ran after the task completes.
The format for this property is a timedelta with a string representation
of "d.HH:mm:ss". For example, "1.12:00:00" would allow the compute node
to clean up this task's directory 36 hours after the task completed. The
default, if unspecified, is effectively infinite - i.e., task data is
retained forever on the compute node that ran the task.
* (optional) `multi_instance` is a property indicating that this task is a
multi-instance task. This is required if the Docker image is an MPI
program. Additional information about multi-instance tasks and Batch
Shipyard can be found
[here](80-batch-shipyard-multi-instance-tasks.md). Do not define this
property for tasks that are not multi-instance; a sketch of a
multi-instance task entry follows the Full template section below.
Additional members of this property are:
* `num_instances` is a property setting the number of compute node
instances required for this multi-instance task. This can be any one
of the following:
1. An integral number
2. `pool_current_dedicated` which is the instantaneous reading of the
target pool's current dedicated count during this function invocation.
3. `pool_specification_vm_count` which is the `vm_count` specified in the
pool configuration.
* `coordination_command` is the coordination command that is run by each
instance (compute node) of this multi-instance task prior to the
application command. This command must not block and must exit
successfully for the multi-instance task to proceed. This is the command
passed to the container in `docker run` for multi-instance tasks. This
Docker container instance will automatically be daemonized. This property
is optional and may be null.
* `resource_files` is an array of resource files that should be downloaded
as part of the multi-instance task. Each array entry contains the
following information:
* `file_path` is the path within the task working directory to place
the file on the compute node.
* `blob_source` is an accessible HTTP/HTTPS URL. This need not be an
Azure Blob Storage URL.
* `file_mode` is the file mode to set for the file on the compute node.
This property is optional.
* (optional) `entrypoint` is the property that can override the Docker image
defined `ENTRYPOINT`.
* (optional) `command` is the command to execute in the Docker container
context. If this task is a regular non-multi-instance task, then this is
the command passed to the container context during `docker run`. If this
task is a multi-instance task, then this `command` is the application
command and is executed with `docker exec` in the running Docker container
context from the `coordination_command` in the `multi_instance` property.
This property may be null.
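
The following is a minimal sketch of a single task entry that combines the
data movement and runtime options described above. All values shown (the
Docker `image` name, the `mystorageaccount` storage link, the container
names, the include filters, and the command) are illustrative placeholders
only and must be replaced with values from your own configuration.

```json
{
    "image": "myregistry/myapp:latest",
    "remove_container_after_exit": true,
    "shm_size": "256m",
    "retention_time": "1.12:00:00",
    "max_task_retries": 1,
    "input_data": {
        "azure_storage": [
            {
                "storage_account_settings": "mystorageaccount",
                "container": "task-input",
                "include": ["*.dat"]
            }
        ]
    },
    "output_data": {
        "azure_storage": [
            {
                "storage_account_settings": "mystorageaccount",
                "container": "task-output",
                "include": ["*.log"]
            }
        ]
    },
    "command": "/bin/bash -c \"./run_analysis.sh\""
}
```

Because `destination` is omitted under `input_data`:`azure_storage`, the
downloaded files are placed in `$AZ_BATCH_TASK_WORKING_DIR`; because
`source` is omitted under `output_data`:`azure_storage`, the contents of
`$AZ_BATCH_TASK_DIR` are uploaded.
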
## Full template
A full template of a jobs configuration file can be found
[here](../config_templates/jobs.json). Note that this template cannot
be used as-is and must be modified to fit your scenario.
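
In addition to the full template, the following minimal sketch illustrates
how the `multi_instance` properties described above fit together in a task
entry. The image name, resource file entries, and `mpirun` invocation are
illustrative placeholders only; `coordination_command` is left null, which
is permitted as described above, and the application `command` is executed
with `docker exec` against the running container.

```json
{
    "image": "myregistry/my-mpi-app:latest",
    "multi_instance": {
        "num_instances": "pool_current_dedicated",
        "coordination_command": null,
        "resource_files": [
            {
                "file_path": "run_mpi.sh",
                "blob_source": "https://myaccount.blob.core.windows.net/scripts/run_mpi.sh",
                "file_mode": "0755"
            }
        ]
    },
    "command": "/bin/bash -c \"mpirun -np 4 /opt/app/my_mpi_program\""
}
```
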

Просмотреть файл

@ -17,7 +17,9 @@ The following are general limitations or restrictions:
* Compute pool resize down (i.e., removing nodes from a pool) is not supported
when peer-to-peer transfer is enabled.
* The maximum number of compute nodes with peer-to-peer enabled is currently
40 for Linux pools for non-UserSubscription Batch accounts.
40 for Linux pools for non-UserSubscription Batch accounts. This check is
no longer performed before a pool is created and will instead result in
a ResizeError on the pool if not all compute nodes can be allocated.
* Data movement between Batch tasks as defined by `input_data`:`azure_batch`
is restricted to Batch accounts with keys (non-AAD).
* Virtual network support in Batch pools can only be used with

Просмотреть файл

@ -6,6 +6,10 @@ and effectively running your batch-style Docker workloads on Azure Batch.
2. [Installation](01-batch-shipyard-installation.md)
3. [Quick Start](02-batch-shipyard-quickstart.md)
4. [Configuration](10-batch-shipyard-configuration.md)
1. [Credentials Configuration](11-batch-shipyard-configuration-credentials.md)
2. [Global Configuration](12-batch-shipyard-configuration-global.md)
3. [Pool Configuration](13-batch-shipyard-configuration-pool.md)
4. [Jobs Configuration](14-batch-shipyard-configuration-jobs.md)
5. [Usage](20-batch-shipyard-usage.md)
6. [Data Movement](70-batch-shipyard-data-movement.md)
7. [Azure KeyVault for Credential Management](74-batch-shipyard-azure-keyvault.md)

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -6,7 +6,7 @@
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -6,7 +6,7 @@
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -20,7 +20,7 @@ compute application, it is best to choose `NC` VM instances.
once they are available for N-series VMs.
* `offer` should be `UbuntuServer`. Other offers will be supported once they
are available for N-series VMs.
* `sku` should be `16.04.0-LTS`. Other skus will be supported once they are
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
available for N-series VMs.
* `inter_node_communication_enabled` must be set to `true`
* `max_tasks_per_node` must be set to 1 or omitted

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -6,7 +6,7 @@
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -17,7 +17,7 @@ compute application, it is best to choose `NC` VM instances.
once they are available for N-series VMs.
* `offer` should be `UbuntuServer`. Other offers will be supported once they
are available for N-series VMs.
* `sku` should be `16.04.0-LTS`. Other skus will be supported once they are
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
available for N-series VMs.
### Global Configuration

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -17,7 +17,7 @@ compute application, it is best to choose `NC` VM instances.
once they are available for N-series VMs.
* `offer` should be `UbuntuServer`. Other offers will be supported once they
are available for N-series VMs.
* `sku` should be `16.04.0-LTS`. Other skus will be supported once they are
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
available for N-series VMs.
### Global Configuration

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -18,7 +18,7 @@ audio/video, it is best to choose `NV` VM instances.
once they are available for N-series VMs.
* `offer` should be `UbuntuServer`. Other offers will be supported once they
are available for N-series VMs.
* `sku` should be `16.04.0-LTS`. Other skus will be supported once they are
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
available for N-series VMs.
* `gpu` property should be specified with the following members:
* `nvidia_driver` property contains the following members:

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -18,7 +18,7 @@ compute application, it is best to choose `NC` VM instances.
once they are available for N-series VMs.
* `offer` should be `UbuntuServer`. Other offers will be supported once they
are available for N-series VMs.
* `sku` should be `16.04.0-LTS`. Other skus will be supported once they are
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
available for N-series VMs.
### Global Configuration

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -6,7 +6,7 @@
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -6,7 +6,7 @@
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -17,7 +17,7 @@ compute application, it is best to choose `NC` VM instances.
once they are available for N-series VMs.
* `offer` should be `UbuntuServer`. Other offers will be supported once they
are available for N-series VMs.
* `sku` should be `16.04.0-LTS`. Other skus will be supported once they are
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
available for N-series VMs.
* `inter_node_communication_enabled` must be set to `true`
* `max_tasks_per_node` must be set to 1 or omitted

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 2,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -19,7 +19,7 @@ compute application, it is best to choose `NC` VM instances.
once they are available for N-series VMs.
* `offer` should be `UbuntuServer`. Other offers will be supported once they
are available for N-series VMs.
* `sku` should be `16.04.0-LTS`. Other skus will be supported once they are
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
available for N-series VMs.
* `max_tasks_per_node` must be set to 1 or omitted

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -20,7 +20,7 @@ If not using GPUs, another appropriate SKU can be selected.
supported once they are available for N-series VMs.
* `offer` should be `UbuntuServer` if using GPUs. Other offers will be
supported once they are available for N-series VMs.
* `sku` should be `16.04.0-LTS` if using GPUs. Other skus will be supported
* `sku` should be `16.04-LTS` if using GPUs. Other skus will be supported
once they are available for N-series VMs.
If on multiple CPUs:

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -6,7 +6,7 @@
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -6,7 +6,7 @@
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -17,7 +17,7 @@ compute application, it is best to choose `NC` VM instances.
once they are available for N-series VMs.
* `offer` should be `UbuntuServer`. Other offers will be supported once they
are available for N-series VMs.
* `sku` should be `16.04.0-LTS`. Other skus will be supported once they are
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
available for N-series VMs.
### Global Configuration

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -17,7 +17,7 @@ compute application, it is best to choose `NC` VM instances.
once they are available for N-series VMs.
* `offer` should be `UbuntuServer`. Other offers will be supported once they
are available for N-series VMs.
* `sku` should be `16.04.0-LTS`. Other skus will be supported once they are
* `sku` should be `16.04-LTS`. Other skus will be supported once they are
available for N-series VMs.
### Global Configuration

Просмотреть файл

@ -1,7 +1,6 @@
{
"credentials": {
"batch": {
"account": "<batch account name>",
"account_key": "<batch account key>",
"account_service_url": "<batch account service url>"
},

Просмотреть файл

@ -5,7 +5,7 @@
"vm_count": 1,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04.0-LTS",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -60,6 +60,9 @@ if [ $server_type == "nfs" ]; then
echo ""
echo "nfsstat:"
nfsstat -s -4
echo ""
echo "connected clients:"
netstat -tn | grep :2049
else
echo "$server_type not supported."
exit 1