Support ARM Images for custom images (#126)

This commit is contained in:
Fred Park 2017-09-28 21:29:42 -07:00
Родитель 238982db77
Коммит 60b4fc446f
7 изменённых файлов: 82 добавлений и 111 удалений

Просмотреть файл

@ -4,6 +4,7 @@
### Added
- Support for deploying to an ARM Virtual Network in Batch Service mode
- Support for deploying a compute node from an ARM Image resource
- YAML configuration support. JSON formatted configuration files will continue
to be supported, however, note the breaking change with the corresponding
environment variable names for specifying individual config files from the
@ -16,6 +17,10 @@ commandline.
`SHIPYARD_FS_JSON` have been renamed to `SHIPYARD_CREDENTIALS_CONF`,
`SHIPYARD_CONFIG_CONF`, `SHIPYARD_POOL_CONF`, `SHIPYARD_JOBS_CONF`, and
`SHIPYARD_FS_CONF` respectively.
- **Breaking Change:** `image_uris` in the `vm_configuration`:`custom_image`
property of the pool configuration has been replaced with `arm_image_id`
which is a reference to an ARM Image resource. Please see the custom image
guide for more information.
- `aad` can be specified at a "global" level in the credentials configuration
file, which is then applied to `batch`, `keyvault` and/or `management`
section. Please see the credentials configuration guide for more information.

Просмотреть файл

@ -8,9 +8,8 @@ pool_specification:
version: latest
native: false
custom_image:
image_uris:
- https://mystorageaccount.blob.core.windows.net/myvhds/mycustomimg.vhd
node_agent: batch.node.ubuntu 16.04
arm_image_id: /subscriptions/<subscription_id>/resourceGroups/<resource_group>/providers/Microsoft.Compute/images/<image_name>
node_agent: <node agent sku id>
native: false
vm_size: STANDARD_D2_V2
vm_count:

Просмотреть файл

@ -31,7 +31,6 @@ from builtins import ( # noqa
next, oct, open, pow, round, super, filter, map, zip)
# stdlib imports
import logging
import math
import os
try:
import pathlib2 as pathlib
@ -188,49 +187,29 @@ def check_for_invalid_config(config):
"""Check for invalid configuration settings
:param dict config: configuration dict
"""
# check for invalid properties, remove checks on next major release
try:
config['pool_specification']['ssh_docker_tunnel']
except KeyError:
pass
else:
raise ValueError(
'Invalid ssh_docker_tunnel property found in pool_specification. '
'Please update your pool configuration file. See the '
'configuration doc for more information.')
try:
config['docker_registry']['login']
except KeyError:
pass
else:
raise ValueError(
'Invalid docker_registry:login property found in global '
'configuration. Please update your global configuration and '
'credentials file. See the configuration doc for more '
'information.')
try:
config['docker_registry']['storage_account_settings']
except KeyError:
pass
else:
raise ValueError(
'Invalid docker_registry:storage_account_settings property '
'found in global configuration. Please update your global '
'configuration file. See the configuration doc for more '
'information.')
# check for deprecated properties
# check for invalid properties, remove checks in a future release
try:
config['docker_registry']['azure_storage']
except KeyError:
pass
else:
logger.warning(
'DEPRECATION WARNING: docker_registry:azure_storage is '
'specified. Docker private registries backed by Azure Storage '
'blobs will not be supported in future releases. Please '
'migrate your Docker images to Azure Container Registry, '
'Docker Hub (public or private), or any other Internet '
'accessible Docker registry solution.')
raise ValueError(
'docker_registry:azure_storage is specified. Docker private '
'registries backed by Azure Storage blobs is no longer '
'supported. Please migrate your Docker images to Azure '
'Container Registry, Docker Hub (public or private), or any '
'other Internet accessible Docker registry solution.')
try:
config['pool_specification']['vm_configuration']['custom_image'][
'image_uris']
except KeyError:
pass
else:
raise ValueError(
'Invalid image_uris specified for custom_image. Please update '
'your pool configuration file. Please see the pool configuration '
'doc for more information.')
# check for deprecated properties, migrate to invalid in a future release
try:
if isinstance(config['pool_specification']['vm_count'], int):
logger.warning(
@ -998,12 +977,14 @@ def _construct_pool_object(
if util.is_not_empty(custom_image_na):
_rflist.append(_NODEPREP_CUSTOMIMAGE_FILE)
vmconfig = batchmodels.VirtualMachineConfiguration(
os_disk=batchmodels.OSDisk(
image_uris=pool_settings.vm_configuration.image_uris,
caching=batchmodels.CachingType.read_write,
image_reference=batchmodels.ImageReference(
virtual_machine_image_id=pool_settings.
vm_configuration.arm_image_id,
),
node_agent_sku_id=pool_settings.vm_configuration.node_agent,
)
logger.debug('deploying custom image: {}'.format(
vmconfig.image_reference.virtual_machine_image_id))
if settings.is_native_docker_pool(
config, vm_config=pool_settings.vm_configuration):
registries = []
@ -1864,19 +1845,6 @@ def _adjust_settings_for_pool_creation(config):
publisher, offer, sku, pool.vm_size))
# compute total vm count
pool_total_vm_count = pool.vm_count.dedicated + pool.vm_count.low_priority
# ensure enough vhds for custom image pools
if util.is_not_empty(node_agent):
vhds = len(pool.vm_configuration.image_uris)
if node_agent == 'batch.node.windows amd64':
vhds_req = int(math.ceil(pool_total_vm_count / 20))
else:
vhds_req = int(math.ceil(pool_total_vm_count / 40))
if vhds_req > vhds:
raise ValueError(
('insufficient number of VHDs ({}) supplied for the number '
'of compute nodes to allocate ({}). At least {} VHDs are '
'required.').format(
vhds, pool.vm_count, vhds_req))
# adjust for shipyard container requirement
if shipyard_container_required or util.is_not_empty(node_agent):
settings.set_use_shipyard_docker_image(config, True)

Просмотреть файл

@ -94,7 +94,7 @@ PoolVmPlatformImageSettings = collections.namedtuple(
)
PoolVmCustomImageSettings = collections.namedtuple(
'PoolVmCustomImageSettings', [
'image_uris',
'arm_image_id',
'node_agent',
'native',
]
@ -674,7 +674,7 @@ def _populate_pool_vm_configuration(config):
else:
conf = pool_vm_configuration(config, 'custom_image')
return PoolVmCustomImageSettings(
image_uris=conf['image_uris'],
arm_image_id=_kv_read_checked(conf, 'arm_image_id'),
node_agent=conf['node_agent'].lower(),
native=_kv_read(conf, 'native', default=False),
)

Просмотреть файл

@ -16,9 +16,8 @@ pool_specification:
version: latest
native: false
custom_image:
image_uris:
- https://mystorageaccount.blob.core.windows.net/myvhds/mycustomimg.vhd
node_agent: batch.node.ubuntu 16.04
arm_image_id: /subscriptions/<subscription_id>/resourceGroups/<resource_group>/providers/Microsoft.Compute/images/<image_name>
node_agent: <node agent sku id>
native: false
vm_size: STANDARD_D2_V2
vm_count:
@ -115,9 +114,11 @@ specify both. If using a custom image, please see the
conversion equivalent for the specified `publisher`, `offer`, `sku`
then no conversion is performed. The default is `false`.
* (required for custom image) `custom_image` defines the custom image to
use:
* (required for custom image) `image_uris` defines a list of page blob
VHDs to use for the pool. These should be bare URLs without SAS keys.
use. AAD `batch` credentials are required to use custom images for both
Batch service and User Subscription modes.
* (required for custom image) `arm_image_id` defines the ARM image id
to use as the OS image for the pool. The ARM image must be in the
same subscription and region as the Batch account.
* (required for custom image) `node_agent` is the node agent sku id to
use with this custom image. You can view supported base images and
their node agent sku ids with the `pool listskus` command.
@ -223,7 +224,9 @@ network timeout, resolution failure or download problem). This defaults to
`false`.
* (optional) `block_until_all_global_resources_loaded` will block the node
from entering ready state until all Docker images are loaded. This defaults
to `true`.
to `true`. This option has no effect on `native` container support pools (the
behavior will effectively reflect `true` for this property on `native`
container support pools).
* (optional) `transfer_files_on_pool_creation` will ingress all `files`
specified in the `global_resources` section of the global configuration file
when the pool is created. If files are to be ingressed to Azure Blob or File

Просмотреть файл

@ -1,30 +1,33 @@
# Custom Images with Batch Shipyard
The focus of this article is to explain how to provision a custom image (VHD)
and then deploy it with Batch Shipyard as the VM image to use for your
compute node hosts.
The focus of this article is to explain how to provision an ARM Image
resource and then deploy it with Batch Shipyard as the VM image to use for
your compute node hosts.
## Background: Azure Batch, Azure Storage and Custom Images
Azure Batch allows provisioning compute nodes with custom images (VHDs) with
User Subscription Batch accounts. This allows users to customize the
compute node with software, settings, etc. that fit their use case. With
containerization, this requirement is weakened but some users may still
want to customize the host compute node environment with particular
versions of software such as the Docker Host engine or even embed the GPU
driver for potential faster provisioning times.
## Background: Azure Resources and Azure Batch Custom Images
Azure Batch allows provisioning compute nodes with custom images with both
Batch Service and User Subscription Batch accounts. This allows users to
customize the compute node with software, settings, etc. that fit their use
case. With containerization, this requirement is weakened but some users may
still want to customize the host compute node environment with particular
versions of software such as the Docker Host engine or pre-install and embed
certain software.
Azure Storage is used to host these custom image VHDs. Currently, there are
two sources for creating virtual machines in Azure which are, page blob
VHDs and managed disks. Currently, Azure Batch does not support managed
disks, so you will need to create page blobs with your VHD image.
For custom images, Azure Batch only supports creating compute nodes through ARM Image resources.
You can create these images using existing page blob VHDs or exporting
managed disks. You must create the Image in the same subscription and
region as your Batch account.
Due to Storage account throttling limits, you must limit the number of
compute nodes served from a single storage account (and thus VHD). For
maximum performance, you should limit one VHD for every 40 VMs for Linux
(or 20 VMs for Windows) and these VHDs should be on separate storage accounts
within the same subscription in the same region as your Batch account.
You can use [blobxfer](https://github.com/Azure/blobxfer) or
[AzCopy](https://azure.microsoft.com/en-us/documentation/articles/storage-use-azcopy/)
to copy and/or replicate your VHD images.
to copy your page blob VHDs if they are in a different region than your
Batch account.
### Azure Active Directory Authentication Required
Azure Active Directory authentication is required for the `batch` account
regardless of the account mode. This means that the
[credentials configuration file](11-batch-shipyard-configuration-credentials.md)
must include an `aad` section with the appropriate options, including the
authentication method of your choosing.
## Provisioning a Custom Image
You will need to ensure that your custom image is sufficiently prepared
@ -132,19 +135,12 @@ scripts to create a custom image from an existing Marketplace platform image.
When allocating a compute pool with a custom image, you must ensure the
following:
0. You will be deploying the pool with a *User Subscription* Batch account
1. Custom image VHD is in your storage account as a page blob object
2. The storage account is in the same subscription and region as your
User Subscription Batch account
3. You have sufficiently replicated the custom image VHD across enough
storage accounts if your compute pool exceeds the single VHD limit. These
storage accounts are in the same subscription and region as your
User Subscription Batch account
4. Your pool specification has the proper `vm_configuration` settings
for `custom_image`
* You have `image_uris` for all of these custom image VHDs. These URIs
should not include SAS information of any kind. They should be "bare"
URLs.
1. The ARM Image is in the same subscription and region as your Batch account.
2. You are specifying the proper `aad` settings in your credentials
configuration file for `batch` (or "globally" in the credentials file).
3. Your pool specification has the proper `vm_configuration` settings
for `custom_image`.
* The `arm_image_id` points to a valid ARM Image resource
* `node_agent` is populated with the correct node agent sku id which
corresponds to the distribution used in the custom image. For instance,
if your custom image is based on Ubuntu 16.04, you would use

Просмотреть файл

@ -9,12 +9,13 @@ document.
The following Azure Batch actions should only be performed through Batch
Shipyard when deploying your workload through this toolkit as Batch
Shipyard needs to take special actions or ensure the intended outcome:
* Task termination (if task is running): use `jobs termtasks`
* Task deletion (if task is running): use `jobs deltasks`
* Job termination (if any tasks are running in the job): use the
`--termtasks` option with `jobs term`
* Job deletion (if any tasks are running in the job): use the
`--termtasks` option with `jobs del`
* Non-`native` container pools:
* Task termination (if task is running): use `jobs termtasks`
* Task deletion (if task is running): use `jobs deltasks`
* Job termination (if any tasks are running in the job): use the
`--termtasks` option with `jobs term`
* Job deletion (if any tasks are running in the job): use the
`--termtasks` option with `jobs del`
* Pool resize: use `pool resize`
* Pool deletion: use `pool del`
@ -27,8 +28,9 @@ to create compute resources to execute your Batch Shipyard jobs against.
* Tasks can have a maximum run time of 7 days (including time spent for
data movement). This is a current fundamental limitation in the Azure Batch
service.
* It is recommended to provision an SSH user to aid in client-side assisted
task termination and other tasks that may require direct SSH access.
* For non-`native` container pools, it is recommended to provision an SSH
user to aid in client-side assisted task termination and other tasks that
may require direct SSH access.
* SSH tunnel script generation is only compatible with non-Windows machines.
* Data movement support on Windows is restricted to scp. Both `ssh.exe` and
`scp.exe` must be found through `%PATH%` or in the current working directory.
@ -45,8 +47,6 @@ no longer performed before a pool is created and will instead result in
a ResizeError on the pool if not all compute nodes can be allocated.
* Data movement between Batch tasks as defined by `input_data`:`azure_batch`
is restricted to Batch accounts with keys (non-AAD).
* Virtual network support in Batch pools can only be used with
UserSubscription Batch accounts.
* Windows Server 2016, Clear Linux, and Oracle Linux are not supported with
Batch Shipyard at this time.
* Task dependencies are incompatible with multi-instance tasks. This is a