Support ARM Images for custom images (#126)
This commit is contained in:
Родитель
238982db77
Коммит
60b4fc446f
|
@ -4,6 +4,7 @@
|
|||
|
||||
### Added
|
||||
- Support for deploying to an ARM Virtual Network in Batch Service mode
|
||||
- Support for deploying a compute node from an ARM Image resource
|
||||
- YAML configuration support. JSON formatted configuration files will continue
|
||||
to be supported, however, note the breaking change with the corresponding
|
||||
environment variable names for specifying individual config files from the
|
||||
|
@ -16,6 +17,10 @@ commandline.
|
|||
`SHIPYARD_FS_JSON` have been renamed to `SHIPYARD_CREDENTIALS_CONF`,
|
||||
`SHIPYARD_CONFIG_CONF`, `SHIPYARD_POOL_CONF`, `SHIPYARD_JOBS_CONF`, and
|
||||
`SHIPYARD_FS_CONF` respectively.
|
||||
- **Breaking Change:** `image_uris` in the `vm_configuration`:`custom_image`
|
||||
property of the pool configuration has been replaced with `arm_image_id`
|
||||
which is a reference to an ARM Image resource. Please see the custom image
|
||||
guide for more information.
|
||||
- `aad` can be specified at a "global" level in the credentials configuration
|
||||
file, which is then applied to `batch`, `keyvault` and/or `management`
|
||||
sections. Please see the credentials configuration guide for more information.
|
||||
|
|
|
@ -8,9 +8,8 @@ pool_specification:
|
|||
version: latest
|
||||
native: false
|
||||
custom_image:
|
||||
image_uris:
|
||||
- https://mystorageaccount.blob.core.windows.net/myvhds/mycustomimg.vhd
|
||||
node_agent: batch.node.ubuntu 16.04
|
||||
arm_image_id: /subscriptions/<subscription_id>/resourceGroups/<resource_group>/providers/Microsoft.Compute/images/<image_name>
|
||||
node_agent: <node agent sku id>
|
||||
native: false
|
||||
vm_size: STANDARD_D2_V2
|
||||
vm_count:
|
||||
|
|
|
@ -31,7 +31,6 @@ from builtins import ( # noqa
|
|||
next, oct, open, pow, round, super, filter, map, zip)
|
||||
# stdlib imports
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
try:
|
||||
import pathlib2 as pathlib
|
||||
|
@ -188,49 +187,29 @@ def check_for_invalid_config(config):
|
|||
"""Check for invalid configuration settings
|
||||
:param dict config: configuration dict
|
||||
"""
|
||||
# check for invalid properties, remove checks on next major release
|
||||
try:
|
||||
config['pool_specification']['ssh_docker_tunnel']
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
raise ValueError(
|
||||
'Invalid ssh_docker_tunnel property found in pool_specification. '
|
||||
'Please update your pool configuration file. See the '
|
||||
'configuration doc for more information.')
|
||||
try:
|
||||
config['docker_registry']['login']
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
raise ValueError(
|
||||
'Invalid docker_registry:login property found in global '
|
||||
'configuration. Please update your global configuration and '
|
||||
'credentials file. See the configuration doc for more '
|
||||
'information.')
|
||||
try:
|
||||
config['docker_registry']['storage_account_settings']
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
raise ValueError(
|
||||
'Invalid docker_registry:storage_account_settings property '
|
||||
'found in global configuration. Please update your global '
|
||||
'configuration file. See the configuration doc for more '
|
||||
'information.')
|
||||
# check for deprecated properties
|
||||
# check for invalid properties, remove checks in a future release
|
||||
try:
|
||||
config['docker_registry']['azure_storage']
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
logger.warning(
|
||||
'DEPRECATION WARNING: docker_registry:azure_storage is '
|
||||
'specified. Docker private registries backed by Azure Storage '
|
||||
'blobs will not be supported in future releases. Please '
|
||||
'migrate your Docker images to Azure Container Registry, '
|
||||
'Docker Hub (public or private), or any other Internet '
|
||||
'accessible Docker registry solution.')
|
||||
raise ValueError(
|
||||
'docker_registry:azure_storage is specified. Docker private '
|
||||
'registries backed by Azure Storage blobs is no longer '
|
||||
'supported. Please migrate your Docker images to Azure '
|
||||
'Container Registry, Docker Hub (public or private), or any '
|
||||
'other Internet accessible Docker registry solution.')
|
||||
try:
|
||||
config['pool_specification']['vm_configuration']['custom_image'][
|
||||
'image_uris']
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
raise ValueError(
|
||||
'Invalid image_uris specified for custom_image. Please update '
|
||||
'your pool configuration file. Please see the pool configuration '
|
||||
'doc for more information.')
|
||||
# check for deprecated properties, migrate to invalid in a future release
|
||||
try:
|
||||
if isinstance(config['pool_specification']['vm_count'], int):
|
||||
logger.warning(
|
||||
|
@ -998,12 +977,14 @@ def _construct_pool_object(
|
|||
if util.is_not_empty(custom_image_na):
|
||||
_rflist.append(_NODEPREP_CUSTOMIMAGE_FILE)
|
||||
vmconfig = batchmodels.VirtualMachineConfiguration(
|
||||
os_disk=batchmodels.OSDisk(
|
||||
image_uris=pool_settings.vm_configuration.image_uris,
|
||||
caching=batchmodels.CachingType.read_write,
|
||||
image_reference=batchmodels.ImageReference(
|
||||
virtual_machine_image_id=pool_settings.
|
||||
vm_configuration.arm_image_id,
|
||||
),
|
||||
node_agent_sku_id=pool_settings.vm_configuration.node_agent,
|
||||
)
|
||||
logger.debug('deploying custom image: {}'.format(
|
||||
vmconfig.image_reference.virtual_machine_image_id))
|
||||
if settings.is_native_docker_pool(
|
||||
config, vm_config=pool_settings.vm_configuration):
|
||||
registries = []
|
||||
|
@ -1864,19 +1845,6 @@ def _adjust_settings_for_pool_creation(config):
|
|||
publisher, offer, sku, pool.vm_size))
|
||||
# compute total vm count
|
||||
pool_total_vm_count = pool.vm_count.dedicated + pool.vm_count.low_priority
|
||||
# ensure enough vhds for custom image pools
|
||||
if util.is_not_empty(node_agent):
|
||||
vhds = len(pool.vm_configuration.image_uris)
|
||||
if node_agent == 'batch.node.windows amd64':
|
||||
vhds_req = int(math.ceil(pool_total_vm_count / 20))
|
||||
else:
|
||||
vhds_req = int(math.ceil(pool_total_vm_count / 40))
|
||||
if vhds_req > vhds:
|
||||
raise ValueError(
|
||||
('insufficient number of VHDs ({}) supplied for the number '
|
||||
'of compute nodes to allocate ({}). At least {} VHDs are '
|
||||
'required.').format(
|
||||
vhds, pool.vm_count, vhds_req))
|
||||
# adjust for shipyard container requirement
|
||||
if shipyard_container_required or util.is_not_empty(node_agent):
|
||||
settings.set_use_shipyard_docker_image(config, True)
|
||||
|
|
|
@ -94,7 +94,7 @@ PoolVmPlatformImageSettings = collections.namedtuple(
|
|||
)
|
||||
PoolVmCustomImageSettings = collections.namedtuple(
|
||||
'PoolVmCustomImageSettings', [
|
||||
'image_uris',
|
||||
'arm_image_id',
|
||||
'node_agent',
|
||||
'native',
|
||||
]
|
||||
|
@ -674,7 +674,7 @@ def _populate_pool_vm_configuration(config):
|
|||
else:
|
||||
conf = pool_vm_configuration(config, 'custom_image')
|
||||
return PoolVmCustomImageSettings(
|
||||
image_uris=conf['image_uris'],
|
||||
arm_image_id=_kv_read_checked(conf, 'arm_image_id'),
|
||||
node_agent=conf['node_agent'].lower(),
|
||||
native=_kv_read(conf, 'native', default=False),
|
||||
)
|
||||
|
|
|
@ -16,9 +16,8 @@ pool_specification:
|
|||
version: latest
|
||||
native: false
|
||||
custom_image:
|
||||
image_uris:
|
||||
- https://mystorageaccount.blob.core.windows.net/myvhds/mycustomimg.vhd
|
||||
node_agent: batch.node.ubuntu 16.04
|
||||
arm_image_id: /subscriptions/<subscription_id>/resourceGroups/<resource_group>/providers/Microsoft.Compute/images/<image_name>
|
||||
node_agent: <node agent sku id>
|
||||
native: false
|
||||
vm_size: STANDARD_D2_V2
|
||||
vm_count:
|
||||
|
@ -115,9 +114,11 @@ specify both. If using a custom image, please see the
|
|||
conversion equivalent for the specified `publisher`, `offer`, `sku`
|
||||
then no conversion is performed. The default is `false`.
|
||||
* (required for custom image) `custom_image` defines the custom image to
|
||||
use:
|
||||
* (required for custom image) `image_uris` defines a list of page blob
|
||||
VHDs to use for the pool. These should be bare URLs without SAS keys.
|
||||
use. AAD `batch` credentials are required to use custom images for both
|
||||
Batch service and User Subscription modes.
|
||||
* (required for custom image) `arm_image_id` defines the ARM image id
|
||||
to use as the OS image for the pool. The ARM image must be in the
|
||||
same subscription and region as the Batch account.
|
||||
* (required for custom image) `node_agent` is the node agent sku id to
|
||||
use with this custom image. You can view supported base images and
|
||||
their node agent sku ids with the `pool listskus` command.
|
||||
|
@ -223,7 +224,9 @@ network timeout, resolution failure or download problem). This defaults to
|
|||
`false`.
|
||||
* (optional) `block_until_all_global_resources_loaded` will block the node
|
||||
from entering ready state until all Docker images are loaded. This defaults
|
||||
to `true`.
|
||||
to `true`. This option has no effect on `native` container support pools (the
|
||||
behavior will effectively reflect `true` for this property on `native`
|
||||
container support pools).
|
||||
* (optional) `transfer_files_on_pool_creation` will ingress all `files`
|
||||
specified in the `global_resources` section of the global configuration file
|
||||
when the pool is created. If files are to be ingressed to Azure Blob or File
|
||||
|
|
|
@ -1,30 +1,33 @@
|
|||
# Custom Images with Batch Shipyard
|
||||
The focus of this article is to explain how to provision a custom image (VHD)
|
||||
and then deploy it with Batch Shipyard as the VM image to use for your
|
||||
compute node hosts.
|
||||
The focus of this article is to explain how to provision an ARM Image
|
||||
resource and then deploy it with Batch Shipyard as the VM image to use for
|
||||
your compute node hosts.
|
||||
|
||||
## Background: Azure Batch, Azure Storage and Custom Images
|
||||
Azure Batch allows provisioning compute nodes with custom images (VHDs) with
|
||||
User Subscription Batch accounts. This allows users to customize the
|
||||
compute node with software, settings, etc. that fit their use case. With
|
||||
containerization, this requirement is weakened but some users may still
|
||||
want to customize the host compute node environment with particular
|
||||
versions of software such as the Docker Host engine or even embed the GPU
|
||||
driver for potential faster provisioning times.
|
||||
## Background: Azure Resources and Azure Batch Custom Images
|
||||
Azure Batch allows provisioning compute nodes with custom images with both
|
||||
Batch Service and User Subscription Batch accounts. This allows users to
|
||||
customize the compute node with software, settings, etc. that fit their use
|
||||
case. With containerization, this requirement is weakened but some users may
|
||||
still want to customize the host compute node environment with particular
|
||||
versions of software such as the Docker Host engine or pre-install and embed
|
||||
certain software.
|
||||
|
||||
Azure Storage is used to host these custom image VHDs. Currently, there are
|
||||
two sources for creating virtual machines in Azure which are, page blob
|
||||
VHDs and managed disks. Currently, Azure Batch does not support managed
|
||||
disks, so you will need to create page blobs with your VHD image.
|
||||
Azure Batch only supports creating compute nodes through ARM Image resources.
|
||||
You can create these images using existing page blob VHDs or exporting
|
||||
managed disks. You must create the Image in the same subscription and
|
||||
region as your Batch account.
|
||||
|
||||
Due to Storage account throttling limits, you must limit the number of
|
||||
compute nodes served from a single storage account (and thus VHD). For
|
||||
maximum performance, you should limit one VHD for every 40 VMs for Linux
|
||||
(or 20 VMs for Windows) and these VHDs should be on separate storage accounts
|
||||
within the same subscription in the same region as your Batch account.
|
||||
You can use [blobxfer](https://github.com/Azure/blobxfer) or
|
||||
[AzCopy](https://azure.microsoft.com/en-us/documentation/articles/storage-use-azcopy/)
|
||||
to copy and/or replicate your VHD images.
|
||||
to copy your page blob VHDs if they are in a different region than your
|
||||
Batch account.
|
||||
|
||||
### Azure Active Directory Authentication Required
|
||||
Azure Active Directory authentication is required for the `batch` account
|
||||
regardless of the account mode. This means that the
|
||||
[credentials configuration file](11-batch-shipyard-configuration-credentials.md)
|
||||
must include an `aad` section with the appropriate options, including the
|
||||
authentication method of your choosing.
|
||||
|
||||
## Provisioning a Custom Image
|
||||
You will need to ensure that your custom image is sufficiently prepared
|
||||
|
@ -132,19 +135,12 @@ scripts to create a custom image from an existing Marketplace platform image.
|
|||
When allocating a compute pool with a custom image, you must ensure the
|
||||
following:
|
||||
|
||||
0. You will be deploying the pool with a *User Subscription* Batch account
|
||||
1. Custom image VHD is in your storage account as a page blob object
|
||||
2. The storage account is in the same subscription and region as your
|
||||
User Subscription Batch account
|
||||
3. You have sufficiently replicated the custom image VHD across enough
|
||||
storage accounts if your compute pool exceeds the single VHD limit. These
|
||||
storage accounts are in the same subscription and region as your
|
||||
User Subscription Batch account
|
||||
4. Your pool specification has the proper `vm_configuration` settings
|
||||
for `custom_image`
|
||||
* You have `image_uris` for all of these custom image VHDs. These URIs
|
||||
should not include SAS information of any kind. They should be "bare"
|
||||
URLs.
|
||||
1. The ARM Image is in the same subscription and region as your Batch account.
|
||||
2. You are specifying the proper `aad` settings in your credentials
|
||||
configuration file for `batch` (or "globally" in the credentials file).
|
||||
3. Your pool specification has the proper `vm_configuration` settings
|
||||
for `custom_image`.
|
||||
* The `arm_image_id` points to a valid ARM Image resource
|
||||
* `node_agent` is populated with the correct node agent sku id which
|
||||
corresponds to the distribution used in the custom image. For instance,
|
||||
if your custom image is based on Ubuntu 16.04, you would use
|
||||
|
|
|
@ -9,12 +9,13 @@ document.
|
|||
The following Azure Batch actions should only be performed through Batch
|
||||
Shipyard when deploying your workload through this toolkit as Batch
|
||||
Shipyard needs to take special actions or ensure the intended outcome:
|
||||
* Task termination (if task is running): use `jobs termtasks`
|
||||
* Task deletion (if task is running): use `jobs deltasks`
|
||||
* Job termination (if any tasks are running in the job): use the
|
||||
`--termtasks` option with `jobs term`
|
||||
* Job deletion (if any tasks are running in the job): use the
|
||||
`--termtasks` option with `jobs del`
|
||||
* Non-`native` container pools:
|
||||
* Task termination (if task is running): use `jobs termtasks`
|
||||
* Task deletion (if task is running): use `jobs deltasks`
|
||||
* Job termination (if any tasks are running in the job): use the
|
||||
`--termtasks` option with `jobs term`
|
||||
* Job deletion (if any tasks are running in the job): use the
|
||||
`--termtasks` option with `jobs del`
|
||||
* Pool resize: use `pool resize`
|
||||
* Pool deletion: use `pool del`
|
||||
|
||||
|
@ -27,8 +28,9 @@ to create compute resources to execute your Batch Shipyard jobs against.
|
|||
* Tasks can have a maximum run time of 7 days (including time spent for
|
||||
data movement). This is a current fundamental limitation in the Azure Batch
|
||||
service.
|
||||
* It is recommended to provision an SSH user to aid in client-side assisted
|
||||
task termination and other tasks that may require direct SSH access.
|
||||
* For non-`native` container pools, it is recommended to provision an SSH
|
||||
user to aid in client-side assisted task termination and other tasks that
|
||||
may require direct SSH access.
|
||||
* SSH tunnel script generation is only compatible with non-Windows machines.
|
||||
* Data movement support on Windows is restricted to scp. Both `ssh.exe` and
|
||||
`scp.exe` must be found through `%PATH%` or in the current working directory.
|
||||
|
@ -45,8 +47,6 @@ no longer performed before a pool is created and will instead result in
|
|||
a ResizeError on the pool if not all compute nodes can be allocated.
|
||||
* Data movement between Batch tasks as defined by `input_data`:`azure_batch`
|
||||
is restricted to Batch accounts with keys (non-AAD).
|
||||
* Virtual network support in Batch pools can only be used with
|
||||
UserSubscription Batch accounts.
|
||||
* Windows Server 2016, Clear Linux, and Oracle Linux are not supported with
|
||||
Batch Shipyard at this time.
|
||||
* Task dependencies are incompatible with multi-instance tasks. This is a
|
||||
|
|
Загрузка…
Ссылка в новой задаче