- Update recipes for vm_configuration
- Fix some issues with platform pools with new changes
This commit is contained in:
Fred Park 2017-06-06 09:31:32 -07:00
Родитель 8397b411c5
Коммит a41713c5ee
42 изменённых файлов: 386 добавлений и 187 удалений

Просмотреть файл

@ -63,6 +63,7 @@ tunneling to Docker Hosts on compute nodes
[Azure KeyVault](https://azure.microsoft.com/en-us/services/key-vault/)
* Support for execution on an
[Azure Function App environment](https://github.com/Azure/batch-shipyard/blob/master/docs/60-batch-shipyard-site-extension.md)
* Support for [custom host images](https://github.com/Azure/batch-shipyard/blob/master/docs/63-batch-shipyard-custom-images.md)
## Installation
Installation is typically an easy two-step process. The CLI is also available

Просмотреть файл

@ -880,9 +880,11 @@ def _add_pool(
for image_ref in sorted(
nas.verified_image_references, key=lambda item: item.sku)
if image_ref.publisher.lower() ==
pool_settings.publisher.lower() and
image_ref.offer.lower() == pool_settings.offer.lower() and
image_ref.sku.lower() == pool_settings.sku.lower()
pool_settings.vm_configuration.publisher.lower() and
image_ref.offer.lower() ==
pool_settings.vm_configuration.offer.lower() and
image_ref.sku.lower() ==
pool_settings.vm_configuration.sku.lower()
]
try:
sku_to_use, image_ref_to_use = skus_to_use[-1]
@ -912,11 +914,11 @@ def _add_pool(
sc_args) else '',
n=' -n' if settings.can_tune_tcp(
pool_settings.vm_size) else '',
o=' -o {}'.format(pool_settings.offer),
o=' -o {}'.format(pool_settings.vm_configuration.offer),
p=' -p {}'.format(bs.storage_entity_prefix)
if bs.storage_entity_prefix else '',
r=' -r {}'.format(preg.container) if preg.container else '',
s=' -s {}'.format(pool_settings.sku),
s=' -s {}'.format(pool_settings.vm_configuration.sku),
t=' -t {}'.format(torrentflags),
v=' -v {}'.format(__version__),
w=' -w' if pool_settings.ssh.hpn_server_swap else '',
@ -1548,13 +1550,15 @@ def _adjust_settings_for_pool_creation(config):
raise ValueError(
('Unsupported Docker Host VM Config, publisher={} offer={} '
'sku={} vm_size={}').format(publisher, offer, sku, pool.vm_size))
# compute total vm count
pool_total_vm_count = pool.vm_count.dedicated + pool.vm_count.low_priority
# ensure enough vhds for custom image pools
if util.is_not_empty(node_agent):
vhds = len(pool.vm_configuration.image_uris)
if node_agent == 'batch.node.windows amd64':
vhds_req = int(math.ceil(pool.vm_count / 20))
vhds_req = int(math.ceil(pool_total_vm_count / 20))
else:
vhds_req = int(math.ceil(pool.vm_count / 40))
vhds_req = int(math.ceil(pool_total_vm_count / 40))
if vhds_req > vhds:
raise ValueError(
('insufficient number of VHDs ({}) supplied for the number '
@ -1569,7 +1573,6 @@ def _adjust_settings_for_pool_creation(config):
'VM config, publisher={} offer={} sku={}').format(
publisher, offer, sku))
# adjust inter node comm setting
pool_total_vm_count = pool.vm_count.dedicated + pool.vm_count.low_priority
if pool_total_vm_count < 1:
raise ValueError('invalid total vm_count: {}'.format(
pool_total_vm_count))

Просмотреть файл

@ -2164,8 +2164,9 @@ def task_settings(cloud_pool, config, poolconf, jobspec, conf, missing_images):
pool_id = cloud_pool.id
vm_size = cloud_pool.vm_size.lower()
inter_node_comm = cloud_pool.enable_inter_node_communication
is_custom_image = util.is_none_or_empty(
cloud_pool.virtual_machine_configuration.os_disk)
is_custom_image = (
cloud_pool.virtual_machine_configuration.os_disk is not None
)
if is_custom_image:
publisher = None
offer = None

Просмотреть файл

@ -0,0 +1,115 @@
# Custom Images with Batch Shipyard
The focus of this article is to explain how to provision a custom image (VHD)
and then deploy it with Batch Shipyard as the VM image to use for your
compute node hosts.
## Background: Azure Batch, Azure Storage and Custom Images
Azure Batch allows provisioning compute nodes with custom images (VHDs) with
User Subscription Batch accounts. This allows users to customize the
compute node with software, settings, etc. that fit their use case. With
containerization, this requirement is weakened but some users may still
want to customize the host compute node environment with particular
versions of software such as the Docker Host engine or even embed the GPU
driver for potential faster provisioning times.
Azure Storage is used to host these custom image VHDs. There are currently
two sources for creating virtual machines in Azure: page blob VHDs and
managed disks. Azure Batch does not yet support managed disks, so you will
need to create page blobs with your VHD image.
Due to Storage account throttling limits, you must limit the number of
compute nodes served from a single storage account (and thus VHD). For
maximum performance, you should use at most one VHD for every 40 VMs for
Linux (or 20 VMs for Windows), and these VHDs should be on separate storage
accounts within the same subscription in the same region as your Batch
account.
You can use [blobxfer](https://github.com/Azure/blobxfer) or
[AzCopy](https://azure.microsoft.com/en-us/documentation/articles/storage-use-azcopy/)
to copy your VHD images.
## Provisioning a Custom Image
You will need to ensure that your custom image is sufficiently prepared
before using it as a source VHD for Batch Shipyard. The following
sub-section will detail the reasons and requisites.
### Batch Shipyard Node Preparation and Custom Images
For non-custom images (i.e., platform images or Marketplace images), Batch
Shipyard takes care of preparing the compute node with the necessary
software in order for tasks to run with Batch Shipyard.
Because custom images can muddy the assumptions about what is or is not
available in the operating system, Batch Shipyard requires that the user
prepare the custom image with the necessary software and only attempts to
modify items that are needed for functionality. Software that is required
is checked during compute node preparation.
### Base Required Software
#### Docker Host Engine
The [Docker](https://docker.com) host engine must be installed and must
be invocable as root with default path and permissions. The service must
be running upon boot. The Docker socket (`/var/run/docker.sock`) must
be available (it is available by default).
#### SSH Server
An SSH server should be installed and operational on port 22. You can
limit inbound connections through the Batch service deployed NSG on the
virtual network or network interface (and/or through the software firewall
on the host).
#### GPU-enabled Compute Nodes
In order to utilize the GPUs available on compute nodes that have them
(e.g., N-series VMs), the NVIDIA driver must be installed and loaded upon
boot.
Additionally, [nvidia-docker](https://github.com/NVIDIA/nvidia-docker)
must be installed and the service must be running upon boot.
#### Infiniband/RDMA-enabled Compute Nodes
The host VM Infiniband/RDMA stack must be enabled with the proper drivers
and the required user-land software for Infiniband installed. It is best to
base a custom image off of the existing Azure platform images that support
Infiniband/RDMA.
#### Storage Cluster Auto-Linking and Mounting
If mounting a storage cluster, the required NFSv4 or GlusterFS client tooling
must be installed and invocable such that the auto-link mount functionality
is operable. You do not need to install both clients unless you are mounting
both types of storage clusters.
#### GlusterFS On Compute
If a GlusterFS on compute shared data volume is required, then GlusterFS
server and client tooling must be installed and invocable so the shared
data volume can be created amongst the compute nodes.
### Installed/Configured Software
#### Encryption Certificates and Credential Decryption
If employing credential encryption, Batch Shipyard will exercise the necessary
logic to decrypt any encrypted field if credential encryption is enabled.
Properties in the global configuration should be enabled as per requirements
as if deploying a non-Custom Image-based compute node.
#### Batch Shipyard Docker Images
Batch Shipyard Docker images required for functionality on the compute node
will be automatically installed.
#### Azure File Docker Volume Driver
Batch Shipyard will install and configure the Azure File Docker Volume
Driver for any Azure File shared data volumes that are specified.
### Packer Samples
The [contrib](../contrib) area of the repository contains example `packer`
scripts to create a custom image from an existing Marketplace platform image.
## Allocating a Pool with a Custom Image
When allocating a compute pool with a custom image, you must ensure the
following:
0. You have a User Subscription Batch account
1. Custom image VHD is in your storage account as a page blob
2. The storage account is in the same subscription and region as your
*User Subscription* Batch account
3. You have sufficiently replicated the custom image VHD across enough
storage accounts to support your compute pool
4. You have URIs for all of these custom image VHDs. These URIs should not
include SAS information of any kind. They should be "bare" URLs.
5. Your pool configuration file has the proper `vm_configuration` settings
for `custom_image`

Просмотреть файл

@ -1,8 +1,12 @@
# Low Priority Compute Node Considerations
Please read the following carefully concerning pools allocated with low-
priority compute nodes.
priority compute nodes. You may also want to read the
[Azure Batch Low Priority Compute Node](https://docs.microsoft.com/en-us/azure/batch/batch-low-pri-vms)
documentation.
### Pool Allocation and Resizing
* Low priority compute nodes can only be allocated with non-User Subscription
Batch accounts.
* Pool and compute node allocation may take up to the full resize timeout
and may not reach full allocation if low priority nodes are preempted and
the target number of low priority nodes cannot be reached.

Просмотреть файл

@ -49,6 +49,8 @@ factors that Batch Shipyard has no control over.
regarding your request. Pull requests are always welcome!
* How do I contribute a recipe?
* Please see this [guide](98-contributing-recipes.md).
* Does Batch Shipyard support Linux custom images?
* Yes, please see [the guide](63-batch-shipyard-custom-images.md).
* Does Batch Shipyard support Windows Server Containers?
* Not at this time, we are tracking the issue
[here](https://github.com/Azure/batch-shipyard/issues/7).

Просмотреть файл

@ -13,6 +13,7 @@ and effectively running your batch-style Docker workloads on Azure Batch.
* [FS Configuration](15-batch-shipyard-configuration-fs.md)
5. [Usage](20-batch-shipyard-usage.md)
6. [Azure Functions and Batch Shipyard](60-batch-shipyard-site-extension.md)
7. [Custom Image for Host Compute Nodes](63-batch-shipyard-custom-images.md)
8. [Remote Filesystems](65-batch-shipyard-remote-fs.md)
9. [Data Movement](70-batch-shipyard-data-movement.md)
10. [Azure KeyVault for Credential Management](74-batch-shipyard-azure-keyvault.md)

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "docker-cntk-rdma",
"vm_size": "STANDARD_A9",
"vm_configuration": {
"platform_image": {
"publisher": "OpenLogic",
"offer": "CentOS-HPC",
"sku": "7.1"
}
},
"vm_size": "STANDARD_H16R",
"vm_count": {
"dedicated": 2
},
"inter_node_communication_enabled": true,
"publisher": "OpenLogic",
"offer": "CentOS-HPC",
"sku": "7.1",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "cntk-cpu-multinode",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_D1_V2",
"vm_count": {
"dedicated": 3
},
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "cntk-cpu-singlenode",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_D1_V2",
"vm_count": {
"dedicated": 1
},
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "cntk-multinode-multigpu",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_NC24",
"vm_count": {
"dedicated": 2
},
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "cntk-singlenode-multigpu",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_NC24",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "cntk-singlenode-singlegpu",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_NC6",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "caffe-cpu",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_D1_V2",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "caffe-gpu",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_NC6",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "chainer-cpu",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_D1_V2",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "chainer-gpu",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_NC6",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "dockerffmpeg",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_NV6",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "docker-hpcg",
"vm_size": "STANDARD_A9",
"vm_configuration": {
"platform_image": {
"publisher": "OpenLogic",
"offer": "CentOS-HPC",
"sku": "7.1"
}
},
"vm_size": "STANDARD_H16R",
"vm_count": {
"dedicated": 2
},
"inter_node_communication_enabled": true,
"publisher": "OpenLogic",
"offer": "CentOS-HPC",
"sku": "7.1",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "docker-linpack",
"vm_size": "STANDARD_A9",
"vm_configuration": {
"platform_image": {
"publisher": "OpenLogic",
"offer": "CentOS-HPC",
"sku": "7.1"
}
},
"vm_size": "STANDARD_H16R",
"vm_count": {
"dedicated": 2
},
"inter_node_communication_enabled": true,
"publisher": "OpenLogic",
"offer": "CentOS-HPC",
"sku": "7.1",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,37 +0,0 @@
# Dockerfile for Keras+Theano-CPU for use with Batch Shipyard on Azure Batch
FROM ubuntu:14.04
MAINTAINER Fred Park <https://github.com/Azure/batch-shipyard>
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
gfortran \
git \
wget \
curl \
ca-certificates \
libhdf5-dev \
liblapack-dev \
libopenblas-dev \
python-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# upgrade pip and install dependencies
RUN curl --silent --show-error --retry 5 https://bootstrap.pypa.io/get-pip.py | python && \
pip install --upgrade --no-cache-dir setuptools wheel six && \
pip install --upgrade --no-cache-dir pyyaml nose h5py && \
pip install --upgrade --no-cache-dir numpy && \
pip install --upgrade --no-cache-dir scipy
# install theano and keras
RUN pip install --upgrade --no-deps git+git://github.com/Theano/Theano.git && \
git clone https://github.com/fchollet/keras.git && \
cd keras && \
python setup.py install
# set keras backend to theano
ENV KERAS_BACKEND=theano
# copy in default theanorc file
COPY theanorc /root/.theanorc

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "keras-cpu",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_D1_V2",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,6 +1,7 @@
# Dockerfile for Keras+Theano-CPU for use with Batch Shipyard on Azure Batch
FROM ubuntu:14.04
MAINTAINER Fred Park <https://github.com/Azure/batch-shipyard>
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \

Просмотреть файл

@ -1,37 +0,0 @@
# Dockerfile for Keras+Theano-CPU for use with Batch Shipyard on Azure Batch
FROM ubuntu:14.04
MAINTAINER Fred Park <https://github.com/Azure/batch-shipyard>
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
gfortran \
git \
wget \
curl \
ca-certificates \
libhdf5-dev \
liblapack-dev \
libopenblas-dev \
python-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# upgrade pip and install dependencies
RUN curl --silent --show-error --retry 5 https://bootstrap.pypa.io/get-pip.py | python && \
pip install --upgrade --no-cache-dir setuptools wheel six && \
pip install --upgrade --no-cache-dir pyyaml nose h5py && \
pip install --upgrade --no-cache-dir numpy && \
pip install --upgrade --no-cache-dir scipy
# install theano and keras
RUN pip install --upgrade --no-deps git+git://github.com/Theano/Theano.git && \
git clone https://github.com/fchollet/keras.git && \
cd keras && \
python setup.py install
# set keras backend to theano
ENV KERAS_BACKEND=theano
# copy in default theanorc file
COPY theanorc /root/.theanorc

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "keras-gpu",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_NC6",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "mxnet-cpu-multinode",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_D4_V2",
"vm_count": {
"dedicated": 2
},
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,17 @@
{
"pool_specification": {
"id": "mxnet-cpu-singlenode",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_D1_V2",
"vm_count": {
"dedicated": 1
},
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,18 @@
{
"pool_specification": {
"id": "mxnet-multinode",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_NC24",
"vm_count": {
"dedicated": 2
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"inter_node_communication_enabled": true,
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "mxnet-singlenode",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_NC24",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "namd-multigpu",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_NC12",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "docker-namd-rdma",
"vm_size": "STANDARD_A9",
"vm_configuration": {
"platform_image": {
"publisher": "OpenLogic",
"offer": "CentOS-HPC",
"sku": "7.1"
}
},
"vm_size": "STANDARD_H16R",
"vm_count": {
"dedicated": 4
},
"inter_node_communication_enabled": true,
"publisher": "OpenLogic",
"offer": "CentOS-HPC",
"sku": "7.1",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "namd-tcp",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_D3_V2",
"vm_count": {
"dedicated": 4
},
"inter_node_communication_enabled": true,
"publisher": "OpenLogic",
"offer": "CentOS",
"sku": "7.3",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "docker-openfoam-rdma",
"vm_size": "STANDARD_A9",
"vm_configuration": {
"platform_image": {
"publisher": "OpenLogic",
"offer": "CentOS-HPC",
"sku": "7.1"
}
},
"vm_size": "STANDARD_H16R",
"vm_count": {
"dedicated": 2
},
"inter_node_communication_enabled": true,
"publisher": "OpenLogic",
"offer": "CentOS-HPC",
"sku": "7.1",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "docker-openfoam-tcp",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_D2_V2",
"vm_count": {
"dedicated": 2
},
"inter_node_communication_enabled": true,
"publisher": "OpenLogic",
"offer": "CentOS",
"sku": "7.3",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "remotefs-batchpool",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_D2_V2",
"vm_count": {
"dedicated": 4
},
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "tensorflow-cpu",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_D1_V2",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "tensorflow-distributed",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_D4_V2",
"vm_count": {
"dedicated": 2
},
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,14 +1,18 @@
{
"pool_specification": {
"id": "tensorflow-distributed",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_NC12",
"vm_count": {
"dedicated": 2
},
"inter_node_communication_enabled": true,
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "tensorflow-gpu",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_NC6",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "torch",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_D3_V2",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -1,13 +1,17 @@
{
"pool_specification": {
"id": "torch-gpu",
"vm_configuration": {
"platform_image": {
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS"
}
},
"vm_size": "STANDARD_NC6",
"vm_count": {
"dedicated": 1
},
"publisher": "Canonical",
"offer": "UbuntuServer",
"sku": "16.04-LTS",
"ssh": {
"username": "docker"
},

Просмотреть файл

@ -158,6 +158,20 @@ check_for_nvidia() {
fi
}
check_docker_root_dir() {
set +e
rootdir=$(docker info | grep "Docker Root Dir" | cut -d' ' -f 4)
set -e
echo "$rootdir"
if [ -z "$rootdir" ]; then
echo "ERROR: could not determine docker graph root"
elif [[ "$rootdir" == /mnt* && "$1" == "ubuntu" ]] || [[ "$rootdir" == /mnt/resource* && "$1" != "ubuntu" ]]; then
echo "INFO: docker root is within ephemeral temp disk"
else
echo "WARNING: docker graph root is on the OS disk. Performance may be impacted."
fi
}
check_for_docker_host_engine() {
set +e
docker --version
@ -171,11 +185,14 @@ check_for_docker_host_engine() {
check_for_glusterfs_on_compute() {
set +e
gluster
if [ $? -ne 0 ]; then
echo "ERROR: gluster server not installed"
rc0=$?
glusterfs -V
rc1=$?
set -e
if [ $rc0 -ne 0 ] || [ $rc1 -ne 0 ]; then
echo "ERROR: gluster server and client not installed"
exit 1
fi
set -e
}
check_for_storage_cluster_software() {
@ -309,11 +326,15 @@ fi
# one-time setup
if [ ! -f $nodeprepfinished ] && [ $networkopt -eq 1 ]; then
# do not fail script if this function fails
set +e
optimize_tcp_network_settings $DISTRIB_ID $DISTRIB_RELEASE
set -e
fi
# check for docker host engine
check_for_docker_host_engine
check_docker_root_dir $DISTRIB_ID
# TODO warn if graph is on os disk