Add task factory (parameter sweep) support

- Resolves #93
This commit is contained in:
Fred Park 2017-07-27 15:01:41 -07:00
Parent 23a753a110
Commit 4105acc2f8
10 changed files with 446 additions and 20 deletions

View file

@@ -53,8 +53,10 @@ to containers executed on compute nodes
* Support for [Low Priority Compute Nodes](https://docs.microsoft.com/en-us/azure/batch/batch-low-pri-vms)
* Support for Azure Batch "auto" concepts, including [autoscale](https://github.com/Azure/batch-shipyard/blob/master/docs/30-batch-shipyard-autoscale.md) and autopool
to dynamically scale and control computing resources on-demand
* Support for
[Azure Batch task dependencies](https://azure.microsoft.com/en-us/documentation/articles/batch-task-dependencies/)
* Support for [Task Factories](https://github.com/Azure/batch-shipyard/blob/master/docs/35-batch-shipyard-task-factory.md)
with the ability to generate parametric (parameter) sweeps and replicate
tasks
* Support for [Azure Batch task dependencies](https://azure.microsoft.com/en-us/documentation/articles/batch-task-dependencies/)
allowing complex processing pipelines and DAGs with Docker containers
* Transparent support for
[GPU-accelerated Docker applications](https://github.com/NVIDIA/nvidia-docker)
@@ -67,6 +69,7 @@ for MPI on HPC low-latency Azure VM instances:
* [A-Series](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/a8-a9-a10-a11-specs): STANDARD\_A8, STANDARD\_A9
* [H-Series](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/a8-a9-a10-a11-specs): STANDARD\_H16R, STANDARD\_H16MR
* [N-Series](https://docs.microsoft.com/en-us/azure/virtual-machines/windows/sizes-gpu): STANDARD\_NC24R (not yet ready with Linux hosts)
* Support for live job migration between pools
* Automatic setup of SSH users to all nodes in the compute pool and optional
tunneling to Docker Hosts on compute nodes
* Support for credential management through

View file

@@ -55,6 +55,28 @@
},
"tasks": [
{
"task_factory": {
"parametric_sweep": {
"product": [
{
"start": 0,
"stop": 10,
"step": 1
}
],
"combinations": {
"iterable": ["ABC", "012"],
"length": 2,
"replacement": false
},
"permutations": {
"iterable": "ABCDEF",
"length": 3
},
"zip": ["ab", "01"]
},
"repeat": 3
},
"id": null,
"image": "busybox",
"name": null,

View file

@@ -61,7 +61,7 @@ util.setup_logger(logger)
# global defines
_MAX_REBOOT_RETRIES = 5
_SSH_TUNNEL_SCRIPT = 'ssh_docker_tunnel_shipyard.sh'
_GENERIC_DOCKER_TASK_PREFIX = 'dockertask-'
_GENERIC_DOCKER_TASK_PREFIX = 'task-'
_RUN_ELEVATED = batchmodels.UserIdentity(
auto_user=batchmodels.AutoUserSpecification(
scope=batchmodels.AutoUserScope.pool,

View file

@@ -31,7 +31,9 @@ from builtins import ( # noqa
next, oct, open, pow, round, super, filter, map, zip)
# stdlib imports
import collections
import copy
import datetime
import itertools
try:
import pathlib2 as pathlib
except ImportError:
@@ -2046,6 +2048,72 @@ def job_specifications(config):
return config['job_specifications']
def _generate_task(task):
# type: (dict) -> TaskSettings
"""Generate a task given a config
:param dict config: configuration object
:rtype: TaskSettings
:return: generated task
"""
# retrieve type of task factory
task_factory = task['task_factory']
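# dispatch on the type of task factory: 'repeat' or one of the
# 'parametric_sweep' modes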
if 'repeat' in task_factory:
for _ in range(0, task_factory['repeat']):
taskcopy = copy.deepcopy(task)
taskcopy.pop('task_factory')
yield taskcopy
elif 'parametric_sweep' in task_factory:
sweep = task['task_factory']['parametric_sweep']
if 'product' in sweep:
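# build one range() per start/stop/step entry; itertools.product then
# yields the (possibly nested) cartesian product across all of them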
product = []
for chain in sweep['product']:
product.append(
range(
chain['start'],
chain['stop'],
chain['step']
)
)
for arg in itertools.product(*product):
taskcopy = copy.deepcopy(task)
taskcopy.pop('task_factory')
taskcopy['command'] = taskcopy['command'].format(*arg)
yield taskcopy
elif 'combinations' in sweep:
iterable = sweep['combinations']['iterable']
try:
if sweep['combinations']['replacement']:
func = itertools.combinations_with_replacement
else:
func = itertools.combinations
except KeyError:
func = itertools.combinations
for arg in func(iterable, sweep['combinations']['length']):
taskcopy = copy.deepcopy(task)
taskcopy.pop('task_factory')
taskcopy['command'] = taskcopy['command'].format(*arg)
yield taskcopy
elif 'permutations' in sweep:
iterable = sweep['permutations']['iterable']
for arg in itertools.permutations(
iterable, sweep['permutations']['length']):
taskcopy = copy.deepcopy(task)
taskcopy.pop('task_factory')
taskcopy['command'] = taskcopy['command'].format(*arg)
yield taskcopy
elif 'zip' in sweep:
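# emit element-wise tuples across the iterables; zip stops at the end
# of the shortest iterable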
iterables = sweep['zip']
for arg in zip(*iterables):
taskcopy = copy.deepcopy(task)
taskcopy.pop('task_factory')
taskcopy['command'] = taskcopy['command'].format(*arg)
yield taskcopy
else:
raise ValueError('unknown parametric sweep type: {}'.format(sweep))
else:
raise ValueError('unknown task factory type: {}'.format(task_factory))
def job_tasks(conf):
# type: (dict) -> list
"""Get all tasks for job
@@ -2053,7 +2121,12 @@ def job_tasks(conf):
:rtype: list
:return: list of tasks
"""
return conf['tasks']
for _task in conf['tasks']:
if 'task_factory' in _task:
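# expand the task factory into concrete task specifications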
for task in _generate_task(_task):
yield task
else:
yield _task
def job_id(conf):

View file

@@ -186,7 +186,8 @@ each node type for `scenario` based autoscale.
to apply. When a pool is resized down and a node is selected for
removal, what action is performed for the running task is specified
with this option. The valid values are: `requeue`, `terminate`,
`taskcompletion`, and `retaineddata`. Please see [this doc](https://docs.microsoft.com/en-us/azure/batch/batch-automatic-scaling#variables) for more information.
`taskcompletion`, and `retaineddata`. The default is `taskcompletion`.
Please see [this doc](https://docs.microsoft.com/en-us/azure/batch/batch-automatic-scaling#variables) for more information.
* (optional) `sample_lookback_interval` is the time interval to lookback
for past history for certain scenarios such as autoscale based on
active and pending tasks. The format for this property is a timedelta

View file

@@ -63,6 +63,28 @@ The jobs schema is as follows:
},
"tasks": [
{
"task_factory": {
"parametric_sweep": {
"product": [
{
"start": 0,
"stop": 10,
"step": 1
}
],
"combinations": {
"iterable": ["ABC", "012"],
"length": 2,
"replacement": false
},
"permutations": {
"iterable": "ABCDEF",
"length": 3
},
"zip": ["ab", "01"]
},
"repeat": 3
},
"id": null,
"depends_on": [
"taskid-a", "taskid-b", "taskid-c"
@@ -287,12 +309,43 @@ transferred again. This object currently supports `azure_batch` and
* (optional) `blobxfer_extra_options` are any extra options to pass to
`blobxfer`.
* (required) `tasks` is an array of tasks to add to the job.
* (optional) `task_factory` is a way to dynamically generate tasks. This
enables parameter sweeps and task repetition without having to
explicitly generate a task array with different parameters for the
`command`. Please see the
[Task Factory Guide](35-batch-shipyard-task-factory.md) for more
information.
* (optional) `parametric_sweep` is a parameter sweep task factory. This
has multiple modes of task generation, and only one may be specified.
* (optional) `product` is a potentially nested parameter generator.
If one set of `start`, `stop`, `step` properties is specified, then
a simple range of values is generated. In the example above, the
integers 0 to 9 are provided as arguments to the `command`
property. If another set of `start`, `stop`, `step` properties is
specified, then it is nested within the prior set.
* (optional) `combinations` generates `length` subsequences of
parameters from the `iterable`. Combinations are emitted in
lexicographic sort order.
* (optional) `iterable` is the iterable to generate parameters from
* (optional) `length` is the length ("r") of each generated subsequence
* (optional) `replacement` allows individual elements to be
repeated more than once.
* (optional) `permutations` generates `length` permutations of
parameters from the `iterable`. Permutations are emitted in
lexicographic sort order.
* (optional) `iterable` is the iterable to generate parameters from
* (optional) `length` is the length ("r") of each generated permutation
* (optional) `zip` generates parameters where the i-th parameter
contains the i-th element from each iterable.
* (optional) `repeat` will create N number of identical tasks.
* (optional) `id` is the task id. Note that if the task `id` is null or
empty then a generic task id will be assigned. The generic task id is
formatted as `dockertask-NNNNN` where `NNNNN` starts from `00000` and is
increased by 1 for each task added to the same job. If there are more
than `99999` autonamed tasks in a job then the numbering is not
padded for tasks exceeding 5 digits.
padded for tasks exceeding 5 digits. `id` should not be specified in
conjunction with the `task_factory` property as `id`s will be
automatically generated.
* (optional) `depends_on` is an array of task ids for which this container
invocation (task) depends on and must run to successful completion prior
to this task executing.
@@ -486,7 +539,13 @@ transferred again. This object currently supports `azure_batch` and
task is a multi-instance task, then this `command` is the application
command and is executed with `docker exec` in the running Docker container
context from the `coordination_command` in the `multi_instance` property.
This property may be null.
This property may be null. Note that if you are using a `task_factory`
for the specification, then task factory arguments are applied to the
`command`. Therefore, Python-style string formatting options (excluding
keyword formatting) are required for `parametric_sweep` task factories:
either `{}` positional or `{0}` numbering style formatters. Please see the
[Task Factory Guide](35-batch-shipyard-task-factory.md) for more
information.
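For illustration, the positional substitution described above behaves like
Python's `str.format`; a minimal sketch with hypothetical values:

```python
# Illustrative sketch only: parametric_sweep arguments are substituted into
# the command with positional str.format, as described above.
command = '/bin/bash -c "sleep {0}; echo {1}"'

# a hypothetical parameter tuple produced by a task factory
args = (5, 'abc')

print(command.format(*args))
# /bin/bash -c "sleep 5; echo abc"
```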
## Full template
A full template of a credentials file can be found

View file

@@ -111,6 +111,8 @@ the autoscale formula does not result in target node counts that exceed
### Formula-based Autoscaling
Formula-based autoscaling allows users with expertise in creating autoscale
formulas to create their own formula and apply it to a Batch Shipyard pool.
These formulas should be specified in the `formula` member within the
`autoscale` property.
For more information about how to create your own custom autoscale formula,
please visit this

View file

@@ -0,0 +1,260 @@
# Batch Shipyard and Task Factories
The focus of this article is to describe the task factory concept and how it
can be utilized to generate arbitrary task arrays. This is particularly useful
in creating parameter (parametric) sweeps or repeated tasks.
## Task Factory
The normal configuration structure for a job in Batch Shipyard is through the
definition of a `tasks` array which contains individual task specifications.
Sometimes it is necessary to create a set of tasks where the base task
specification is the same (e.g., the run options, input, etc.) but the
arguments and options for the `command` must vary between tasks. This can
become tedious and error-prone to perform by hand, or it requires auxiliary
code to generate the jobs JSON configuration.
A task factory is simply a task generator for a job. With this functionality,
you can direct Batch Shipyard to generate a set of tasks given a
`task_factory` property, which then transforms the `command`, if applicable.
Note that you can attach only one `task_factory` specification to a single
task specification within the `tasks` array. However, you can have multiple
task specifications in the `tasks` array, thus allowing for multiple and
potentially different types of task factories per job.
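Conceptually, the expansion works like the following sketch (a
simplification, not the actual Batch Shipyard code path; the task
dictionaries and values are hypothetical):

```python
import copy
import itertools

# hypothetical tasks array: one plain task and one task with a factory
tasks = [
    {'image': 'busybox', 'command': 'echo hello'},
    {
        'image': 'busybox',
        'command': '/bin/bash -c "sleep {0}"',
        'task_factory': {
            'parametric_sweep': {
                'product': [{'start': 0, 'stop': 3, 'step': 1}]
            }
        },
    },
]

for task in tasks:
    if 'task_factory' not in task:
        print(task['command'])
        continue
    # expand the factory: here, a single-range product sweep
    sweep = task['task_factory']['parametric_sweep']
    ranges = [range(p['start'], p['stop'], p['step']) for p in sweep['product']]
    for arg in itertools.product(*ranges):
        t = copy.deepcopy(task)
        t.pop('task_factory')
        print(t['command'].format(*arg))
```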
Now we'll dive into each type of task factory available in Batch Shipyard.
### Repeat
A `repeat` task factory simply replicates the `command` N number of times.
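For example, a minimal sketch of what a `repeat` factory of 3 conceptually
expands to (the task dictionary below is hypothetical):

```python
import copy

# hypothetical task specification using a repeat factory of 3
task = {
    'image': 'busybox',
    'command': '/bin/bash -c "echo hello"',
    'task_factory': {'repeat': 3},
}

# a repeat factory yields N identical copies with the factory removed;
# each copy becomes its own task in the job
for _ in range(task['task_factory']['repeat']):
    t = copy.deepcopy(task)
    t.pop('task_factory')
    print(t['command'])
```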
### Parametric (Parameter) Sweep
A `parametric_sweep` will generate parameters to apply to the `command`
according to the type of sweep.
#### Product
A `product` `parametric_sweep` can perform nested or unnested parameter
generation. For example, if you need to generate a range of integers from
0 to 9 with a step size of 1 (thus 10 integers total), you would specify this
as:
```json
"task_factory": {
"parametric_sweep": {
"product": [
{
"start": 0,
"stop": 10,
"step": 1
}
]
}
}
```
The associated `command` would require either `{}` or `{0}` formatting to
specify where to substitute the generated argument value within the
`command` string. For example, with the following:
```json
"command": "/bin/bash -c \"sleep {0}\""
```
and the `task_factory` specified above, Batch Shipyard would generate 10
tasks:
```
Task 0:
/bin/bash -c "sleep 0"
Task 1:
/bin/bash -c "sleep 1"
Task 2:
/bin/bash -c "sleep 2"
...
Task 9:
/bin/bash -c "sleep 9"
```
As mentioned above, `product` can generate nested parameter sets. To do
this, one would create two or more `start`, `stop`, `step` objects in the
`product` array. For example:
```json
"task_factory": {
"parametric_sweep": {
"product": [
{
"start": 0,
"stop": 3,
"step": 1
},
{
"start": 100,
"stop": 97,
"step": -1
}
]
}
}
```
with the `command` template of:
```json
"command": "/bin/bash -c \"sleep {0}; sleep {1}\""
```
would generate 9 tasks (i.e., `3 * 3` sets of parameters):
```
Task 0:
/bin/bash -c "sleep 0; sleep 100"
Task 1:
/bin/bash -c "sleep 0; sleep 99"
Task 2:
/bin/bash -c "sleep 0; sleep 98"
Task 3:
/bin/bash -c "sleep 1; sleep 100"
Task 4:
/bin/bash -c "sleep 1; sleep 99"
Task 5:
/bin/bash -c "sleep 1; sleep 98"
Task 6:
/bin/bash -c "sleep 2; sleep 100"
Task 7:
/bin/bash -c "sleep 2; sleep 99"
Task 8:
/bin/bash -c "sleep 2; sleep 98"
```
You can nest an arbitrary number of parameter sets within the `product`
array.
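The nesting maps onto Python's `itertools.product` over the generated
ranges; a minimal sketch reproducing the parameter tuples from the example
above:

```python
import itertools

# the two start/stop/step objects above as Python ranges
outer = range(0, 3, 1)      # 0, 1, 2
inner = range(100, 97, -1)  # 100, 99, 98

for i, args in enumerate(itertools.product(outer, inner)):
    print('Task {}: sleep {}; sleep {}'.format(i, *args))
```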
#### Combinations
The `combinations` `parametric_sweep` generates `length` subsequences of
parameters from the `iterable`. Combinations are emitted in lexicographic
sort order. Combinations with replacement can be specified by setting the
`replacement` option to `true`. For example:
```json
"task_factory": {
"parametric_sweep": {
"combinations": {
"iterable": ["abc", "012", "def"],
"length": 2,
"replacement": false
}
}
}
```
with the `command` template of:
```json
"command": "/bin/bash -c \"echo {0}; echo {1}\""
```
would generate 3 tasks:
```
Task 0:
/bin/bash -c "echo abc; echo 012"
Task 1:
/bin/bash -c "echo abc; echo def"
Task 2:
/bin/bash -c "echo 012; echo def"
```
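Setting `replacement` to `true` uses combinations with replacement instead,
which also pairs each element with itself. A quick sketch of the difference
using Python's `itertools`:

```python
import itertools

iterable = ['abc', '012', 'def']

# replacement: false -> 3 parameter tuples (3 tasks)
print(list(itertools.combinations(iterable, 2)))
# [('abc', '012'), ('abc', 'def'), ('012', 'def')]

# replacement: true -> 6 parameter tuples (6 tasks)
print(list(itertools.combinations_with_replacement(iterable, 2)))
# [('abc', 'abc'), ('abc', '012'), ('abc', 'def'),
#  ('012', '012'), ('012', 'def'), ('def', 'def')]
```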
#### Permutations
The `permutations` `parametric_sweep` generates `length` permutations of
parameters from the `iterable`. Permutations are emitted in lexicographic
sort order. For example:
```json
"task_factory": {
"parametric_sweep": {
"permutations": {
"iterable": ["abc", "012", "def"],
"length": 2
}
}
}
```
with the `command` template of:
```json
"command": "/bin/bash -c \"echo {0}; echo {1}\""
```
would generate 6 tasks:
```
Task 0:
/bin/bash -c "echo abc; echo 012"
Task 1:
/bin/bash -c "echo abc; echo def"
Task 2:
/bin/bash -c "echo 012; echo abc"
Task 3:
/bin/bash -c "echo 012; echo def"
Task 4:
/bin/bash -c "echo def; echo abc"
Task 5:
/bin/bash -c "echo def; echo 012"
```
#### Zip
The `zip` `parametric_sweep` generates parameters where the i-th parameter
contains the i-th element from each iterable. For example:
```json
"task_factory": {
"parametric_sweep": {
"permutations": {
"iterable": ["abc", "012", "def"],
"length": 2
}
}
}
```
with the `command` template of:
```json
"command": "/bin/bash -c \"echo {0}; echo {1}; echo {2}\""
```
would generate 3 tasks:
```
Task 0:
/bin/bash -c "echo a; echo 0; echo d"
Task 1:
/bin/bash -c "echo b; echo 1; echo e"
Task 2:
/bin/bash -c "echo c; echo 2; echo f"
```
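The generated parameters correspond to Python's built-in `zip` applied
across the iterables; note that generation stops at the end of the shortest
iterable. A minimal sketch:

```python
# the zip factory applies Python's zip across the iterables
iterables = ['abc', '012', 'def']

for i, args in enumerate(zip(*iterables)):
    print('Task {}: echo {}; echo {}; echo {}'.format(i, *args))
# Task 0: echo a; echo 0; echo d
# Task 1: echo b; echo 1; echo e
# Task 2: echo c; echo 2; echo f
```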
## Configuration guide
Please see the [jobs configuration guide](14-batch-shipyard-configuration-jobs.md)
for more information on configuration for jobs and tasks.

View file

@@ -45,6 +45,11 @@ Network Security Group. You can optionally reduce the allowable inbound
address space for SSH on your software firewall rules or through the Azure
Batch created Network Security Group applied to compute nodes.
### Outbound Traffic and Ports
Azure Batch compute nodes must be able to communicate with Azure Storage
servers. Please ensure that outbound TCP traffic is allowed on port 443 for
HTTPS connections.
### Ephemeral (Temporary) Disk
Azure VMs have ephemeral temporary local disks attached to them which are
not persisted back to Azure Storage. Azure Batch utilizes this space for some

View file

@@ -14,16 +14,17 @@ and effectively running your batch-style Docker workloads on Azure Batch.
* [FS Configuration](15-batch-shipyard-configuration-fs.md)
6. [CLI Commands and Usage](20-batch-shipyard-usage.md)
7. [Autoscale](30-batch-shipyard-autoscale.md)
8. [Azure Functions and Batch Shipyard](60-batch-shipyard-site-extension.md)
9. [Custom Image for Host Compute Nodes](63-batch-shipyard-custom-images.md)
10. [Remote Filesystems](65-batch-shipyard-remote-fs.md)
11. [Data Movement](70-batch-shipyard-data-movement.md)
12. [Azure KeyVault for Credential Management](74-batch-shipyard-azure-keyvault.md)
13. [Credential Encryption](75-batch-shipyard-credential-encryption.md)
14. [Batch Shipyard and Multi-Instance Tasks](80-batch-shipyard-multi-instance-tasks.md)
15. [Interactive SSH Sessions and Docker Tunnels](85-batch-shipyard-ssh-docker-tunnel.md)
16. [Low-Priority Compute Node Considerations](95-low-priority-considerations.md)
17. [Troubleshooting Guide](96-troubleshooting-guide.md)
18. [FAQ](97-faq.md)
19. [Contributing Recipes](98-contributing-recipes.md)
20. [Current Limitations](99-current-limitations.md)
8. [Task Factories](35-batch-shipyard-task-factory.md)
9. [Azure Functions and Batch Shipyard](60-batch-shipyard-site-extension.md)
10. [Custom Image for Host Compute Nodes](63-batch-shipyard-custom-images.md)
11. [Remote Filesystems](65-batch-shipyard-remote-fs.md)
12. [Data Movement](70-batch-shipyard-data-movement.md)
13. [Azure KeyVault for Credential Management](74-batch-shipyard-azure-keyvault.md)
14. [Credential Encryption](75-batch-shipyard-credential-encryption.md)
15. [Batch Shipyard and Multi-Instance Tasks](80-batch-shipyard-multi-instance-tasks.md)
16. [Interactive SSH Sessions and Docker Tunnels](85-batch-shipyard-ssh-docker-tunnel.md)
17. [Low-Priority Compute Node Considerations](95-low-priority-considerations.md)
18. [Troubleshooting Guide](96-troubleshooting-guide.md)
19. [FAQ](97-faq.md)
20. [Contributing Recipes](98-contributing-recipes.md)
21. [Current Limitations](99-current-limitations.md)