From f6c74bd8b7204c1b26e94df0743e9a0c5f1413fe Mon Sep 17 00:00:00 2001 From: Fred Park Date: Sun, 5 Nov 2017 11:48:08 -0800 Subject: [PATCH] Doc updates - Emulate "set -e" for cmd wrap for Windows --- CHANGELOG.md | 1 + README.md | 11 +- convoy/util.py | 3 +- docs/20-batch-shipyard-usage.md | 224 ++++++++++++++---------- docs/70-batch-shipyard-data-movement.md | 2 + docs/97-faq.md | 4 +- docs/99-current-limitations.md | 8 +- shipyard.py | 2 +- 8 files changed, 152 insertions(+), 103 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c88bac..41e5372 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ more information. - Mount `/opt/intel` into Singularity containers - Retry image configuration error pulls from Docker registries - AAD MFA token cache on Python2 +- Non-native coordination command fix, if not specified ## [3.0.0a2] - 2017-10-27 ### Added diff --git a/README.md b/README.md index 1749e98..053d4f5 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,8 @@ tunneling to Docker Hosts on compute nodes * Support for execution on an [Azure Function App environment](http://batch-shipyard.readthedocs.io/en/latest/60-batch-shipyard-site-extension/) * Support for [custom host images](http://batch-shipyard.readthedocs.io/en/latest/63-batch-shipyard-custom-images/) +* Support for [Windows Containers](https://docs.microsoft.com/en-us/virtualization/windowscontainers/about/) +on compliant Windows compute node pools ## Installation ### Azure Cloud Shell @@ -111,9 +113,12 @@ section for various sample Docker workloads using Azure Batch and Batch Shipyard. ## Batch Shipyard Compute Node OS Support -Batch Shipyard is currently compatible with Azure Batch supported Marketplace -Linux VMs and -[compliant Linux custom images](http://batch-shipyard.readthedocs.io/en/latest/63-batch-shipyard-custom-images/). 
+Batch Shipyard is currently compatible with most Azure Batch supported +Marketplace Linux VMs, +[compliant Linux custom images](http://batch-shipyard.readthedocs.io/en/latest/63-batch-shipyard-custom-images/), +and native Azure Batch +[Windows Server with Containers](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/Microsoft.WindowsServer?tab=Overview) +VMs. ## Change Log Please see the diff --git a/convoy/util.py b/convoy/util.py index 5e1a0ca..3fe484d 100644 --- a/convoy/util.py +++ b/convoy/util.py @@ -274,7 +274,8 @@ def wrap_commands_in_shell(commands, windows=False, wait=True): :return: wrapped commands """ if windows: - return 'cmd.exe /c {}'.format(' && '.join(commands)) + tmp = ['(({}) || exit /b)'.format(x) for x in commands] + return 'cmd.exe /c "{}"'.format(' && '.join(tmp)) else: return '/bin/bash -c \'set -e; set -o pipefail; {}{}\''.format( '; '.join(commands), '; wait' if wait else '') diff --git a/docs/20-batch-shipyard-usage.md b/docs/20-batch-shipyard-usage.md index 52b5386..91e895d 100644 --- a/docs/20-batch-shipyard-usage.md +++ b/docs/20-batch-shipyard-usage.md @@ -216,40 +216,45 @@ then invoked the `cert add` command. Please see the ## `data` Command The `data` command has the following sub-commands: ``` - getfile Retrieve file(s) from a job/task - getfilenode Retrieve file(s) from a compute node - ingress Ingress data into Azure - listfiles List files for tasks in jobs - stream Stream a text file to the local console + files Compute node file actions + ingress Ingress data into Azure ``` -* `getfile` will retrieve a file with job, task, filename semantics - * `--all --filespec ,,` can be given to - download all files for the job and task with an optional include pattern - * `--filespec ,,` can be given to download one - specific file from the job and task. If `` is set to - `@FIRSTRUNNING`, then the first running task within the job of `` - will be used to locate the ``. 
-* `getfilenode` will retrieve a file with node id and filename semantics +The `data files` sub-command has the following sub-sub-commands: +``` + list List files for tasks in jobs + node Retrieve file(s) from a compute node + stream Stream a file as text to the local console or... + task Retrieve file(s) from a job/task +``` + +* `files list` will list files for all tasks in jobs + * `--jobid` force scope to just this job id + * `--taskid` force scope to just this task id +* `files node` will retrieve a file with node id and filename semantics * `--all --filespec <nodeid>,<include pattern>` can be given to download all files from the compute node with the optional include pattern * `--filespec <nodeid>,<filename>` can be given to download one specific file from compute node -* `ingress` will ingress data as specified in configuration files - * `--to-fs <STORAGE_CLUSTER_ID>` transfers data as specified in - configuration files to the specified remote file system storage cluster - instead of Azure Storage -* `listfiles` will list files for all tasks in jobs - * `--jobid` force scope to just this job id - * `--taskid` force scope to just this task id -* `stream` will stream a file as text (UTF-8 decoded) to the local console -or binary if streamed to disk +* `files stream` will stream a file as text (UTF-8 decoded) to the local +console or binary if streamed to disk * `--disk` will write the streamed data as binary to disk instead of output to local console * `--filespec <jobid>,<taskid>,<filename>` can be given to stream a specific file. If `<taskid>` is set to `@FIRSTRUNNING`, then the first running task within the job of `<jobid>` will be used to locate the `<filename>`. +* `files task` will retrieve a file with job, task, filename semantics + * `--all --filespec <jobid>,<taskid>,<include pattern>` can be given to + download all files for the job and task with an optional include pattern + * `--filespec <jobid>,<taskid>,<filename>` can be given to download one + specific file from the job and task. If `<taskid>` is set to + `@FIRSTRUNNING`, then the first running task within the job of `<jobid>` + will be used to locate the `<filename>`. 
+* `ingress` will ingress data as specified in configuration files + * `--to-fs <STORAGE_CLUSTER_ID>` transfers data as specified in + configuration files to the specified remote file system storage cluster + instead of Azure Storage ## `fs` Command The `fs` command has the following sub-commands which work on two different @@ -345,18 +350,23 @@ assumed. ## `jobs` Command The `jobs` command has the following sub-commands: ``` - add Add jobs - cmi Cleanup multi-instance jobs - del Delete jobs and job schedules - deltasks Delete specified tasks in jobs - disable Disable jobs and job schedules - enable Enable jobs and job schedules - list List jobs - listtasks List tasks within jobs - migrate Migrate jobs or job schedules to another pool - stats Get statistics about jobs - term Terminate jobs and job schedules - termtasks Terminate specified tasks in jobs + add Add jobs + cmi Cleanup non-native multi-instance jobs + del Delete jobs and job schedules + disable Disable jobs and job schedules + enable Enable jobs and job schedules + list List jobs + migrate Migrate jobs or job schedules to another pool + stats Get statistics about jobs + tasks Tasks actions + term Terminate jobs and job schedules +``` + +The `jobs tasks` sub-command has the following sub-sub-commands: +``` + del Delete specified tasks in jobs + list List tasks within jobs + term Terminate specified tasks in jobs ``` * `add` will add all jobs and tasks defined in the jobs configuration file @@ -364,8 +374,8 @@ to the Batch pool * `--recreate` will recreate any completed jobs with the same id * `--tail` will tail the specified file of the last job and task added with this command invocation -* `cmi` will cleanup any stale multi-instance tasks and jobs. Note that this -sub-command is typically not required if `multi_instance_auto_complete` is +* `cmi` will cleanup any stale non-native multi-instance tasks and jobs. 
Note +that this sub-command is typically not required if `auto_complete` is set to `true` in the job specification for the job. * `--delete` will delete any stale cleanup jobs * `del` will delete jobs and job schedules specified in the jobs @@ -381,12 +391,6 @@ cleaned up. Termination of running tasks requires a valid SSH user if the tasks are running on a non-`native` container support pool. * `--wait` will wait for deletion to complete -* `deltasks` will delete tasks within jobs specified in the jobs -configuration file. Active or running tasks will be terminated first on -non-`native` container support pools. - * `--jobid` force deletion scope to just this job id - * `--taskid` force deletion scope to just this task id - * `--wait` will wait for deletion to complete * `disable` will disable jobs or job schedules * `--jobid` force disable scope to just this job id * `--jobscheduleid` force disable scope to just this job schedule id @@ -397,11 +401,6 @@ non-`native` container support pools. * `--jobid` force enable scope to just this job id * `--jobscheduleid` force enable scope to just this job schedule id * `list` will list all jobs in the Batch account -* `listtasks` will list tasks from jobs specified in the jobs configuration -file - * `--all` list all tasks in all jobs in the account - * `--jobid` force scope to just this job id - * `--poll-until-tasks-complete` will poll until all tasks have completed * `migrate` will migrate jobs or job schedules to another pool. Ensure that the new target pool has the Docker images required to run the job. * `--jobid` force migration scope to just this job id @@ -412,6 +411,24 @@ the new target pool has the Docker images required to run the job. * `--wait` wait for running tasks to complete * `stats` will generate a statistics summary of a job or jobs * `--jobid` will query the specified job instead of all jobs +* `tasks del` will delete tasks within jobs specified in the jobs +configuration file. 
Active or running tasks will be terminated first on +non-`native` container support pools. + * `--jobid` force deletion scope to just this job id + * `--taskid` force deletion scope to just this task id + * `--wait` will wait for deletion to complete +* `tasks list` will list tasks from jobs specified in the jobs +configuration file + * `--all` list all tasks in all jobs in the account + * `--jobid` force scope to just this job id + * `--poll-until-tasks-complete` will poll until all tasks have completed +* `tasks term` will terminate tasks within jobs specified in the jobs +configuration file. Termination of running tasks requires a valid SSH +user if tasks are running on a non-`native` container support pool. + * `--force` force send docker kill signal regardless of task state + * `--jobid` force termination scope to just this job id + * `--taskid` force termination scope to just this task id + * `--wait` will wait for termination to complete * `term` will terminate jobs and job schedules found in the jobs configuration file. If an autopool is specified for all jobs and a jobid option is not specified, the storage associated with the autopool will be @@ -425,13 +442,6 @@ cleaned up. Termination of running tasks requires a valid SSH user if tasks are running on a non-`native` container support pool. * `--wait` will wait for termination to complete -* `termtasks` will terminate tasks within jobs specified in the jobs -configuration file. Termination of running tasks requires a valid SSH -user if tasks are running on a non-`native` container support pool. 
- * `--force` force send docker kill signal regardless of task state - * `--jobid` force termination scope to just this job id - * `--taskid` force termination scope to just this task id - * `--wait` will wait for termination to complete ## `keyvault` Command The `keyvault` command has the following sub-commands: @@ -480,35 +490,54 @@ or has run the specified task ## `pool` Command The `pool` command has the following sub-commands: ``` - add Add a pool to the Batch account - asu Add an SSH user to all nodes in pool - autoscale Pool autoscale actions - del Delete a pool from the Batch account - delnode Delete a node from a pool - dsu Delete an SSH user from all nodes in pool - grls Get remote login settings for all nodes in... - list List all pools in the Batch account - listimages List Docker images in a pool - listnodes List nodes in pool - listskus List available VM configurations available to... - rebootnode Reboot a node or nodes in a pool - resize Resize a pool - ssh Interactively login via SSH to a node in a... - stats Get statistics about a pool - udi Update Docker images in a pool + add Add a pool to the Batch account + autoscale Autoscale actions + del Delete a pool from the Batch account + images Container images actions + list List all pools in the Batch account + listskus List available VM configurations available to... + nodes Compute node actions + resize Resize a pool + ssh Interactively login via SSH to a node in a... + stats Get statistics about a pool + user Remote user actions +``` + +The `pool autoscale` sub-command has the following sub-sub-commands: +``` + disable Disable autoscale on a pool + enable Enable autoscale on a pool + evaluate Evaluate autoscale formula + lastexec Get the result of the last execution of the... 
+``` + +The `pool images` sub-command has the following sub-sub-commands: +``` + list List container images in a pool + update Update container images in a pool +``` + +The `pool nodes` sub-command has the following sub-sub-commands: +``` + del Delete a node from a pool + grls Get remote login settings for all nodes in... + list List nodes in pool + reboot Reboot a node or nodes in a pool +``` + +The `pool user` sub-command has the following sub-sub-commands: +``` + add Add a remote user to all nodes in pool + del Delete a remote user from all nodes in pool ``` * `add` will add the pool defined in the pool configuration file to the Batch account -* `asu` will add the SSH user defined in the pool configuration file to -all nodes in the specified pool -* `autoscale` will invoke the autoscale subcommand. The autoscale -subcommand has 4 subcommands: - * `disable` will disable autoscale on the pool - * `enable` will enable autoscale on the pool - * `evaluate` will evaluate the autoscale formula in the pool configuration - file - * `lastexec` will query the last execution information for autoscale +* `autoscale disable` will disable autoscale on the pool +* `autoscale enable` will enable autoscale on the pool +* `autoscale evaluate` will evaluate the autoscale formula in the pool +configuration file +* `autoscale lastexec` will query the last execution information for autoscale * `del` will delete the pool defined in the pool configuration file from the Batch account along with associated metadata in Azure Storage used by Batch Shipyard. It is recommended to use this command instead of deleting @@ -518,22 +547,28 @@ Azure Storage. * `--poolid` will delete the specified pool instead of the pool from the pool configuration file * `--wait` will wait for deletion to complete -* `delnode` will delete the specified node from the pool +* `images list` will query the nodes in the pool for Docker images. Common +and mismatched images will be listed. 
Requires a provisioned SSH user and +private key. +* `images update` will update container images on all compute nodes of the +pool. This command may require a valid SSH user. + * `--docker-image` will restrict the update to just the Docker image or + image:tag + * `--docker-image-digest` will restrict the update to just the Docker + image or image:tag and a specific digest + * `--singularity-image` will restrict the update to just the Singularity + image or image:tag +* `list` will list all pools in the Batch account +* `nodes del` will delete the specified node from the pool * `--all-start-task-failed` will delete all nodes in the start task failed state * `--all-starting` will delete all nodes in the starting state * `--all-unusable` will delete all nodes in the unusable state * `--nodeid` is the node id to delete -* `dsu` will delete the SSH user defined in the pool configuration file -from all nodes in the specified pool -* `grls` will retrieve all of the remote login settings for every node +* `nodes grls` will retrieve all of the remote login settings for every node in the specified pool -* `list` will list all pools in the Batch account -* `listimages` will query the nodes in the pool for Docker images. Common -and mismatched images will be listed. Requires a provisioned SSH user and -private key. -* `listnodes` will list all nodes in the specified pool -* `rebootnode` will reboot a specified node in the pool +* `nodes list` will list all nodes in the specified pool +* `nodes reboot` will reboot a specified node in the pool * `--all-start-task-failed` will reboot all nodes in the start task failed state * `--nodeid` is the node id to reboot @@ -552,11 +587,10 @@ configuration file * `stats` will generate a statistics summary of the pool * `--poolid` will query the specified pool instead of the pool from the pool configuration file -* `udi` will update Docker images on all compute nodes of the pool. This -command requires a valid SSH user. 
- * `--image` will restrict the update to just the image or image:tag - * `--digest` will restrict the update to just the image or image:tag and - a specific digest +* `user add` will add an SSH or RDP user defined in the pool +configuration file to all nodes in the specified pool +* `user del` will delete the SSH or RDP user defined in the pool +configuration file from all nodes in the specified pool ## `storage` Command The `storage` command has the following sub-commands: @@ -578,6 +612,8 @@ for metadata purposes shipyard pool add --credentials credentials.yaml --config config.yaml --pool pool.yaml # ... or if all config files are in the current working directory named as above ... +# (note this is not strictly necessary as Batch Shipyard will search the +# current working directory if the options above are not explicitly specified) shipyard pool add --configdir . diff --git a/docs/70-batch-shipyard-data-movement.md b/docs/70-batch-shipyard-data-movement.md index 40027f6..cf5ca48 100644 --- a/docs/70-batch-shipyard-data-movement.md +++ b/docs/70-batch-shipyard-data-movement.md @@ -138,6 +138,8 @@ Note that `files` is an array, therefore, Batch Shipyard accepts any number of `source`/`destination` pairings and even mixed GlusterFS and Azure Storage ingress objects. +Data ingress from on-premises to Windows pools is not supported. + ### From Azure Storage (Blob and File) Data from Azure Storage can be ingressed to compute nodes in many different ways with Batch Shipyard. The recommended method when using Batch Shipyard diff --git a/docs/97-faq.md b/docs/97-faq.md index 66c9d54..c4bf4cb 100644 --- a/docs/97-faq.md +++ b/docs/97-faq.md @@ -85,5 +85,5 @@ possible. * Yes, please see [the guide](63-batch-shipyard-custom-images.md). #### Does Batch Shipyard support Windows Server Containers? -* Not at this time, we are tracking the issue -[here](https://github.com/Azure/batch-shipyard/issues/7). +* Yes, but with some feature, configuration, and CLI limitations. 
Please see +the [current limitations](99-current-limitations.md) doc for more information. diff --git a/docs/99-current-limitations.md b/docs/99-current-limitations.md index b24d5ae..ea59226 100644 --- a/docs/99-current-limitations.md +++ b/docs/99-current-limitations.md @@ -49,8 +49,12 @@ no longer performed before a pool is created and will instead result in a ResizeError on the pool if not all compute nodes can be allocated. * Data movement between Batch tasks as defined by `input_data`:`azure_batch` is restricted to Batch accounts with keys (non-AAD). -* Windows Server 2016, Clear Linux, and Oracle Linux are not supported with -Batch Shipyard at this time. +* Clear Linux and Oracle Linux are not supported with Batch Shipyard at this +time. +* Certain features, configuration options and usage commands are not +supported with Windows compute node pools such as on-premises to compute +node direct data ingress, storage cluster mounting, and credential +encryption support. * Task dependencies are incompatible with multi-instance tasks. This is a current limitation of the underlying Azure Batch service. * Only Intel MPI (or polling-based IB uverbs) can be used in conjunction diff --git a/shipyard.py b/shipyard.py index 5ead3c5..0b87301 100755 --- a/shipyard.py +++ b/shipyard.py @@ -1472,7 +1472,7 @@ def jobs_del( @aad_options @pass_cli_context def jobs_cmi(ctx, delete): - """Cleanup multi-instance jobs""" + """Cleanup non-native multi-instance jobs""" ctx.initialize_for_batch() convoy.fleet.action_jobs_cmi(ctx.batch_client, ctx.config, delete)