From 487223e8fa410677787b351ec56b8eb8578ec59a Mon Sep 17 00:00:00 2001 From: Fred Park Date: Thu, 6 Oct 2016 11:03:10 -0700 Subject: [PATCH] Change pool config ssh_docker_tunnel to ssh --- CHANGELOG.md | 11 ++++++ config_templates/pool.json | 5 ++- docs/10-batch-shipyard-configuration.md | 25 ++++++++----- .../config/multinode/pool.json | 2 +- .../config/singlenode/pool.json | 2 +- .../config/multinode-multigpu/pool.json | 2 +- .../config/singlenode-multigpu/pool.json | 2 +- .../config/singlenode-singlegpu/pool.json | 2 +- recipes/Caffe-CPU/config/pool.json | 2 +- recipes/Caffe-GPU/config/pool.json | 2 +- recipes/FFmpeg-GPU/config/pool.json | 2 +- recipes/NAMD-GPU/config/pool.json | 2 +- .../NAMD-Infiniband-IntelMPI/config/pool.json | 2 +- recipes/NAMD-TCP/config/pool.json | 2 +- .../config/pool.json | 2 +- recipes/OpenFOAM-TCP-OpenMPI/config/pool.json | 2 +- recipes/TensorFlow-CPU/config/pool.json | 2 +- .../TensorFlow-Distributed/config/pool.json | 2 +- recipes/TensorFlow-GPU/config/pool.json | 2 +- recipes/Torch-CPU/config/pool.json | 2 +- recipes/Torch-GPU/config/pool.json | 2 +- shipyard.py | 37 +++++++++++-------- 22 files changed, 69 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e82d3f..dd2b4b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,17 @@ # Change Log ## [Unreleased] +### Added +- Experimental support for OpenSSH HPN on Ubuntu + +### Changed +- **Breaking Change:** `ssh_docker_tunnel` in the `pool_specification` has +been replaced by the `ssh` property. Please see the configuration doc for +more information. 
+ +### Fixed +- GlusterFS mount ownership/permissions fixed such that SSH users can +read/write ## [1.1.0] - 2016-10-05 ### Added diff --git a/config_templates/pool.json b/config_templates/pool.json index 3f1750f..c0c99ea 100644 --- a/config_templates/pool.json +++ b/config_templates/pool.json @@ -10,11 +10,12 @@ "sku": "16.04.0-LTS", "reboot_on_start_task_failed": true, "block_until_all_global_resources_loaded": true, - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "expiry_days": 7, "ssh_public_key": null, - "generate_tunnel_script": true + "generate_tunnel_script": true, + "hpn_server_swap": false }, "gpu": { "nvidia_driver": { diff --git a/docs/10-batch-shipyard-configuration.md b/docs/10-batch-shipyard-configuration.md index f56f9a6..75b1218 100644 --- a/docs/10-batch-shipyard-configuration.md +++ b/docs/10-batch-shipyard-configuration.md @@ -11,7 +11,8 @@ settings 3. [Pool](#pool) - Azure Batch pool configuration 4. [Jobs](#jobs) - Azure Batch jobs and tasks configuration -Each property is marked with required or optional. +Each property is marked with required or optional. Properties marked with +experimental should be considered as features for testing only. Example config templates can be found in [this directory](../config\_templates) of the repository. @@ -246,11 +247,12 @@ The pool schema is as follows: "sku": "7.1", "reboot_on_start_task_failed": true, "block_until_all_global_resources_loaded": true, - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "expiry_days": 7, "ssh_public_key": null, - "generate_tunnel_script": true + "generate_tunnel_script": true, + "hpn_server_swap": false }, "gpu": { "nvidia_driver": { @@ -285,18 +287,23 @@ network timeout or resolution failure). This defaults to `false`. * (optional) `block_until_all_global_resources_loaded` will block the node from entering ready state until all Docker images are loaded. This defaults to `true`. 
-* (optional) `ssh_docker_tunnel` is the property for creating a user to -accomodate SSH tunneling to the Docker Host on compute nodes. If this property -is absent, then SSH tunnel creation is skipped. +* (optional) `ssh` is the property for creating a user to accommodate SSH +sessions to compute nodes. If this property is absent, then an SSH user is not +created with pool creation. * `username` is the user to create on the compute nodes. * `expiry_days` is the number of days from now for the account on the compute nodes to expire. The default is 7 days from invocation time. * `ssh_public_key` is the path to an existing ssh public key to use. If not specified, a public/private key pair will be automatically generated only - only Linux. If this is `null` or not specified on Windows, SSH tunnel - creation will be disabled. + on Linux. If this is `null` or not specified on Windows, the SSH user is + not created. * `generate_tunnel_script` property directs script to generate an SSH tunnel -script for use with the compute nodes in the pool. + script that can be used to connect to the remote Docker engine running on + a compute node. + * (experimental) `hpn_server_swap` property enables an OpenSSH server with + [HPN patches](http://www.psc.edu/index.php/hpn-ssh) to be swapped with the + standard distribution OpenSSH server. This is not supported on all + Linux distributions and may be force disabled. 
* (required for N-Series VM instances) `gpu` property defines additional information for nVidia GPU-enabled VMs: * `nvidia_driver` property contains the following required members: diff --git a/recipes/CNTK-CPU-OpenMPI/config/multinode/pool.json b/recipes/CNTK-CPU-OpenMPI/config/multinode/pool.json index 6eb4d2b..15dd687 100644 --- a/recipes/CNTK-CPU-OpenMPI/config/multinode/pool.json +++ b/recipes/CNTK-CPU-OpenMPI/config/multinode/pool.json @@ -7,7 +7,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": "16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/CNTK-CPU-OpenMPI/config/singlenode/pool.json b/recipes/CNTK-CPU-OpenMPI/config/singlenode/pool.json index 3e11543..ac4a874 100644 --- a/recipes/CNTK-CPU-OpenMPI/config/singlenode/pool.json +++ b/recipes/CNTK-CPU-OpenMPI/config/singlenode/pool.json @@ -7,7 +7,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": "16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/CNTK-GPU-OpenMPI/config/multinode-multigpu/pool.json b/recipes/CNTK-GPU-OpenMPI/config/multinode-multigpu/pool.json index 1039362..487099d 100644 --- a/recipes/CNTK-GPU-OpenMPI/config/multinode-multigpu/pool.json +++ b/recipes/CNTK-GPU-OpenMPI/config/multinode-multigpu/pool.json @@ -7,7 +7,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": "16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/CNTK-GPU-OpenMPI/config/singlenode-multigpu/pool.json b/recipes/CNTK-GPU-OpenMPI/config/singlenode-multigpu/pool.json index 99a61c3..cc1a9fa 100644 --- a/recipes/CNTK-GPU-OpenMPI/config/singlenode-multigpu/pool.json +++ b/recipes/CNTK-GPU-OpenMPI/config/singlenode-multigpu/pool.json @@ -6,7 +6,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": "16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": { 
"username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/CNTK-GPU-OpenMPI/config/singlenode-singlegpu/pool.json b/recipes/CNTK-GPU-OpenMPI/config/singlenode-singlegpu/pool.json index 9319d07..a68bc13 100644 --- a/recipes/CNTK-GPU-OpenMPI/config/singlenode-singlegpu/pool.json +++ b/recipes/CNTK-GPU-OpenMPI/config/singlenode-singlegpu/pool.json @@ -6,7 +6,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": "16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/Caffe-CPU/config/pool.json b/recipes/Caffe-CPU/config/pool.json index a100110..97552d9 100644 --- a/recipes/Caffe-CPU/config/pool.json +++ b/recipes/Caffe-CPU/config/pool.json @@ -6,7 +6,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": "16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/Caffe-GPU/config/pool.json b/recipes/Caffe-GPU/config/pool.json index 28d6333..6151093 100644 --- a/recipes/Caffe-GPU/config/pool.json +++ b/recipes/Caffe-GPU/config/pool.json @@ -6,7 +6,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": "16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/FFmpeg-GPU/config/pool.json b/recipes/FFmpeg-GPU/config/pool.json index ce93bbb..cf0a2c9 100644 --- a/recipes/FFmpeg-GPU/config/pool.json +++ b/recipes/FFmpeg-GPU/config/pool.json @@ -6,7 +6,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": "16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/NAMD-GPU/config/pool.json b/recipes/NAMD-GPU/config/pool.json index 7324119..e9db81c 100644 --- a/recipes/NAMD-GPU/config/pool.json +++ b/recipes/NAMD-GPU/config/pool.json @@ -6,7 +6,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": "16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": 
{ "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/NAMD-Infiniband-IntelMPI/config/pool.json b/recipes/NAMD-Infiniband-IntelMPI/config/pool.json index ffec0b4..36ec173 100644 --- a/recipes/NAMD-Infiniband-IntelMPI/config/pool.json +++ b/recipes/NAMD-Infiniband-IntelMPI/config/pool.json @@ -7,7 +7,7 @@ "publisher": "OpenLogic", "offer": "CentOS-HPC", "sku": "7.1", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/NAMD-TCP/config/pool.json b/recipes/NAMD-TCP/config/pool.json index ffb0863..aa9944c 100644 --- a/recipes/NAMD-TCP/config/pool.json +++ b/recipes/NAMD-TCP/config/pool.json @@ -7,7 +7,7 @@ "publisher": "OpenLogic", "offer": "CentOS", "sku": "7.2", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/OpenFOAM-Infiniband-IntelMPI/config/pool.json b/recipes/OpenFOAM-Infiniband-IntelMPI/config/pool.json index 8a7c66c..6d7c681 100644 --- a/recipes/OpenFOAM-Infiniband-IntelMPI/config/pool.json +++ b/recipes/OpenFOAM-Infiniband-IntelMPI/config/pool.json @@ -7,7 +7,7 @@ "publisher": "OpenLogic", "offer": "CentOS-HPC", "sku": "7.1", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/OpenFOAM-TCP-OpenMPI/config/pool.json b/recipes/OpenFOAM-TCP-OpenMPI/config/pool.json index efa3487..ee9bfb5 100644 --- a/recipes/OpenFOAM-TCP-OpenMPI/config/pool.json +++ b/recipes/OpenFOAM-TCP-OpenMPI/config/pool.json @@ -7,7 +7,7 @@ "publisher": "OpenLogic", "offer": "CentOS", "sku": "7.2", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/TensorFlow-CPU/config/pool.json b/recipes/TensorFlow-CPU/config/pool.json index 0b7af8c..14a0dbb 100644 --- a/recipes/TensorFlow-CPU/config/pool.json +++ b/recipes/TensorFlow-CPU/config/pool.json @@ -6,7 +6,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": 
"16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/TensorFlow-Distributed/config/pool.json b/recipes/TensorFlow-Distributed/config/pool.json index c5bccdc..b4cd336 100644 --- a/recipes/TensorFlow-Distributed/config/pool.json +++ b/recipes/TensorFlow-Distributed/config/pool.json @@ -7,7 +7,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": "16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/TensorFlow-GPU/config/pool.json b/recipes/TensorFlow-GPU/config/pool.json index d367d8e..cb021be 100644 --- a/recipes/TensorFlow-GPU/config/pool.json +++ b/recipes/TensorFlow-GPU/config/pool.json @@ -6,7 +6,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": "16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/Torch-CPU/config/pool.json b/recipes/Torch-CPU/config/pool.json index c195c0f..aae9cc8 100644 --- a/recipes/Torch-CPU/config/pool.json +++ b/recipes/Torch-CPU/config/pool.json @@ -6,7 +6,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": "16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/recipes/Torch-GPU/config/pool.json b/recipes/Torch-GPU/config/pool.json index 8703691..c0fd65b 100644 --- a/recipes/Torch-GPU/config/pool.json +++ b/recipes/Torch-GPU/config/pool.json @@ -6,7 +6,7 @@ "publisher": "Canonical", "offer": "UbuntuServer", "sku": "16.04.0-LTS", - "ssh_docker_tunnel": { + "ssh": { "username": "docker", "generate_tunnel_script": true }, diff --git a/shipyard.py b/shipyard.py index 7959adc..b15b742 100755 --- a/shipyard.py +++ b/shipyard.py @@ -708,7 +708,7 @@ def add_pool(batch_client, blob_client, config): if gluster: _setup_glusterfs(batch_client, blob_client, config, nodes) # create admin user on each node if requested - 
add_ssh_tunnel_user(batch_client, config, nodes) + add_ssh_user(batch_client, config, nodes) # log remote login settings get_remote_login_settings(batch_client, config, nodes) @@ -790,7 +790,7 @@ def _setup_glusterfs(batch_client, blob_client, config, nodes): batchtask.id, job_id)) -def add_ssh_tunnel_user(batch_client, config, nodes=None): +def add_ssh_user(batch_client, config, nodes=None): # type: (batch.BatchServiceClient, dict, # List[batchmodels.ComputeNode]) -> None """Add an SSH user to node and optionally generate an SSH tunneling script @@ -801,23 +801,20 @@ def add_ssh_tunnel_user(batch_client, config, nodes=None): """ pool_id = config['pool_specification']['id'] try: - docker_user = config[ - 'pool_specification']['ssh_docker_tunnel']['username'] + docker_user = config['pool_specification']['ssh']['username'] if docker_user is None: raise KeyError() except KeyError: - logger.info('not creating ssh tunnel user on pool {}'.format(pool_id)) + logger.info('not creating ssh user on pool {}'.format(pool_id)) else: ssh_priv_key = None try: - ssh_pub_key = config[ - 'pool_specification']['ssh_docker_tunnel']['ssh_public_key'] + ssh_pub_key = config['pool_specification']['ssh']['ssh_public_key'] except KeyError: ssh_pub_key = None try: gen_tunnel_script = config[ - 'pool_specification']['ssh_docker_tunnel'][ - 'generate_tunnel_script'] + 'pool_specification']['ssh']['generate_tunnel_script'] except KeyError: gen_tunnel_script = False # generate ssh key pair if not specified @@ -957,7 +954,7 @@ def add_admin_user_to_compute_node( pool_id = config['pool_specification']['id'] expiry = datetime.datetime.utcnow() try: - td = config['pool_specification']['ssh_docker_tunnel']['expiry_days'] + td = config['pool_specification']['ssh']['expiry_days'] expiry += datetime.timedelta(days=td) except KeyError: expiry += datetime.timedelta(days=7) @@ -1158,18 +1155,26 @@ def _adjust_settings_for_pool_creation(config): logger.warning( 'force enabling inter-node communication due 
to peer-to-peer ' 'transfer') + # hpn-ssh can only be used for Ubuntu currently + try: + if (config['pool_specification']['ssh']['hpn_server_swap'] and + publisher != 'canonical' and offer != 'ubuntuserver'): + logger.warning('cannot enable HPN SSH swap on {} {} {}'.format( + publisher, offer, sku)) + config['pool_specification']['ssh']['hpn_server_swap'] = False + except KeyError: + pass # adjust ssh settings on windows if _ON_WINDOWS: try: - ssh_pub_key = config[ - 'pool_specification']['ssh_docker_tunnel']['ssh_public_key'] + ssh_pub_key = config['pool_specification']['ssh']['ssh_public_key'] except KeyError: ssh_pub_key = None if ssh_pub_key is None: logger.warning( - 'disabling ssh docker tunnel creation due to script being ' - 'run from Windows') - config['pool_specification'].pop('ssh_docker_tunnel', None) + 'disabling ssh user creation due to script being run ' + 'from Windows and no public key is specified') + config['pool_specification'].pop('ssh', None) def resize_pool(batch_client, config): @@ -2290,7 +2295,7 @@ def main(): elif args.action == 'delpool': del_pool(batch_client, config) elif args.action == 'addsshuser': - add_ssh_tunnel_user(batch_client, config) + add_ssh_user(batch_client, config) get_remote_login_settings(batch_client, config) elif args.action == 'delnode': del_node(batch_client, config, args.nodeid)