From 35fb3f588bedd710a962833b476ad9ca2b04680a Mon Sep 17 00:00:00 2001 From: Fred Park Date: Sun, 28 Aug 2016 19:43:53 -0700 Subject: [PATCH] Add support for more host OSes - Ubuntu 14.04, Debian 8, CentOS 7.x, RHEL 7.x, OpenSUSE 13.2/42.1, SLES 12/12-sp1 - Improve graphing - Prevent metadata clear on existing pool --- cascade/cascade.py | 2 +- cascade/graph.py | 31 ++++--- cascade/setup_private_registry.py | 3 +- scripts/nodeprep.sh | 130 +++++++++++++++++++++++++++--- shipyard.py | 81 +++++++++++++++---- 5 files changed, 212 insertions(+), 35 deletions(-) diff --git a/cascade/cascade.py b/cascade/cascade.py index cd6113d..bbb8e5c 100755 --- a/cascade/cascade.py +++ b/cascade/cascade.py @@ -302,7 +302,7 @@ def _renew_queue_message_lease( msg_id) -def scantree(path) -> os.DirEntry: +def scantree(path): """Recursively scan a directory tree :param str path: path to scan :rtype: os.DirEntry diff --git a/cascade/graph.py b/cascade/graph.py index db9f294..f50570c 100755 --- a/cascade/graph.py +++ b/cascade/graph.py @@ -163,7 +163,7 @@ def coalesce_data(table_client: azuretable.TableService) -> tuple: """Coalesce perf data from table :param azure.storage.table.TableService table_client: table client :rtype: tuple - :return: (timing, sizes) + :return: (timing, sizes, offer, sku) """ print('graphing data from {} with pk={}'.format( _TABLE_NAME, _PARTITION_KEY)) @@ -189,7 +189,12 @@ def coalesce_data(table_client: azuretable.TableService) -> tuple: data[nodeid][event].append(ev) del entities sizes = {} + offer = None + sku = None for nodeid in data: + if offer is None: + offer = data[nodeid]['nodeprep:start'][0]['message']['offer'] + sku = data[nodeid]['nodeprep:start'][0]['message']['sku'] # calculate dt timings timing = { 'nodeprep': _compute_delta_t( @@ -252,13 +257,15 @@ def coalesce_data(table_client: azuretable.TableService) -> tuple: data[nodeid].pop('cascade:load-start', None) data[nodeid].pop('cascade:load-end', None) data[nodeid]['timing'] = timing - return data, 
sizes + return data, sizes, offer, sku -def graph_data(data: dict, sizes: dict): +def graph_data(data: dict, sizes: dict, offer: str, sku: str): """Graph data via gnuplot :param dict data: timing data :param dict sizes: size data + :param str offer: offer + :param str sku: sku """ print(sizes) # create data file @@ -324,16 +331,20 @@ def graph_data(data: dict, sizes: dict): with open(plot_fname, 'w') as f: f.write('set terminal pngcairo enhanced transparent crop\n') f.write( - 'set key top left outside horizontal autotitle columnhead ' - 'font ", 9"\n') - f.write('set xtics rotate by 45 right font ", 8"\n') - f.write('set ytics\n') + ('set title "Shipyard Performance for {} ({} {})" ' + 'font ", 10" \n').format( + _PARTITION_KEY.split('$')[-1], offer, sku)) + f.write( + 'set key top right horizontal autotitle columnhead ' + 'font ", 7"\n') + f.write('set xtics rotate by 45 right font ", 7"\n') + f.write('set ytics font ", 8"\n') f.write('set xlabel "Node Prep Start Time" font ", 8"\n') f.write('set ylabel "Seconds" font ", 8"\n') f.write('set format x "%H:%M:%.3S"\n') f.write('set xdata time\n') f.write('set timefmt "%Y-%m-%d-%H:%M:%S"\n') - f.write('set style fill solid border -1\n') + f.write('set style fill solid\n') f.write('set boxwidth {0:.5f} absolute\n'.format( (maxtime - mintime) / 100.0)) f.write('plot "{}" using 1:($3+$4+$5+$6) with boxes, \\\n'.format( @@ -387,8 +398,8 @@ def main(): # create storage credentials table_client = _create_credentials(config) # graph data - data, sizes = coalesce_data(table_client) - graph_data(data, sizes) + data, sizes, offer, sku = coalesce_data(table_client) + graph_data(data, sizes, offer, sku) def parseargs(): diff --git a/cascade/setup_private_registry.py b/cascade/setup_private_registry.py index bf82201..28131fa 100755 --- a/cascade/setup_private_registry.py +++ b/cascade/setup_private_registry.py @@ -107,7 +107,8 @@ async def _start_private_registry_instance_async( if proc.returncode != 0: raise RuntimeError('docker 
load non-zero rc: {}'.format( proc.returncode)) - sa, ep, sakey = os.environ['PRIVATE_REGISTRY_STORAGE_ENV'].split(':') + sa, ep, sakey = os.environ[ + 'CASCADE_PRIVATE_REGISTRY_STORAGE_ENV'].split(':') registry_cmd = [ 'docker', 'run', '-d', '-p', '{p}:{p}'.format(p=_DEFAULT_PRIVATE_REGISTRY_PORT), diff --git a/scripts/nodeprep.sh b/scripts/nodeprep.sh index 632c34b..8abc600 100755 --- a/scripts/nodeprep.sh +++ b/scripts/nodeprep.sh @@ -65,7 +65,7 @@ fi if command -v python3 > /dev/null 2>&1; then npstart=`python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())'` else - npstart=`python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow()))'` + npstart=`python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))'` fi # set python env vars @@ -87,10 +87,14 @@ fi cp jpdockerblock.sh $AZ_BATCH_NODE_SHARED_DIR # install docker host engine -if [ $offer == "ubuntuserver" ]; then +if [ $offer == "ubuntuserver" ] || [ $offer == "debian" ]; then DEBIAN_FRONTEND=noninteractive name= if [[ $sku == 14.04.* ]]; then + if [ $azurefile -eq 1 ]; then + echo "azure file docker volume driver not supported on this sku: $sku and offer: $offer" + exit 1 + fi name=ubuntu-trusty srvstart="service docker start" srvstop="service docker stop" @@ -98,8 +102,17 @@ if [ $offer == "ubuntuserver" ]; then name=ubuntu-xenial srvstart="systemctl start docker.service" srvstop="systemctl stop docker.service" + srvenable="systemctl enable docker.service" afdvdenable="systemctl enable azurefile-dockervolumedriver" afdvdstart="systemctl start azurefile-dockervolumedriver" + elif [[ $sku == "8" ]]; then + if [ $azurefile -eq 1 ]; then + echo "azure file docker volume driver not supported on this sku: $sku and offer: $offer" + exit 1 + fi + name=debian-jessie + srvstart="systemctl start docker.service" + srvstop="systemctl stop docker.service" else echo "unsupported sku: $sku for offer: $offer" exit 1 @@ -107,18 +120,26 @@ if 
[ $offer == "ubuntuserver" ]; then # check if docker apt source list file exists aptsrc=/etc/apt/sources.list.d/docker.list if [ ! -e $aptsrc ] || [ ! -s $aptsrc ]; then + # refresh package index + apt-get update + # install required software first + if [ $offer == "debian" ]; then + apt-get install -y -q -o Dpkg::Options::="--force-confnew" apt-transport-https ca-certificates + else + apt-get install -y -q -o Dpkg::Options::="--force-confnew" linux-image-extra-$(uname -r) linux-image-extra-virtual + fi apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys 58118E89F3A912897C070ADBF76221572C52609D echo deb https://apt.dockerproject.org/repo $name main > /etc/apt/sources.list.d/docker.list - # update package index and purge old docker if it exists + # update package index with docker repo and purge old docker if it exists apt-get update apt-get purge -y -q lxc-docker fi - # install required software - apt-get install -y -q -o Dpkg::Options::="--force-confnew" linux-image-extra-$(uname -r) docker-engine # ensure docker opts service modifications are idempotent set +e grep '^DOCKER_OPTS=' /etc/default/docker if [ $? 
-ne 0 ]; then + # install docker engine + apt-get install -y -q -o Dpkg::Options::="--force-confnew" docker-engine set -e $srvstop # set up azure file docker volume driver if instructed @@ -132,12 +153,13 @@ if [ $offer == "ubuntuserver" ]; then fi set +e rm -f /var/lib/docker/network/files/local-kv.db - echo DOCKER_OPTS="-H tcp://127.0.0.1:2375 -H unix:///var/run/docker.sock" >> /etc/default/docker + echo DOCKER_OPTS=\"-H tcp://127.0.0.1:2375 -H unix:///var/run/docker.sock\" >> /etc/default/docker if [[ $sku == 16.04.* ]]; then sed -i '/^\[Service\]/a EnvironmentFile=-/etc/default/docker' /lib/systemd/system/docker.service sed -i '/^ExecStart=/ s/$/ $DOCKER_OPTS/' /lib/systemd/system/docker.service set -e systemctl daemon-reload + $srvenable set +e fi set -e @@ -178,6 +200,98 @@ if [ $offer == "ubuntuserver" ]; then fi fi fi +elif [[ $offer == centos* ]] || [[ $offer == "rhel" ]] || [[ $offer == "oracle-linux" ]]; then + # ensure container only support + if [ $cascadecontainer -eq 0 ]; then + echo "only supported through shipyard container" + exit 1 + fi + # check for azure file docker volume driver disabled + if [ $azurefile -eq 1 ]; then + echo "azure file docker volume driver not supported on this sku: $sku and offer: $offer" + exit 1 + fi + if [[ $sku == 7.* ]]; then + srvstart="systemctl start docker.service" + srvstop="systemctl stop docker.service" + if [[ $offer == "oracle-linux" ]]; then + srvenable="systemctl enable docker.service" + else + srvenable="chkconfig docker on" + fi + else + echo "unsupported sku: $sku for offer: $offer" + exit 1 + fi + # add docker repo to yum + if [[ $offer == "oracle-linux" ]]; then + # TODO, in order to support docker > 1.9, need to upgrade to UEKR4 + echo "oracle linux is not supported at this time" + exit 1 +cat > /etc/yum.repos.d/docker.repo << EOF +[dockerrepo] +name=Docker Repository +baseurl=https://yum.dockerproject.org/repo/main/oraclelinux/7 +enabled=1 +gpgcheck=1 +gpgkey=https://yum.dockerproject.org/gpg +EOF + 
else +cat > /etc/yum.repos.d/docker.repo << EOF +[dockerrepo] +name=Docker Repository +baseurl=https://yum.dockerproject.org/repo/main/centos/7/ +enabled=1 +gpgcheck=1 +gpgkey=https://yum.dockerproject.org/gpg +EOF + fi + # update yum repo and install docker engine + yum install -y docker-engine + # start docker service and enable docker daemon on boot + $srvstart + $srvenable +elif [[ $offer == opensuse* ]] || [[ $offer == sles* ]]; then + # ensure container only support + if [ $cascadecontainer -eq 0 ]; then + echo "only supported through shipyard container" + exit 1 + fi + # check for azure file docker volume driver disabled + if [ $azurefile -eq 1 ]; then + echo "azure file docker volume driver not supported on this sku: $sku and offer: $offer" + exit 1 + fi + # set service commands + srvstart="systemctl start docker" + srvstop="systemctl stop docker" + srvenable="systemctl enable docker" + # add Virtualization:containers repo for recent docker builds + repodir= + if [[ $offer == opensuse* ]]; then + if [[ $sku == "13.2" ]]; then + repodir=openSUSE_13.2 + elif [[ $sku == "42.1" ]]; then + repodir=openSUSE_Leap_42.1 + fi + elif [[ $offer == sles* ]]; then + if [[ $sku == "12" ]]; then + repodir=SLE_12_SP1 + elif [[ $sku == "12-sp1" ]]; then + repodir=SLE_12 + fi + fi + if [ -z $repodir ]; then + echo "unsupported sku: $sku for offer: $offer" + exit 1 + fi + # update zypper repo and install docker engine + zypper addrepo http://download.opensuse.org/repositories/Virtualization:containers/$repodir/Virtualization:containers.repo + zypper -n --no-gpg-checks ref + zypper -n in docker-1.12.0-143.1.x86_64 + # start docker service and enable docker daemon on boot + $srvstart + $srvenable else echo "unsupported offer: $offer (sku: $sku)" exit 1 @@ -203,7 +317,7 @@ if [ $cascadecontainer -eq 1 ]; then if command -v python3 > /dev/null 2>&1; then drpstart=`python3 -c 'import datetime;print(datetime.datetime.utcnow().timestamp())'` else - drpstart=`python -c 'import 
datetime;import time;print(time.mktime(datetime.datetime.utcnow()))'` + drpstart=`python -c 'import datetime;import time;print(time.mktime(datetime.datetime.utcnow().timetuple()))'` fi # create env file envfile=.docker_cascade_envfile @@ -216,7 +330,6 @@ npstart=$npstart drpstart=$drpstart privatereg=$privatereg p2p=$p2p -PRIVATE_REGISTRY_STORAGE_ENV=$PRIVATE_REGISTRY_STORAGE_ENV `env | grep CASCADE_` `env | grep AZ_BATCH_` `env | grep DOCKER_LOGIN_` @@ -244,4 +357,3 @@ fi if [ -z "$p2p" ]; then wait fi - diff --git a/shipyard.py b/shipyard.py index a5f1ce3..7e47dff 100755 --- a/shipyard.py +++ b/shipyard.py @@ -317,6 +317,7 @@ def setup_azurefile_volume_driver(blob_client, config): """ # check to see if binary is downloaded bin = pathlib.Path('resources/azurefile-dockervolumedriver') + bin.parent.mkdir(mode=0o750, parents=True, exist_ok=True) if (not bin.exists() or compute_md5_for_file(bin, False) != _AZUREFILE_DVD_MD5): response = urllibreq.urlopen(_AZUREFILE_DVD_URL) @@ -467,10 +468,6 @@ def add_pool(batch_client, blob_client, config): prefix = None except KeyError: prefix = None - # TODO for now, only support Ubuntu 16.04 - if (publisher != 'Canonical' or offer != 'UbuntuServer' or - sku < '16.04.0-LTS'): - raise ValueError('Unsupported Docker Host VM Config') # pick latest sku node_agent_skus = batch_client.account.list_node_agent_skus() skus_to_use = [ @@ -491,8 +488,9 @@ def add_pool(batch_client, blob_client, config): # handle azurefile docker volume driver if azurefile_vd: # only ubuntu 16.04 is supported for azurefile dvd - if (publisher != 'Canonical' or offer != 'UbuntuServer' or - sku < '16.04.0-LTS'): + if (publisher.lower() != 'canonical' or + offer.lower() != 'ubuntuserver' or + sku.lower() < '16.04.0-lts'): raise ValueError( 'Unsupported Docker Host VM Config with Azurefile ' 'Docker Volume Driver') @@ -559,7 +557,7 @@ def add_pool(batch_client, blob_client, config): ssel = config['docker_registry']['private']['storage_account_settings'] 
pool.start_task.environment_settings.append( batchmodels.EnvironmentSetting( - 'PRIVATE_REGISTRY_STORAGE_ENV', + 'CASCADE_PRIVATE_REGISTRY_STORAGE_ENV', '{}:{}:{}'.format( config['credentials']['storage'][ssel]['account'], config['credentials']['storage'][ssel]['endpoint'], @@ -628,6 +626,7 @@ def add_ssh_tunnel_user(batch_client, config, nodes=None): except KeyError: logger.info('not creating ssh tunnel user on pool {}'.format(pool_id)) else: + ssh_priv_key = None try: ssh_pub_key = config[ 'pool_specification']['ssh_docker_tunnel']['ssh_public_key'] @@ -650,12 +649,15 @@ def add_ssh_tunnel_user(batch_client, config, nodes=None): batch_client, pool_id, node, docker_user, ssh_pub_key) # generate tunnel script if requested if gen_tunnel_script: - ssh_args = [ - 'ssh', '-o', 'StrictHostKeyChecking=no', '-o', - 'UserKnownHostsFile=/dev/null', '-i', ssh_priv_key, + ssh_args = ['ssh'] + if ssh_priv_key is not None: + ssh_args.append('-i') + ssh_args.append(ssh_priv_key) + ssh_args.extend([ + '-o', 'StrictHostKeyChecking=no', + '-o', 'UserKnownHostsFile=/dev/null', '-p', '$2', '-N', '-L', '2375:localhost:2375', - '{}@$1'.format(docker_user) - ] + '{}@$1'.format(docker_user)]) with open(_SSH_TUNNEL_SCRIPT, 'w') as fd: fd.write('#!/usr/bin/env bash\n') fd.write('set -e\n') @@ -874,13 +876,59 @@ def _adjust_settings_for_pool_creation(config): """Adjust settings for pool creation :param dict config: configuration dict """ - publisher = config['pool_specification']['publisher'] + publisher = config['pool_specification']['publisher'].lower() + offer = config['pool_specification']['offer'].lower() + sku = config['pool_specification']['sku'].lower() + # enforce publisher/offer/sku restrictions + allowed = False + shipyard_container_required = True + if publisher == 'canonical': + if offer == 'ubuntuserver': + if sku >= '14.04.0-lts': + allowed = True + if sku >= '16.04.0-lts': + shipyard_container_required = False + elif publisher == 'credativ': + if offer == 'debian': + if sku 
>= '8': + allowed = True + elif publisher == 'openlogic': + if offer.startswith('centos'): + if sku >= '7': + allowed = True + elif publisher == 'redhat': + if offer == 'rhel': + if sku >= '7': + allowed = True + elif publisher == 'suse': + if offer.startswith('sles'): + if sku >= '12': + allowed = True + elif offer == 'opensuse-leap': + if sku >= '42': + allowed = True + elif offer == 'opensuse': + if sku == '13.2': + allowed = True + # oracle linux is not supported due to UEKR4 requirement + if not allowed: + raise ValueError( + ('Unsupported Docker Host VM Config, publisher={} offer={} ' + 'sku={}').format(publisher, offer, sku)) + # adjust for shipyard container requirement + if shipyard_container_required: + config['use_shipyard_docker_image'] = True + logger.warning( + ('forcing shipyard docker image to be used due to ' + 'VM config, publisher={} offer={} sku={}').format( + publisher, offer, sku)) + # adjust inter node comm setting vm_count = int(config['pool_specification']['vm_count']) try: p2p = config['data_replication']['peer_to_peer']['enabled'] except KeyError: p2p = True - max_vms = 20 if publisher.lower() == 'microsoftwindowsserver' else 40 + max_vms = 20 if publisher == 'microsoftwindowsserver' else 40 if p2p and vm_count > max_vms: logger.warning( ('disabling peer-to-peer transfer as pool size of {} exceeds ' @@ -1456,6 +1504,11 @@ def main(): _create_credentials(config) if args.action == 'addpool': + # first check if pool exists to prevent accidental metadata clear + if batch_client.pool.exists(config['pool_specification']['id']): + raise RuntimeError( + 'attempting to create a pool that already exists: {}'.format( + config['pool_specification']['id'])) create_storage_containers( blob_client, queue_client, table_client, config) clear_storage_containers(