diff --git a/appveyor.yml b/appveyor.yml index c63de5b..01811b2 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -226,7 +226,7 @@ deploy: description: 'Batch Shipyard release' auth_token: secure: +f4N6Qsv3HvJyii0Bs+8qBx3YS7+7FJUWbFSiAdEIUDubFQnNkJgFnBw0Ew2SLkv - artifact: /.*\.exe/,/.*\.exe.sha256/ + artifact: /.*\.exe.*/ draft: true prerelease: true force_update: true diff --git a/config_templates/monitor.yaml b/config_templates/monitor.yaml index d7bc368..97e8a7e 100644 --- a/config_templates/monitor.yaml +++ b/config_templates/monitor.yaml @@ -45,4 +45,4 @@ monitoring: port: 9090 scrape_interval: 10s grafana: - additional_dashboards: null + additional_dashboards: {} diff --git a/convoy/batch.py b/convoy/batch.py index 749c499..3e77d17 100644 --- a/convoy/batch.py +++ b/convoy/batch.py @@ -686,6 +686,8 @@ def _block_for_nodes_ready( suppress_confirm=True) unusable_delete = True else: + # list nodes to dump exact error + list_nodes(batch_client, config, pool_id=pool_id, nodes=nodes) raise RuntimeError( ('Unusable nodes detected in pool {}. You can delete ' 'unusable nodes with "pool nodes del --all-unusable" ' diff --git a/convoy/data.py b/convoy/data.py index 331b833..57f6041 100644 --- a/convoy/data.py +++ b/convoy/data.py @@ -59,7 +59,7 @@ from .version import __version__ logger = logging.getLogger(__name__) util.setup_logger(logger) # global defines -_BLOBXFER_VERSION = '1.3.0' +_BLOBXFER_VERSION = '1.3.1' _MEGABYTE = 1048576 _MAX_READ_BLOCKSIZE_BYTES = 4194304 _FILE_SPLIT_PREFIX = '_shipyard-' diff --git a/docker/windows/cargo/Dockerfile b/docker/windows/cargo/Dockerfile index 920a22a..2c6c132 100644 --- a/docker/windows/cargo/Dockerfile +++ b/docker/windows/cargo/Dockerfile @@ -1,7 +1,7 @@ # Dockerfile for Azure/batch-shipyard Cargo (Windows) # Adapted from: https://github.com/StefanScherer/dockerfiles-windows/blob/master/python/Dockerfile -FROM python:3.6.5-windowsservercore-ltsc2016 +FROM python:3.6.6-windowsservercore-ltsc2016 MAINTAINER Fred Park ENV chocolateyUseWindowsCompression false @@ -30,8 +30,8 @@ COPY --from=0 /batch-shipyard/cargo /batch-shipyard SHELL ["powershell", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"] -ENV PYTHON_VERSION 3.6.5 -ENV PYTHON_PIP_VERSION 9.0.3 +ENV PYTHON_VERSION 3.6.6 +ENV PYTHON_PIP_VERSION 10.0.1 RUN $env:PATH = 'C:\Python;C:\Python\Scripts;{0}' -f $env:PATH ; \ Set-ItemProperty -Path 'HKLM:\SYSTEM\CurrentControlSet\Control\Session Manager\Environment\' -Name Path -Value $env:PATH ; \ diff --git a/docker/windows/cli/Dockerfile b/docker/windows/cli/Dockerfile index 17a1d65..a769816 100644 --- a/docker/windows/cli/Dockerfile +++ b/docker/windows/cli/Dockerfile @@ -1,7 +1,7 @@ # Dockerfile for Azure/batch-shipyard CLI (Windows) # Adapted from: https://github.com/StefanScherer/dockerfiles-windows/blob/master/python/Dockerfile -FROM python:3.6.5-windowsservercore-ltsc2016 +FROM python:3.6.6-windowsservercore-ltsc2016 MAINTAINER Fred Park ENV chocolateyUseWindowsCompression false @@ -34,8 +34,8 @@ COPY --from=0 /batch-shipyard /batch-shipyard SHELL ["powershell", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"] -ENV PYTHON_VERSION 3.6.5 -ENV PYTHON_PIP_VERSION 9.0.3 +ENV PYTHON_VERSION 3.6.6 +ENV PYTHON_PIP_VERSION 10.0.1 RUN $env:PATH = 'C:\Python;C:\Python\Scripts;{0}' -f $env:PATH ; \ Set-ItemProperty -Path 'HKLM:\SYSTEM\CurrentControlSet\Control\Session Manager\Environment\' -Name Path -Value $env:PATH ; \ diff --git a/docs/14-batch-shipyard-configuration-jobs.md b/docs/14-batch-shipyard-configuration-jobs.md index b12ef7b..fab9050 100644 --- a/docs/14-batch-shipyard-configuration-jobs.md +++ b/docs/14-batch-shipyard-configuration-jobs.md @@ -448,7 +448,13 @@ to both compute node A and B. However, if `job-1`:`task-3` is then run on compute node A after `job-1`:`task-1`, then the `input_data` is not transferred again. This object currently supports `azure_batch` and `azure_storage` as members. - * `azure_batch` contains the following members: + * `azure_batch` will transfer data from a compute node that has run the + specified task. Note that there is no implied dependency. It is + recommended to specify a `depends_on` in order to ensure that the + specified task runs before this one (note that `depends_on` requires + that the upstream task must exist within the same job). Additionally, + the compute node which ran the task must not have been deleted or + resized out of the pool. * (required) `job_id` the job id of the task * (required) `task_id` the id of the task to fetch files from * (optional) `include` is an array of include filters @@ -669,7 +675,13 @@ ingressed for this specific task. This object currently supports `azure_batch` and `azure_storage` as members. Note for multi-instance tasks, transfer of `input_data` is only applied to the task running the application command. - * `azure_batch` contains the following members: + * `azure_batch` will transfer data from a compute node that has run the + specified task. Note that there is no implied dependency. It is + recommended to specify a `depends_on` in order to ensure that the + specified task runs before this one (note that `depends_on` requires + that the upstream task must exist within the same job). Additionally, + the compute node which ran the task must not have been deleted or + resized out of the pool. * (required) `job_id` the job id of the task * (required) `task_id` the id of the task to fetch files from * (optional) `include` is an array of include filters diff --git a/docs/16-batch-shipyard-configuration-monitor.md b/docs/16-batch-shipyard-configuration-monitor.md index 94a8370..2219917 100644 --- a/docs/16-batch-shipyard-configuration-monitor.md +++ b/docs/16-batch-shipyard-configuration-monitor.md @@ -46,7 +46,7 @@ monitoring: port: 9090 scrape_interval: 10s grafana: - additional_dashboards: null + additional_dashboards: {} ``` The `monitoring` property has the following members: @@ -113,7 +113,8 @@ resource monitor. be exclusive to the resource monitor and cannot be shared with other resources, including Batch compute nodes. Batch compute nodes and storage clusters can co-exist on the same virtual network, but should be in - separate subnets. + separate subnets. It's recommended that the monitor VM be in a separate + subnet as well. * (required) `name` is the subnet name. * (required) `address_prefix` is the subnet address prefix to use for allocation of the resource monitor virtual machine to. @@ -126,7 +127,7 @@ to the resource monitoring virtual machine. * (optional) `grafana` rule allows grafana HTTPS (443) server port to be exposed to the specified address prefix. Multiple address prefixes can be specified. - * (optional) `prometheus` rule allows the Prometheus server pot to be + * (optional) `prometheus` rule allows the Prometheus server port to be exposed to the specified address prefix. Multiple address prefixes can be specified. * (optional) `custom_inbound_rules` are custom inbound rules for other diff --git a/docs/66-batch-shipyard-resource-monitoring.md b/docs/66-batch-shipyard-resource-monitoring.md index 247fd97..1d1b0c1 100644 --- a/docs/66-batch-shipyard-resource-monitoring.md +++ b/docs/66-batch-shipyard-resource-monitoring.md @@ -278,10 +278,12 @@ shipyard monitor add --poolid mybatchpool ``` After the monitor is added, you can point your web browser at the -monitoring resource FQDN emitted above. You can remove individual -resources to monitor with the command `shipyard monitor remove`. -Once you have no need for your monitoring resource, you can either suspend -it or remove it altogether. +monitoring resource FQDN emitted above. Note that there will be a delay +between `monitor add` and the resource showing up in Grafana. + +You can remove individual resources to monitor with the command +`monitor remove`. Once you have no need for your monitoring resource, you +can either suspend it or destroy it entirely. ```shell # remove the prior Batch pool monitor diff --git a/requirements.txt b/requirements.txt index 632938a..a5d855c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ adal==1.0.2 azure-batch==4.1.3 azure-cosmosdb-table==1.0.3 -azure-keyvault==1.0.0b1 +azure-keyvault==1.0.0 azure-mgmt-authorization==0.50.0 azure-mgmt-batch==5.0.1 azure-mgmt-compute==3.1.0rc3 @@ -11,7 +11,7 @@ azure-mgmt-storage==2.0.0rc3 azure-storage-blob==1.3.0 azure-storage-common==1.3.0 azure-storage-file==1.3.0 -blobxfer==1.3.0 +blobxfer==1.3.1 click==6.7 future==0.16.0 futures==3.2.0; python_version < '3' diff --git a/scripts/shipyard_nodeprep.sh b/scripts/shipyard_nodeprep.sh index de112df..62ca115 100755 --- a/scripts/shipyard_nodeprep.sh +++ b/scripts/shipyard_nodeprep.sh @@ -10,7 +10,7 @@ DOCKER_CE_VERSION_DEBIAN=18.03.1 DOCKER_CE_VERSION_CENTOS=18.03.1 DOCKER_CE_VERSION_SLES=17.09.1 NVIDIA_DOCKER_VERSION=2.0.3 -GLUSTER_VERSION_DEBIAN=4.0 +GLUSTER_VERSION_DEBIAN=4.1 GLUSTER_VERSION_CENTOS=40 # consts diff --git a/scripts/shipyard_remotefs_bootstrap.sh b/scripts/shipyard_remotefs_bootstrap.sh index 44a125d..7060a31 100755 --- a/scripts/shipyard_remotefs_bootstrap.sh +++ b/scripts/shipyard_remotefs_bootstrap.sh @@ -6,7 +6,7 @@ set -o pipefail export DEBIAN_FRONTEND=noninteractive # constants -GLUSTER_VERSION=4.0 +GLUSTER_VERSION=4.1 gluster_brick_mountpath=/gluster/brick gluster_brick_location=$gluster_brick_mountpath/brick0 ipaddress=$(ip addr list eth0 | grep "inet " | cut -d' ' -f6 | cut -d/ -f1)