diff --git a/convoy/batch.py b/convoy/batch.py
index f5fe4ac..b32fd78 100644
--- a/convoy/batch.py
+++ b/convoy/batch.py
@@ -4402,8 +4402,11 @@ def _construct_mpi_command(pool, task):
             ),
             '-ppn $({})'.format(processes_per_node)
         ])
+        if task.infiniband and settings.is_sriov_rdma_pool(pool.vm_size):
+            ib_pkey_file = '$AZ_BATCH_NODE_STARTUP_DIR/wd/UCX_IB_PKEY'
+            mpi_opts.append(
+                '-env $(cat {})'.format(ib_pkey_file))
     elif task.multi_instance.mpi.runtime == 'openmpi':
-        mpi_opts.append('--mca btl_tcp_if_include eth0')
         if isinstance(processes_per_node, int):
             mpi_opts.extend([
                 '--oversubscribe',
@@ -4425,6 +4428,16 @@ def _construct_mpi_command(pool, task):
                 '--map-by ppr:$({}):node'.format(
                     processes_per_node)
             ])
+        if task.infiniband and settings.is_sriov_rdma_pool(pool.vm_size):
+            ib_pkey_file = '$AZ_BATCH_NODE_STARTUP_DIR/wd/UCX_IB_PKEY'
+            mpi_opts.extend([
+                '--mca pml ucx',
+                '--mca btl ^vader,tcp,openib',
+                '-x UCX_NET_DEVICES=mlx5_0:1',
+                '-x $(cat {})'.format(ib_pkey_file)
+            ])
+        else:
+            mpi_opts.append('--mca btl_tcp_if_include eth0')
     is_singularity = util.is_not_empty(task.singularity_image)
     if is_singularity:
         # build the singularity mpi command
diff --git a/recipes/OpenFOAM-Infiniband-OpenMPI/README.md b/recipes/OpenFOAM-Infiniband-OpenMPI/README.md
new file mode 100644
index 0000000..7ede70d
--- /dev/null
+++ b/recipes/OpenFOAM-Infiniband-OpenMPI/README.md
@@ -0,0 +1,62 @@
+# OpenFOAM-Infiniband-OpenMPI
+This recipe shows how to run [OpenFOAM](http://www.openfoam.org/)
+on Linux using Open MPI over Infiniband in an Azure Batch compute pool.
+Execution of this distributed workload requires the use of
+[multi-instance tasks](../../docs/80-batch-shipyard-multi-instance-tasks.md).
+
+## Configuration
+Please refer to this [set of sample configuration files](./config) for
+this recipe.
+
+### Pool Configuration
+The pool configuration should enable the following properties:
+* `inter_node_communication_enabled` must be set to `true`
+* `per_job_auto_scratch` must be set to `true`. A job autoscratch is needed to
+  share a common input data set between the nodes.
+* `max_tasks_per_node` must be set to 1 or omitted
+* `vm_configuration` must be defined
+  * `platform_image` must be defined
+    * `offer` must be set to `CentOS-HPC`
+    * `publisher` must be set to `OpenLogic`
+    * `sku` must be set to `7.6`
+* `vm_size` must be set to `STANDARD_HB60rs`
+
+### Global Configuration
+The global configuration should set the following properties:
+* `docker_images` array must have a reference to a valid OpenFOAM image
+that can be run with Open MPI and Infiniband in a Docker container context.
+This can be `vincentlabo/openfoam:openmpi-ib` which is published on
+[Docker Hub](https://hub.docker.com/r/vincentlabo/openfoam).
+
+### Jobs Configuration
+The jobs configuration should set the following properties within the `tasks`
+array which should have a task definition containing:
+* `docker_image` should be the name of the Docker image for this container invocation.
+For this example, this should be `vincentlabo/openfoam:openmpi-ib`.
+* `resource_files` should contain the `set_up_sample.sh` script which sets up
+the sample and exports environment variables used by `mpi` `options`.
+* `additional_docker_run_options` should contain the `--cap-add=sys_nice`
+option.
+* `multi_instance` property must be defined
+  * `num_instances` should be set to `pool_specification_vm_count_dedicated`,
+    `pool_specification_vm_count_low_priority`, `pool_current_dedicated`, or
+    `pool_current_low_priority`
+  * `coordination_command` should be unset or `null`. For pools with
+    `native` container support, this command should be supplied if
+    a non-standard `sshd` is required.
+  * `resource_files` array can be empty
+  * `pre_execution_command` should source the `set_up_sample.sh` script.
+  * `mpi` property must be defined
+    * `runtime` should be set to `openmpi`
+    * `options` should contain `-np $np`, `--hostfile $hostfile`, and
+      `-x $mpienvopts`. These options use the environment variables set by
+      the `set_up_sample.sh` script.
+* `command` should contain the command to pass to the `mpirun` invocation.
+For this example, the application `command` to run would be:
+`simpleFoam -parallel`
+
+## Dockerfile and supplementary files
+The `Dockerfile` for the Docker image can be found [here](./docker). Please
+note that you must agree with the
+[OpenFOAM license](http://openfoam.org/licence/) before using this Docker
+image.
diff --git a/recipes/OpenFOAM-Infiniband-OpenMPI/config/config.yaml b/recipes/OpenFOAM-Infiniband-OpenMPI/config/config.yaml
new file mode 100644
index 0000000..e97d7b8
--- /dev/null
+++ b/recipes/OpenFOAM-Infiniband-OpenMPI/config/config.yaml
@@ -0,0 +1,5 @@
+batch_shipyard:
+  storage_account_settings: mystorageaccount
+global_resources:
+  docker_images:
+  - vincentlabo/openfoam:openmpi-ib
diff --git a/recipes/OpenFOAM-Infiniband-OpenMPI/config/credentials.yaml b/recipes/OpenFOAM-Infiniband-OpenMPI/config/credentials.yaml
new file mode 100644
index 0000000..517051e
--- /dev/null
+++ b/recipes/OpenFOAM-Infiniband-OpenMPI/config/credentials.yaml
@@ -0,0 +1,9 @@
+credentials:
+  batch:
+    account_key:
+    account_service_url:
+  storage:
+    mystorageaccount:
+      account:
+      account_key:
+      endpoint: core.windows.net
diff --git a/recipes/OpenFOAM-Infiniband-OpenMPI/config/jobs.yaml b/recipes/OpenFOAM-Infiniband-OpenMPI/config/jobs.yaml
new file mode 100644
index 0000000..1424734
--- /dev/null
+++ b/recipes/OpenFOAM-Infiniband-OpenMPI/config/jobs.yaml
@@ -0,0 +1,21 @@
+job_specifications:
+- id: docker-openfoam-openmpi-ib-job
+  auto_complete: true
+  auto_scratch: true
+  tasks:
+  - docker_image: vincentlabo/openfoam:openmpi-ib
+    additional_docker_run_options:
+    - --cap-add=sys_nice
+    resource_files:
+    - blob_source: https://raw.githubusercontent.com/Azure/batch-shipyard/master/recipes/OpenFOAM-Infiniband-OpenMPI/docker/set_up_sample.sh
+      file_path: set_up_sample.sh
+    multi_instance:
+      num_instances: pool_current_dedicated
+      pre_execution_command: source set_up_sample.sh
+      mpi:
+        runtime: openmpi
+        options:
+        - -np $np
+        - --hostfile $hostfile
+        - -x $mpienvopts
+    command: simpleFoam -parallel
diff --git a/recipes/OpenFOAM-Infiniband-OpenMPI/config/pool.yaml b/recipes/OpenFOAM-Infiniband-OpenMPI/config/pool.yaml
new file mode 100644
index 0000000..d975750
--- /dev/null
+++ b/recipes/OpenFOAM-Infiniband-OpenMPI/config/pool.yaml
@@ -0,0 +1,15 @@
+pool_specification:
+  id: docker-openfoam-openmpi-ib
+  vm_configuration:
+    platform_image:
+      offer: CentOS-HPC
+      publisher: OpenLogic
+      sku: '7.6'
+  vm_count:
+    dedicated: 2
+    low_priority: 0
+  vm_size: STANDARD_HB60rs
+  inter_node_communication_enabled: true
+  per_job_auto_scratch: true
+  ssh:
+    username: shipyard
diff --git a/recipes/OpenFOAM-Infiniband-OpenMPI/docker/Dockerfile
b/recipes/OpenFOAM-Infiniband-OpenMPI/docker/Dockerfile new file mode 100644 index 0000000..7635543 --- /dev/null +++ b/recipes/OpenFOAM-Infiniband-OpenMPI/docker/Dockerfile @@ -0,0 +1,91 @@ +FROM centos:7.6.1810 + +# set up base +COPY ssh_config /root/.ssh/config +RUN yum install -y epel-release \ + && yum groupinstall -y "Development tools" \ + && yum install -y \ + ca-certificates wget openssh-server openssh-clients net-tools \ + numactl-devel gtk2 atk cairo tcsh libnl3 tcl libmnl tk cmake3 \ + libXt-devel qt-devel qt5-qtbase-devel qt5-qtx11extras-devel \ + qt5-qttools-devel +ENV PATH=$PATH:/usr/lib64/qt5/bin + +# configure cmake alias +RUN alternatives --install /usr/local/bin/cmake cmake /usr/bin/cmake3 10 \ + --slave /usr/local/bin/ctest ctest /usr/bin/ctest3 \ + --slave /usr/local/bin/cpack cpack /usr/bin/cpack3 \ + --slave /usr/local/bin/ccmake ccmake /usr/bin/ccmake3 \ + --family cmake + +# set up ssh keys +RUN mkdir -p /var/run/sshd \ + && ssh-keygen -A \ + && sed -i 's/UsePAM yes/UsePAM no/g' /etc/ssh/sshd_config \ + && sed -i 's/#PermitRootLogin yes/PermitRootLogin yes/g' /etc/ssh/sshd_config \ + && sed -i 's/#RSAAuthentication yes/RSAAuthentication yes/g' /etc/ssh/sshd_config \ + && sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/g' /etc/ssh/sshd_config \ + && ssh-keygen -f /root/.ssh/id_rsa -t rsa -N '' \ + && chmod 600 /root/.ssh/config \ + && chmod 700 /root/.ssh \ + && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys + +# download and install mlnx +RUN wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-4.6-1.0.1.1/MLNX_OFED_LINUX-4.6-1.0.1.1-rhel7.6-x86_64.tgz | tar -xzf - \ + && ./MLNX_OFED_LINUX-4.6-1.0.1.1-rhel7.6-x86_64/mlnxofedinstall --user-space-only --without-fw-update --all --force \ + && rm -rf MLNX_OFED_LINUX-4.6-1.0.1.1-rhel7.6-x86_64 + +# download and install HPC-X +ENV HPCX_VERSION="v2.4.1" +RUN cd /opt && \ + wget -q -O - ftp://bgate.mellanox.com/uploads/hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-4.6-1.0.1.1-redhat7.6-x86_64.tbz | tar -xjf - \ + && HPCX_PATH=/opt/hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-4.6-1.0.1.1-redhat7.6-x86_64 \ + && HCOLL_PATH=${HPCX_PATH}/hcoll \ + && UCX_PATH=${HPCX_PATH}/ucx + +# download and install OpenMPI +ENV OMPI_VERSION="4.0.1" +RUN wget -q -O - https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-${OMPI_VERSION}.tar.gz | tar -xzf - \ + && cd openmpi-${OMPI_VERSION} \ + && ./configure --with-ucx=${UCX_PATH} --with-hcoll=${HCOLL_PATH} --enable-mpirun-prefix-by-default \ + && make -j 8 && make install \ + && cd .. 
\ + && rm -rf openmpi-${OMPI_VERSION} + +# download and isntall OpenFOAM +RUN mkdir -p /opt/OpenFOAM \ + && cd /opt/OpenFOAM \ + && wget -q -O - http://dl.openfoam.org/source/7 | tar xz \ + && wget -q -O - http://dl.openfoam.org/third-party/7 | tar xz \ + && mv OpenFOAM-7-version-7 OpenFOAM-7 \ + && mv ThirdParty-7-version-7 ThirdParty-7 \ + && sed -i 's/FOAM_INST_DIR=$HOME\/\$WM_PROJECT/FOAM_INST_DIR=\/opt\/\$WM_PROJECT/' /opt/OpenFOAM/OpenFOAM-7/etc/bashrc \ + && source /opt/OpenFOAM/OpenFOAM-7/etc/bashrc \ + # install OpenFOAM dependency - Scotch/PT-Scotch + && /opt/OpenFOAM/ThirdParty-7/Allwmake \ + # install OpenFOAM dependency - ParaView + && /opt/OpenFOAM/ThirdParty-7/makeParaView -config \ + && sed -i '/DOCUMENTATION_DIR "\${CMAKE_CURRENT_SOURCE_DIR}\/doc"/d' /opt/OpenFOAM/ThirdParty-7/ParaView-5.6.0/Plugins/MOOSETools/CMakeLists.txt \ + && sed -i '/DOCUMENTATION_DIR "\${CMAKE_CURRENT_SOURCE_DIR}\/doc"/d' /opt/OpenFOAM/ThirdParty-7/ParaView-5.6.0/Plugins/SurfaceLIC/CMakeLists.txt \ + && /opt/OpenFOAM/ThirdParty-7/makeParaView \ + # install OpenFOAM + && wmRefresh \ + && /opt/OpenFOAM/OpenFOAM-7/Allwmake -j \ + # hack to make sure that sourcing /opt/OpenFOAM/OpenFOAM-7/etc/bashrc does not fail with `set -e` + && sed -i 's/unalias wmRefresh 2> \/dev\/null/unalias wmRefresh 2> \/dev\/null || true/' /opt/OpenFOAM/OpenFOAM-7/etc/config.sh/aliases \ + # remove intermediate build files + && rm -rf \ + /opt/OpenFOAM/OpenFOAM-7/platforms/*/applications \ + /opt/OpenFOAM/OpenFOAM-7/platforms/*/src \ + /opt/OpenFOAM/ThirdParty-7/build \ + /opt/OpenFOAM/ThirdParty-7/gcc-* \ + /opt/OpenFOAM/ThirdParty-7/gmp-* \ + /opt/OpenFOAM/ThirdParty-7/mpfr-* \ + /opt/OpenFOAM/ThirdParty-7/binutils-* \ + /opt/OpenFOAM/ThirdParty-7/boost* \ + /opt/OpenFOAM/ThirdParty-7/ParaView-* \ + /opt/OpenFOAM/ThirdParty-7/qt-* + +# set up sshd on port 23 +EXPOSE 23 +CMD ["/usr/sbin/sshd", "-D", "-p", "23"] diff --git a/recipes/OpenFOAM-Infiniband-OpenMPI/docker/README.md b/recipes/OpenFOAM-Infiniband-OpenMPI/docker/README.md new file mode 100644 index 0000000..21f603a --- /dev/null +++ b/recipes/OpenFOAM-Infiniband-OpenMPI/docker/README.md @@ -0,0 +1,3 @@ +# Dockerfile for OpenFOAM-Infiniband-OpenMPI +You must agree to the [OpenFOAM license](http://openfoam.org/licence/) +prior to use. diff --git a/recipes/OpenFOAM-Infiniband-OpenMPI/docker/set_up_sample.sh b/recipes/OpenFOAM-Infiniband-OpenMPI/docker/set_up_sample.sh new file mode 100644 index 0000000..418a524 --- /dev/null +++ b/recipes/OpenFOAM-Infiniband-OpenMPI/docker/set_up_sample.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +set -e +set -o pipefail + +# set up openfoam env +OPENFOAM_DIR=/opt/OpenFOAM/OpenFOAM-7 +source $OPENFOAM_DIR/etc/bashrc + +# copy sample into auto scratch shared area +AUTO_SCRATCH_DIR=$AZ_BATCH_TASK_DIR/auto_scratch +cd $AUTO_SCRATCH_DIR +cp -r $OPENFOAM_DIR/tutorials/incompressible/simpleFoam/pitzDaily . 
+cp $OPENFOAM_DIR/tutorials/incompressible/simpleFoam/pitzDailyExptInlet/system/decomposeParDict pitzDaily/system/ + +# get nodes and compute number of processors +IFS=',' read -ra HOSTS <<< "$AZ_BATCH_HOST_LIST" +nodes=${#HOSTS[@]} +ppn=`nproc` +np=$(($nodes * $ppn)) + +# substitute proper number of subdomains +sed -i -e "s/^numberOfSubdomains 4/numberOfSubdomains $np;/" pitzDaily/system/decomposeParDict +root=`python -c "import math; x=int(math.sqrt($np)); print x if x*x==$np else -1"` +if [ $root -eq -1 ]; then + sed -i -e "s/\s*n\s*(2 2 1)/ n ($ppn $nodes 1)/g" pitzDaily/system/decomposeParDict +else + sed -i -e "s/\s*n\s*(2 2 1)/ n ($root $root 1)/g" pitzDaily/system/decomposeParDict +fi + +# decompose +cd pitzDaily +blockMesh +decomposePar -force + +# create hostfile +hostfile="hostfile" +touch $hostfile +>| $hostfile +for node in "${HOSTS[@]}" +do + echo $node slots=$ppn max-slots=$ppn >> $hostfile +done + +# export parameters +mpienvopts=`echo \`env | grep "WM_\|FOAM_" | sed -e "s/=.*$//"\` | sed -e "s/ / -x /g"` +mpienvopts="$mpienvopts -x PATH -x LD_LIBRARY_PATH -x MPI_BUFFER_SIZE" +export mpienvopts +export np +export hostfile diff --git a/recipes/OpenFOAM-Infiniband-OpenMPI/docker/ssh_config b/recipes/OpenFOAM-Infiniband-OpenMPI/docker/ssh_config new file mode 100644 index 0000000..e0e128e --- /dev/null +++ b/recipes/OpenFOAM-Infiniband-OpenMPI/docker/ssh_config @@ -0,0 +1,4 @@ +Host * + Port 23 + StrictHostKeyChecking no + UserKnownHostsFile /dev/null diff --git a/recipes/OpenFOAM-TCP-OpenMPI/README.md b/recipes/OpenFOAM-TCP-OpenMPI/README.md index 5112c28..b1d2c83 100644 --- a/recipes/OpenFOAM-TCP-OpenMPI/README.md +++ b/recipes/OpenFOAM-TCP-OpenMPI/README.md @@ -22,12 +22,6 @@ that can be run with MPI in a Docker container context. This can be `alfpark/openfoam:4.0-gcc-openmpi` or `alfpark/openfoam:v1606plus-gcc-openmpi` which are published on [Docker Hub](https://hub.docker.com/r/alfpark/openfoam). -* `volumes` must be populated with the following: - * `shared_data_volumes` should contain an Azure File Docker volume driver, - a GlusterFS share or a manually configured NFS share. Batch - Shipyard has automatic support for setting up Azure File Docker Volumes - and GlusterFS, please refer to the - [Batch Shipyard Configuration doc](../../docs/10-batch-shipyard-configuration.md). ### Jobs Configuration The jobs configuration should set the following properties within the `tasks` @@ -36,11 +30,9 @@ array which should have a task definition containing: For this example, this should be `alfpark/openfoam:4.0-gcc-openmpi`. * `resource_files` should contain the `set_up_sample.sh` script which set up the sample and export environement variables used by `mpi` `options`. -* `shared_data_volumes` should have a valid volume name as defined in the -global configuration file. Please see the previous section for details. * `multi_instance` property must be defined * `num_instances` should be set to `pool_specification_vm_count_dedicated`, - `pool_vm_count_low_priority`, `pool_current_dedicated`, or + `pool_specification_vm_count_low_priority`, `pool_current_dedicated`, or `pool_current_low_priority` * `coordination_command` should be unset or `null`. 
For pools with
     `native` container support, this command should be supplied if
diff --git a/recipes/OpenFOAM-TCP-OpenMPI/config/jobs.yaml b/recipes/OpenFOAM-TCP-OpenMPI/config/jobs.yaml
index ef99c80..a32ec6a 100644
--- a/recipes/OpenFOAM-TCP-OpenMPI/config/jobs.yaml
+++ b/recipes/OpenFOAM-TCP-OpenMPI/config/jobs.yaml
@@ -5,7 +5,7 @@ job_specifications:
   tasks:
   - docker_image: alfpark/openfoam:4.0-gcc-openmpi
     resource_files:
-    - blob_source: https://raw.githubusercontent.com/vincentlabonte/batch-shipyard/singularity3/recipes/OpenFOAM-TCP-OpenMPI/docker/set_up_sample.sh
+    - blob_source: https://raw.githubusercontent.com/Azure/batch-shipyard/master/recipes/OpenFOAM-TCP-OpenMPI/docker/set_up_sample.sh
       file_path: set_up_sample.sh
     multi_instance:
       num_instances: pool_current_dedicated
diff --git a/recipes/mpiBench-Infiniband-MPICH/README.md b/recipes/mpiBench-Infiniband-MPICH/README.md
new file mode 100644
index 0000000..eb15d19
--- /dev/null
+++ b/recipes/mpiBench-Infiniband-MPICH/README.md
@@ -0,0 +1,52 @@
+# mpiBench-Infiniband-MPICH
+This recipe shows how to run the
+[mpiBench](https://github.com/LLNL/mpiBench) benchmark
+on Linux using MPICH and Infiniband over Azure VM instances in an Azure
+Batch compute pool. Execution of this distributed workload requires the use of
+[multi-instance tasks](../../docs/80-batch-shipyard-multi-instance-tasks.md).
+
+## Configuration
+Please refer to the [set of sample configuration files](./config) for
+this recipe.
+
+### Pool Configuration
+The pool configuration should enable the following properties:
+* `inter_node_communication_enabled` must be set to `true`
+* `max_tasks_per_node` must be set to 1 or omitted
+* `vm_configuration` must be defined
+  * `platform_image` must be defined
+    * `offer` must be set to `CentOS-HPC`
+    * `publisher` must be set to `OpenLogic`
+    * `sku` must be set to `7.6`
+* `vm_size` must be set to `STANDARD_HB60rs`
+
+### Global Configuration
+The global configuration should set the following properties:
+* `docker_images` array must have a reference to a valid mpiBench image that
+can be run with MPICH. This can be `vincentlabo/mpibench:mpich-ib` which
+is published on [Docker Hub](https://hub.docker.com/r/vincentlabo/mpibench).
+
+### Jobs Configuration
+The jobs configuration should set the following properties within the `tasks`
+array which should have a task definition containing:
+* `docker_image` should be the name of the Docker image for this container
+invocation. For this example, this should be `vincentlabo/mpibench:mpich-ib`.
+* `command` should contain the command to pass to the `mpirun` invocation.
+For this example, we will run mpiBench with an ending message size of 1kB.
+The application `command` to run would be: `/mpiBench/mpiBench -e 1K`
+* `multi_instance` property must be defined
+  * `num_instances` should be set to `pool_specification_vm_count_dedicated`,
+    `pool_specification_vm_count_low_priority`, `pool_current_dedicated`, or
+    `pool_current_low_priority`
+  * `coordination_command` should be unset or `null`. For pools with
+    `native` container support, this command should be supplied if
+    a non-standard `sshd` is required.
+  * `resource_files` should be unset or the array can be empty
+  * `mpi` property must be defined
+    * `runtime` should be set to `mpich`
+    * `processes_per_node` should be set to `nproc`
+
+## Supplementary files
+The `Dockerfile` for the Docker image can be found [here](./docker).
+The Singularity Definition file for the Singularity image can be found +[here](./singularity). diff --git a/recipes/mpiBench-Infiniband-MPICH/config/config.yaml b/recipes/mpiBench-Infiniband-MPICH/config/config.yaml new file mode 100644 index 0000000..685fd10 --- /dev/null +++ b/recipes/mpiBench-Infiniband-MPICH/config/config.yaml @@ -0,0 +1,5 @@ +batch_shipyard: + storage_account_settings: mystorageaccount +global_resources: + docker_images: + - vincentlabo/mpibench:mpich-ib diff --git a/recipes/mpiBench-Infiniband-MPICH/config/credentials.yaml b/recipes/mpiBench-Infiniband-MPICH/config/credentials.yaml new file mode 100644 index 0000000..517051e --- /dev/null +++ b/recipes/mpiBench-Infiniband-MPICH/config/credentials.yaml @@ -0,0 +1,9 @@ +credentials: + batch: + account_key: + account_service_url: + storage: + mystorageaccount: + account: + account_key: + endpoint: core.windows.net diff --git a/recipes/mpiBench-Infiniband-MPICH/config/jobs.yaml b/recipes/mpiBench-Infiniband-MPICH/config/jobs.yaml new file mode 100644 index 0000000..367d25a --- /dev/null +++ b/recipes/mpiBench-Infiniband-MPICH/config/jobs.yaml @@ -0,0 +1,14 @@ +job_specifications: +- id: mpibench-mpich-ib + auto_complete: true + shm_size: 256m + tasks: + - docker_image: vincentlabo/mpibench:mpich-ib + additional_docker_run_options: + - --privileged + multi_instance: + num_instances: pool_current_dedicated + mpi: + runtime: mpich + processes_per_node: nproc + command: /mpiBench/mpiBench -e 1K diff --git a/recipes/mpiBench-Infiniband-MPICH/config/pool.yaml b/recipes/mpiBench-Infiniband-MPICH/config/pool.yaml new file mode 100644 index 0000000..36f90c5 --- /dev/null +++ b/recipes/mpiBench-Infiniband-MPICH/config/pool.yaml @@ -0,0 +1,14 @@ +pool_specification: + id: docker-mpibench-mpich-ib + vm_configuration: + platform_image: + offer: CentOS-HPC + publisher: OpenLogic + sku: '7.6' + vm_count: + dedicated: 2 + low_priority: 0 + vm_size: STANDARD_HB60rs + inter_node_communication_enabled: true + ssh: + username: shipyard diff --git a/recipes/mpiBench-Infiniband-MPICH/docker/Dockerfile b/recipes/mpiBench-Infiniband-MPICH/docker/Dockerfile new file mode 100644 index 0000000..ca5df4e --- /dev/null +++ b/recipes/mpiBench-Infiniband-MPICH/docker/Dockerfile @@ -0,0 +1,57 @@ +FROM centos:7.6.1810 + +# set up base +COPY ssh_config /root/.ssh/config +RUN yum install -y epel-release \ + && yum groupinstall -y "Development tools" \ + && yum install -y \ + ca-certificates wget openssh-server openssh-clients net-tools \ + numactl-devel gtk2 atk cairo tcsh libnl3 tcl libmnl tk + +# set up ssh keys +RUN mkdir -p /var/run/sshd \ + && ssh-keygen -A \ + && sed -i 's/UsePAM yes/UsePAM no/g' /etc/ssh/sshd_config \ + && sed -i 's/#PermitRootLogin yes/PermitRootLogin yes/g' /etc/ssh/sshd_config \ + && sed -i 's/#RSAAuthentication yes/RSAAuthentication yes/g' /etc/ssh/sshd_config \ + && sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/g' /etc/ssh/sshd_config \ + && ssh-keygen -f /root/.ssh/id_rsa -t rsa -N '' \ + && chmod 600 /root/.ssh/config \ + && chmod 700 /root/.ssh \ + && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys + +# set up workdir +ENV INSTALL_PREFIX=/opt +WORKDIR /tmp/mpi + +# download and install mlnx +RUN wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-4.6-1.0.1.1/MLNX_OFED_LINUX-4.6-1.0.1.1-rhel7.6-x86_64.tgz | tar -xzf - \ + && ./MLNX_OFED_LINUX-4.6-1.0.1.1-rhel7.6-x86_64/mlnxofedinstall --user-space-only --without-fw-update --all --force \ + && rm -rf MLNX_OFED_LINUX-4.6-1.0.1.1-rhel7.6-x86_64 + +# 
download and install HPC-X
+ENV HPCX_VERSION="v2.4.1"
+RUN cd ${INSTALL_PREFIX} && \
+    wget -q -O - ftp://bgate.mellanox.com/uploads/hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-4.6-1.0.1.1-redhat7.6-x86_64.tbz | tar -xjf - \
+    && HPCX_PATH=${INSTALL_PREFIX}/hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-4.6-1.0.1.1-redhat7.6-x86_64 \
+    && HCOLL_PATH=${HPCX_PATH}/hcoll \
+    && UCX_PATH=${HPCX_PATH}/ucx
+
+# download and install MPICH
+ENV MPICH_VERSION="3.3"
+RUN wget -q -O - https://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz | tar -xzf - \
+    && cd mpich-${MPICH_VERSION} \
+    && ./configure --with-ucx=${UCX_PATH} --with-hcoll=${HCOLL_PATH} --enable-mpirun-prefix-by-default --with-device=ch4:ucx \
+    && make -j 8 && make install \
+    && cd .. \
+    && rm -rf mpich-${MPICH_VERSION}
+
+# download and install mpiBench
+RUN wget -q -O - https://codeload.github.com/LLNL/mpiBench/tar.gz/master | tar -xzf - \
+    && mv ./mpiBench-master /mpiBench \
+    && cd /mpiBench \
+    && make
+
+# set up sshd on port 23
+EXPOSE 23
+CMD ["/usr/sbin/sshd", "-D", "-p", "23"]
diff --git a/recipes/mpiBench-Infiniband-MPICH/docker/README.md b/recipes/mpiBench-Infiniband-MPICH/docker/README.md
new file mode 100644
index 0000000..0d7801b
--- /dev/null
+++ b/recipes/mpiBench-Infiniband-MPICH/docker/README.md
@@ -0,0 +1,3 @@
+# Dockerfile for mpiBench-Infiniband-MPICH
+This image can be found on
+[Docker Hub](https://hub.docker.com/r/vincentlabo/mpibench).
\ No newline at end of file
diff --git a/recipes/mpiBench-Infiniband-MPICH/docker/ssh_config b/recipes/mpiBench-Infiniband-MPICH/docker/ssh_config
new file mode 100644
index 0000000..e0e128e
--- /dev/null
+++ b/recipes/mpiBench-Infiniband-MPICH/docker/ssh_config
@@ -0,0 +1,4 @@
+Host *
+    Port 23
+    StrictHostKeyChecking no
+    UserKnownHostsFile /dev/null
diff --git a/recipes/mpiBench-Infiniband-OpenMPI/README.md b/recipes/mpiBench-Infiniband-OpenMPI/README.md
new file mode 100644
index 0000000..b1f34a6
--- /dev/null
+++ b/recipes/mpiBench-Infiniband-OpenMPI/README.md
@@ -0,0 +1,52 @@
+# mpiBench-Infiniband-OpenMPI
+This recipe shows how to run the
+[mpiBench](https://github.com/LLNL/mpiBench) benchmark
+on Linux using Open MPI and Infiniband over Azure VM instances in an Azure
+Batch compute pool. Execution of this distributed workload requires the use of
+[multi-instance tasks](../../docs/80-batch-shipyard-multi-instance-tasks.md).
+
+## Configuration
+Please refer to the [set of sample configuration files](./config) for
+this recipe.
+
+### Pool Configuration
+The pool configuration should enable the following properties:
+* `inter_node_communication_enabled` must be set to `true`
+* `max_tasks_per_node` must be set to 1 or omitted
+* `vm_configuration` must be defined
+  * `platform_image` must be defined
+    * `offer` must be set to `CentOS-HPC`
+    * `publisher` must be set to `OpenLogic`
+    * `sku` must be set to `7.6`
+* `vm_size` must be set to `STANDARD_HB60rs`
+
+### Global Configuration
+The global configuration should set the following properties:
+* `docker_images` array must have a reference to a valid mpiBench image that
+can be run with Open MPI. This can be `vincentlabo/mpibench:openmpi-ib` which
+is published on [Docker Hub](https://hub.docker.com/r/vincentlabo/mpibench).
+
+### Jobs Configuration
+The jobs configuration should set the following properties within the `tasks`
+array which should have a task definition containing:
+* `docker_image` should be the name of the Docker image for this container
+invocation.
For this example, this should be `vincentlabo/mpibench:openmpi-ib`. +* `command` should contain the command to pass to the `mpirun` invocation. +For this example, we will run mpiBench with an ending message size of 1kB. +The application `command` to run would be: `/mpiBench/mpiBench -e 1K` +* `multi_instance` property must be defined + * `num_instances` should be set to `pool_specification_vm_count_dedicated`, + `pool_specification_vm_count_low_priority`, `pool_current_dedicated`, or + `pool_current_low_priority` + * `coordination_command` should be unset or `null`. For pools with + `native` container support, this command should be supplied if + a non-standard `sshd` is required. + * `resource_files` should be unset or the array can be empty + * `mpi` property must be defined + * `runtime` should be set to `openmpi` + * `processes_per_node` should be set to `nproc` + +## Supplementary files +The `Dockerfile` for the Docker image can be found [here](./docker). +The Singularity Definition file for the Singularity image can be found +[here](./singularity). diff --git a/recipes/mpiBench-Infiniband-OpenMPI/config/config.yaml b/recipes/mpiBench-Infiniband-OpenMPI/config/config.yaml new file mode 100644 index 0000000..b1f8faf --- /dev/null +++ b/recipes/mpiBench-Infiniband-OpenMPI/config/config.yaml @@ -0,0 +1,5 @@ +batch_shipyard: + storage_account_settings: mystorageaccount +global_resources: + docker_images: + - vincentlabo/mpibench:openmpi-ib diff --git a/recipes/mpiBench-Infiniband-OpenMPI/config/credentials.yaml b/recipes/mpiBench-Infiniband-OpenMPI/config/credentials.yaml new file mode 100644 index 0000000..517051e --- /dev/null +++ b/recipes/mpiBench-Infiniband-OpenMPI/config/credentials.yaml @@ -0,0 +1,9 @@ +credentials: + batch: + account_key: + account_service_url: + storage: + mystorageaccount: + account: + account_key: + endpoint: core.windows.net diff --git a/recipes/mpiBench-Infiniband-OpenMPI/config/jobs.yaml b/recipes/mpiBench-Infiniband-OpenMPI/config/jobs.yaml new file mode 100644 index 0000000..fcf1ec0 --- /dev/null +++ b/recipes/mpiBench-Infiniband-OpenMPI/config/jobs.yaml @@ -0,0 +1,13 @@ +job_specifications: +- id: mpibench-openmpi-ib + auto_complete: true + tasks: + - docker_image: vincentlabo/mpibench:openmpi-ib + additional_docker_run_options: + - --privileged + multi_instance: + num_instances: pool_current_dedicated + mpi: + runtime: openmpi + processes_per_node: nproc + command: /mpiBench/mpiBench -e 1K diff --git a/recipes/mpiBench-Infiniband-OpenMPI/config/pool.yaml b/recipes/mpiBench-Infiniband-OpenMPI/config/pool.yaml new file mode 100644 index 0000000..d27174f --- /dev/null +++ b/recipes/mpiBench-Infiniband-OpenMPI/config/pool.yaml @@ -0,0 +1,14 @@ +pool_specification: + id: docker-mpibench-openmpi-ib + vm_configuration: + platform_image: + offer: CentOS-HPC + publisher: OpenLogic + sku: '7.6' + vm_count: + dedicated: 2 + low_priority: 0 + vm_size: STANDARD_HB60rs + inter_node_communication_enabled: true + ssh: + username: shipyard diff --git a/recipes/mpiBench-Infiniband-OpenMPI/docker/Dockerfile b/recipes/mpiBench-Infiniband-OpenMPI/docker/Dockerfile new file mode 100644 index 0000000..c8c3e47 --- /dev/null +++ b/recipes/mpiBench-Infiniband-OpenMPI/docker/Dockerfile @@ -0,0 +1,57 @@ +FROM centos:7.6.1810 + +# set up base +COPY ssh_config /root/.ssh/config +RUN yum install -y epel-release \ + && yum groupinstall -y "Development tools" \ + && yum install -y \ + ca-certificates wget openssh-server openssh-clients net-tools \ + numactl-devel gtk2 atk cairo tcsh 
libnl3 tcl libmnl tk + +# set up ssh keys +RUN mkdir -p /var/run/sshd \ + && ssh-keygen -A \ + && sed -i 's/UsePAM yes/UsePAM no/g' /etc/ssh/sshd_config \ + && sed -i 's/#PermitRootLogin yes/PermitRootLogin yes/g' /etc/ssh/sshd_config \ + && sed -i 's/#RSAAuthentication yes/RSAAuthentication yes/g' /etc/ssh/sshd_config \ + && sed -i 's/#PubkeyAuthentication yes/PubkeyAuthentication yes/g' /etc/ssh/sshd_config \ + && ssh-keygen -f /root/.ssh/id_rsa -t rsa -N '' \ + && chmod 600 /root/.ssh/config \ + && chmod 700 /root/.ssh \ + && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys + +# set up workdir +ENV INSTALL_PREFIX=/opt +WORKDIR /tmp/mpi + +# download and install mlnx +RUN wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-4.6-1.0.1.1/MLNX_OFED_LINUX-4.6-1.0.1.1-rhel7.6-x86_64.tgz | tar -xzf - \ + && ./MLNX_OFED_LINUX-4.6-1.0.1.1-rhel7.6-x86_64/mlnxofedinstall --user-space-only --without-fw-update --all --force \ + && rm -rf MLNX_OFED_LINUX-4.6-1.0.1.1-rhel7.6-x86_64 + +# download and install HPC-X +ENV HPCX_VERSION="v2.4.1" +RUN cd ${INSTALL_PREFIX} && \ + wget -q -O - ftp://bgate.mellanox.com/uploads/hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-4.6-1.0.1.1-redhat7.6-x86_64.tbz | tar -xjf - \ + && HPCX_PATH=${INSTALL_PREFIX}/hpcx-${HPCX_VERSION}-gcc-MLNX_OFED_LINUX-4.6-1.0.1.1-redhat7.6-x86_64 \ + && HCOLL_PATH=${HPCX_PATH}/hcoll \ + && UCX_PATH=${HPCX_PATH}/ucx + +# download and install OpenMPI +ENV OMPI_VERSION="4.0.1" +RUN wget -q -O - https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-${OMPI_VERSION}.tar.gz | tar -xzf - \ + && cd openmpi-${OMPI_VERSION} \ + && ./configure --with-ucx=${UCX_PATH} --with-hcoll=${HCOLL_PATH} --enable-mpirun-prefix-by-default \ + && make -j 8 && make install \ + && cd .. \ + && rm -rf openmpi-${OMPI_VERSION} + +# download and install mpiBench +RUN wget -q -O - https://codeload.github.com/LLNL/mpiBench/tar.gz/master | tar -xzf - \ + && mv ./mpiBench-master /mpiBench \ + && cd /mpiBench \ + && make + +# set up sshd on port 23 +EXPOSE 23 +CMD ["/usr/sbin/sshd", "-D", "-p", "23"] diff --git a/recipes/mpiBench-Infiniband-OpenMPI/docker/README.md b/recipes/mpiBench-Infiniband-OpenMPI/docker/README.md new file mode 100644 index 0000000..3965069 --- /dev/null +++ b/recipes/mpiBench-Infiniband-OpenMPI/docker/README.md @@ -0,0 +1,3 @@ +# Dockerfile for mpiBench-Infiniband-OpenMPI +This image can be found on +[Docker Hub](https://hub.docker.com/r/vincentlabo/mpibench). 
\ No newline at end of file diff --git a/recipes/mpiBench-Infiniband-OpenMPI/docker/ssh_config b/recipes/mpiBench-Infiniband-OpenMPI/docker/ssh_config new file mode 100644 index 0000000..e0e128e --- /dev/null +++ b/recipes/mpiBench-Infiniband-OpenMPI/docker/ssh_config @@ -0,0 +1,4 @@ +Host * + Port 23 + StrictHostKeyChecking no + UserKnownHostsFile /dev/null diff --git a/scripts/shipyard_nodeprep.sh b/scripts/shipyard_nodeprep.sh index f9a2074..b7c7561 100755 --- a/scripts/shipyard_nodeprep.sh +++ b/scripts/shipyard_nodeprep.sh @@ -26,6 +26,7 @@ NVIDIA_DOCKER_PACKAGE_CENTOS="nvidia-docker2-${NVIDIA_DOCKER_VERSION}-1.docker${ MOUNTS_PATH=$AZ_BATCH_NODE_ROOT_DIR/mounts VOLATILE_PATH=$AZ_BATCH_NODE_ROOT_DIR/volatile IB_PKEY_FILE=$AZ_BATCH_TASK_WORKING_DIR/IB_PKEY +UCX_IB_PKEY_FILE=$AZ_BATCH_TASK_WORKING_DIR/UCX_IB_PKEY # status file consts lisinstalled=${VOLATILE_PATH}/.batch_shipyard_lis_installed @@ -1398,11 +1399,11 @@ export_ib_pkey() else IB_PKEY=$key1 fi - - UCX_IB_PKEY=$(printf '0x%04x' "$((IB_PKEY & 0x0FFF))") - cat > "$IB_PKEY_FILE" << EOF IB_PKEY=$IB_PKEY +EOF + UCX_IB_PKEY=$(printf '0x%04x' "$((IB_PKEY & 0x0FFF))") +cat > "$UCX_IB_PKEY_FILE" << EOF UCX_IB_PKEY=$UCX_IB_PKEY EOF }
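
For reference, the Open MPI transport selection added to `_construct_mpi_command` above reduces to the following standalone Python sketch. The names here (`openmpi_transport_opts`, `infiniband`, `is_sriov_rdma_pool`) are illustrative only; the real code operates on Batch Shipyard pool and task settings objects and appends to an existing `mpi_opts` list.

```python
# Simplified sketch of the Open MPI transport selection in convoy/batch.py.
# Function and argument names are illustrative, not part of the repository.

UCX_IB_PKEY_FILE = '$AZ_BATCH_NODE_STARTUP_DIR/wd/UCX_IB_PKEY'


def openmpi_transport_opts(infiniband, is_sriov_rdma_pool):
    """Return the extra mpirun options for the pool's interconnect."""
    if infiniband and is_sriov_rdma_pool:
        # SR-IOV RDMA pools: use the UCX PML, exclude the vader/tcp/openib
        # BTLs, pin UCX to the Mellanox device, and forward the partition
        # key exported during node prep into the MPI environment.
        return [
            '--mca pml ucx',
            '--mca btl ^vader,tcp,openib',
            '-x UCX_NET_DEVICES=mlx5_0:1',
            '-x $(cat {})'.format(UCX_IB_PKEY_FILE),
        ]
    # all other pools keep the prior behavior: TCP over eth0
    return ['--mca btl_tcp_if_include eth0']


if __name__ == '__main__':
    print(' '.join(openmpi_transport_opts(True, True)))
    print(' '.join(openmpi_transport_opts(False, False)))
```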
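The `UCX_IB_PKEY` value consumed by those options is produced by `export_ib_pkey()` in `scripts/shipyard_nodeprep.sh`, which now writes it to a separate `UCX_IB_PKEY` file. A minimal Python rendering of the shell arithmetic `printf '0x%04x' "$((IB_PKEY & 0x0FFF))"`, provided only to illustrate the masking:

```python
def ucx_ib_pkey(ib_pkey: str) -> str:
    """Mask off the high bits (including the membership bit) of the
    Infiniband partition key and format it as 0x%04x, mirroring
    export_ib_pkey() in shipyard_nodeprep.sh."""
    return '0x{:04x}'.format(int(ib_pkey, 16) & 0x0FFF)


# e.g. a full-membership pkey of 0x8001 becomes 0x0001
assert ucx_ib_pkey('0x8001') == '0x0001'
```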
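The OpenFOAM recipe's `set_up_sample.sh` chooses the `decomposeParDict` layout with a simple rule: `n (root root 1)` when the total rank count `np = nodes * ppn` is a perfect square, otherwise `n (ppn nodes 1)`. A small Python sketch of that arithmetic (the script itself does this with `sed` and an inline `python` one-liner):

```python
import math


def openfoam_layout(nodes: int, ppn: int):
    """Compute numberOfSubdomains and the 'n (x y 1)' decomposition used
    by set_up_sample.sh for the pitzDaily case."""
    np_total = nodes * ppn
    root = int(math.sqrt(np_total))
    if root * root == np_total:
        layout = (root, root, 1)
    else:
        layout = (ppn, nodes, 1)
    return np_total, layout


# two STANDARD_HB60rs nodes with 60 cores each -> 120 ranks, n (60 2 1)
print(openfoam_layout(2, 60))
```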