Update dependencies and Dockerfile (#371)

Update dependencies and Dockerfile:
* upgrade nccl-tests and rccl-tests to their latest versions to match
  the NCCL/RCCL versions
* unify image tag names on DockerHub
* remove verbose output in Dockerfiles and fix some minor flags
This commit is contained in:
Yifan Xiong 2022-07-06 18:31:41 +08:00 коммит произвёл GitHub
Родитель a94ead34b0
Коммит 9f03d5687a
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 22 добавлений и 24 удалений

4
.github/workflows/build-image.yml поставляемый
Просмотреть файл

@@ -39,12 +39,12 @@ jobs:
BASE_IMAGE=rocm/pytorch:rocm5.1.1_ubuntu20.04_py3.7_pytorch_1.10.0
- name: rocm5.0.1
dockerfile: rocm5.0.x
tags: superbench/main:rocm5.0.1-pytorch1.9.0
tags: superbench/main:rocm5.0.1
extra_args: >-
BASE_IMAGE=rocm/pytorch:rocm5.0.1_ubuntu18.04_py3.7_pytorch_1.9.0
- name: rocm5.0
dockerfile: rocm5.0.x
tags: superbench/main:rocm5.0-pytorch1.9.0
tags: superbench/main:rocm5.0
extra_args: >-
BASE_IMAGE=rocm/pytorch:rocm5.0_ubuntu18.04_py3.7_pytorch_1.9.0
steps:

Просмотреть файл

@@ -51,7 +51,7 @@ ARG NUM_MAKE_JOBS=
# Install Docker
ENV DOCKER_VERSION=20.10.8
RUN cd /tmp && \
wget https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VERSION}.tgz -O docker.tgz && \
wget -q https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VERSION}.tgz -O docker.tgz && \
tar --extract --file docker.tgz --strip-components 1 --directory /usr/local/bin/ && \
rm docker.tgz
@@ -103,8 +103,8 @@ RUN cd /tmp && \
# Install Intel MLC
RUN cd /tmp && \
curl https://www.intel.com/content/dam/develop/external/us/en/documents/mlc_v3.9a.tgz -o mlc.tgz && \
tar xzvf mlc.tgz Linux/mlc && \
wget -q https://downloadmirror.intel.com/736634/mlc_v3.9a.tgz -O mlc.tgz && \
tar xzf mlc.tgz Linux/mlc && \
cp ./Linux/mlc /usr/local/bin/ && \
rm -rf ./Linux mlc.tgz

Просмотреть файл

@@ -59,7 +59,7 @@ ARG NUM_MAKE_JOBS=
# Install Docker
ENV DOCKER_VERSION=20.10.8
RUN cd /tmp && \
wget https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VERSION}.tgz -O docker.tgz && \
wget -q https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VERSION}.tgz -O docker.tgz && \
tar --extract --file docker.tgz --strip-components 1 --directory /usr/local/bin/ && \
rm docker.tgz
@@ -95,8 +95,8 @@ RUN cd /tmp && \
# Install Intel MLC
RUN cd /tmp && \
curl https://www.intel.com/content/dam/develop/external/us/en/documents/mlc_v3.9a.tgz -o mlc.tgz && \
tar xzvf mlc.tgz Linux/mlc && \
wget -q https://downloadmirror.intel.com/736634/mlc_v3.9a.tgz -O mlc.tgz && \
tar xzf mlc.tgz Linux/mlc && \
cp ./Linux/mlc /usr/local/bin/ && \
rm -rf ./Linux mlc.tgz

Просмотреть файл

@@ -58,20 +58,19 @@ ARG NUM_MAKE_JOBS=
# Upgrade CMake from 3.16 to 3.23
ENV CMAKE_VERSION=3.23.1
ENV CMAKE_REPO="https://github.com/Kitware/CMake/releases/download/v3.23.1/"
RUN wget -nv ${CMAKE_REPO}/cmake-${CMAKE_VERSION}.tar.gz && \
tar -xvf cmake-${CMAKE_VERSION}.tar.gz && \
RUN cd /tmp && \
wget -q https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz && \
tar xzf cmake-${CMAKE_VERSION}.tar.gz && \
cd cmake-${CMAKE_VERSION} && \
./bootstrap --prefix=/usr --no-system-curl --parallel=16 && \
make -j16 && \
sudo make install && \
cd .. && \
rm -rf cmake-${CMAKE_VERSION}.tar.gz cmake-${CMAKE_VERSION}
./bootstrap --prefix=/usr --no-system-curl --parallel=16 && \
make -j ${NUM_MAKE_JOBS} && \
make install && \
rm -rf /tmp/cmake-${CMAKE_VERSION}*
# Install Docker
ENV DOCKER_VERSION=20.10.8
RUN cd /tmp && \
wget https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VERSION}.tgz -O docker.tgz && \
wget -q https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VERSION}.tgz -O docker.tgz && \
tar --extract --file docker.tgz --strip-components 1 --directory /usr/local/bin/ && \
rm docker.tgz
@@ -108,8 +107,8 @@ RUN cd /tmp && \
# Install Intel MLC
RUN cd /tmp && \
curl https://www.intel.com/content/dam/develop/external/us/en/documents/mlc_v3.9a.tgz -o mlc.tgz && \
tar xzvf mlc.tgz Linux/mlc && \
wget -q https://downloadmirror.intel.com/736634/mlc_v3.9a.tgz -O mlc.tgz && \
tar xzf mlc.tgz Linux/mlc && \
cp ./Linux/mlc /usr/local/bin/ && \
rm -rf ./Linux mlc.tgz

5
third_party/Makefile поставляемый
Просмотреть файл

@@ -40,8 +40,7 @@ cuda_bandwidthTest: sb_micro_path
cd ./cuda-samples/Samples/bandwidthTest && make clean && make TARGET_ARCH=x86_64 SMS="70 75 80 86"
cp -v ./cuda-samples/Samples/bandwidthTest/bandwidthTest $(SB_MICRO_PATH)/bin/
# Build nccl-tests.
# The version we use is commit 44df0bf from master branch, since it didn't update release tag for long time.
# Build nccl-tests from commit 8274cb4 of default branch.
cuda_nccl_tests: sb_micro_path
ifneq (,$(wildcard nccl-tests/Makefile))
cd ./nccl-tests && make MPI=1 MPI_HOME=$(MPI_HOME) -j
@@ -65,7 +64,7 @@ ifneq (,$(wildcard fio/Makefile))
cd ./fio && ./configure --prefix=$(SB_MICRO_PATH) --disable-native && make -j && make install
endif
# Build rccl-tests from commit 602b745 of develop branch (default branch).
# Build rccl-tests from commit 2a18737 of default branch.
rocm_rccl_tests: sb_micro_path
ifneq (, $(wildcard rccl-tests/Makefile))
cd ./rccl-tests && make MPI=1 MPI_HOME=$(MPI_HOME) HIP_HOME=$(HIP_HOME) RCCL_HOME=$(RCCL_HOME) -j

2
third_party/nccl-tests поставляемый

@@ -1 +1 @@
Subproject commit 44df0bf010dcc95e840ca0fb7466c67cff3f1f0f
Subproject commit 8274cb47b6dc70ce4411e7f114b77173d3892414

2
third_party/rccl-tests поставляемый

@@ -1 +1 @@
Subproject commit 602b745ff48f4aa7d73e8fd946442b666fae344f
Subproject commit 2a18737dc681e03ce82c046caa71b28db65017b5