diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml index 0f6dd995..ed821d65 100644 --- a/.github/workflows/build-image.yml +++ b/.github/workflows/build-image.yml @@ -40,6 +40,11 @@ jobs: tags: superbench/main:rocm5.7 runner: [self-hosted, rocm-build] build_args: "NUM_MAKE_JOBS=64" + - name: rocm6.0 + dockerfile: rocm6.0.x + tags: superbench/main:rocm6.0 + runner: [self-hosted, rocm-build] + build_args: "NUM_MAKE_JOBS=64" steps: - name: Checkout uses: actions/checkout@v2 diff --git a/dockerfile/rocm5.7.x.dockerfile b/dockerfile/rocm5.7.x.dockerfile index ce87e9fc..6f2ede3a 100644 --- a/dockerfile/rocm5.7.x.dockerfile +++ b/dockerfile/rocm5.7.x.dockerfile @@ -110,21 +110,18 @@ RUN bash -c 'echo -e "gfx90a:xnack-\ngfx90a:xnac+\ngfx940\ngfx941\ngfx942\ngfx10 # Install OpenMPI ENV OPENMPI_VERSION=4.1.x # Check if Open MPI is installed -RUN [ -d /usr/local/bin/mpirun ] || { \ - echo "Open MPI not found. Installing Open MPI..." && \ - cd /tmp && \ +RUN cd /tmp && \ git clone --recursive https://github.com/open-mpi/ompi.git -b v${OPENMPI_VERSION} && \ cd ompi && \ ./autogen.pl && \ mkdir build && \ cd build && \ - ../configure --prefix=/usr/local --enable-orterun-prefix-by-default --enable-mpirun-prefix-by-default --enable-prte-prefix-by-default --with-rocm=/opt/rocm && \ + ../configure --prefix=/usr/local/mpi --enable-orterun-prefix-by-default --enable-mpirun-prefix-by-default --enable-prte-prefix-by-default --with-rocm=/opt/rocm && \ make -j $(nproc) && \ make -j $(nproc) install && \ ldconfig && \ cd / && \ - rm -rf /tmp/openmpi-${OPENMPI_VERSION}* ;\ - } + rm -rf /tmp/ompi # Install Intel MLC RUN cd /tmp && \ @@ -140,8 +137,8 @@ RUN cd /opt/ && \ mkdir build && \ cd build && \ CXX=/opt/rocm/bin/hipcc cmake -DHIP_COMPILER=clang -DCMAKE_BUILD_TYPE=Release -DCMAKE_VERBOSE_MAKEFILE=1 \ - -DCMAKE_PREFIX_PATH="${ROCM_PATH}/hsa;${ROCM_PATH}/hip;${ROCM_PATH}/share/rocm/cmake/;${ROCM_PATH}" \ - .. 
&& \ + -DCMAKE_PREFIX_PATH="${ROCM_PATH}/hsa;${ROCM_PATH}/hip;${ROCM_PATH}/share/rocm/cmake/;${ROCM_PATH}" \ + .. && \ make -j${NUM_MAKE_JOBS} # Install AMD SMI Python Library @@ -166,11 +163,13 @@ RUN apt install rocm-cmake -y && \ WORKDIR ${SB_HOME} ADD third_party third_party -RUN make RCCL_HOME=/opt/rccl/build/ MPI_HOME=/usr/local ROCBLAS_BRANCH=release/rocm-rel-5.7.1.1 HIPBLASLT_BRANCH=release-staging/rocm-rel-5.7 ROCM_VER=rocm-5.5.0 -C third_party rocm -o cpu_hpl -o cpu_stream -o megatron_lm +# Apply perftest_rocm6.patch to the perftest sources before the third-party build below +RUN cd third_party/perftest && \ + git apply ../perftest_rocm6.patch +RUN make RCCL_HOME=/opt/rccl/build/ ROCBLAS_BRANCH=release/rocm-rel-5.7.1.1 HIPBLASLT_BRANCH=release/rocm-rel-5.7 ROCM_VER=rocm-5.5.0 -C third_party rocm -o cpu_hpl -o cpu_stream -o megatron_lm ADD . . #ENV USE_HIPBLASLT_DATATYPE=1 -ENV CXX=/opt/rocm/bin/hipcc RUN python3 -m pip install .[amdworker] && \ - make cppbuild && \ + CXX=/opt/rocm/bin/hipcc make cppbuild && \ make postinstall diff --git a/dockerfile/rocm6.0.x.dockerfile b/dockerfile/rocm6.0.x.dockerfile new file mode 100644 index 00000000..55f0d2b5 --- /dev/null +++ b/dockerfile/rocm6.0.x.dockerfile @@ -0,0 +1,180 @@ +ARG BASE_IMAGE=rocm/pytorch:rocm6.0_ubuntu22.04_py3.9_pytorch_2.0.1 + +FROM ${BASE_IMAGE} + +# OS: +# - Ubuntu: 22.04 +# - Docker Client: 20.10.8 +# ROCm: +# - ROCm: 6.0 +# Lib: +# - torch: 2.0.1 +# - rccl: 2.18.3+hip6.0 develop:7e1cbb4 +# - hipblaslt: 950ca43 +# - openmpi: 4.1.x +# - apex: 1.0.0 +# Intel: +# - mlc: v3.10 + +LABEL maintainer="SuperBench" + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && \ + apt-get -q install -y --no-install-recommends \ + autoconf \ + automake \ + bc \ + build-essential \ + curl \ + dmidecode \ + git \ + hipify-clang \ + iproute2 \ + jq \ + libaio-dev \ + libboost-program-options-dev \ + libcap2 \ + libcurl4-openssl-dev \ + libnuma-dev \ + libpci-dev \ + libssl-dev \ + libtinfo5 \ + libtool \ + lshw \ + net-tools \ + numactl \ + openssh-client \ + openssh-server \ + 
pciutils \ + python3-mpi4py \ + rsync \ + sudo \ + util-linux \ + vim \ + wget \ + && \ + rm -rf /tmp/* + +ARG NUM_MAKE_JOBS=64 + +# Build CMake from source unless the installed cmake is already >= required_version +RUN cmake_version=$(cmake --version 2>/dev/null | grep -oP "(?<=cmake version )\d+(\.\d+)+" || echo "0.0") && \ + required_version="3.24.1" && \ + if [ "$(printf "%s\n" "$required_version" "$cmake_version" | sort -V | head -n 1)" != "$required_version" ]; then \ + echo "existing cmake version is ${cmake_version}" && \ + cd /tmp && \ + wget -q https://github.com/Kitware/CMake/releases/download/v${required_version}/cmake-${required_version}.tar.gz && \ + tar xzf cmake-${required_version}.tar.gz && \ + cd cmake-${required_version} && \ + ./bootstrap --prefix=/usr --no-system-curl --parallel=16 && \ + make -j ${NUM_MAKE_JOBS} && \ + make install && \ + rm -rf /tmp/cmake-${required_version}*; \ + else \ + echo "CMake version ${cmake_version} is greater than or equal to ${required_version}"; \ + fi + +# Install Docker +ENV DOCKER_VERSION=20.10.8 +RUN cd /tmp && \ + wget -q https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VERSION}.tgz -O docker.tgz && \ + tar --extract --file docker.tgz --strip-components 1 --directory /usr/local/bin/ && \ + rm docker.tgz + +# Update system config +RUN mkdir -p /root/.ssh && \ + touch /root/.ssh/authorized_keys && \ + mkdir -p /var/run/sshd && \ + sed -i "s/[# ]*PermitRootLogin prohibit-password/PermitRootLogin yes/" /etc/ssh/sshd_config && \ + sed -i "s/[# ]*PermitUserEnvironment no/PermitUserEnvironment yes/" /etc/ssh/sshd_config && \ + sed -i "s/[# ]*Port.*/Port 22/" /etc/ssh/sshd_config && \ + echo "* soft nofile 1048576\n* hard nofile 1048576" >> /etc/security/limits.conf && \ + echo "root soft nofile 1048576\nroot hard nofile 1048576" >> /etc/security/limits.conf + + +# Ubuntu release of the base image (keep in sync with BASE_IMAGE); a shell export inside RUN does not outlive its layer, so use ARG + ENV +ARG UBUNTU_VERSION=22.04 +RUN echo "Ubuntu version: ${UBUNTU_VERSION} (lsb_release reports: $(lsb_release -r -s))" +ENV UBUNTU_VERSION=${UBUNTU_VERSION} + +# Install OFED 
+ENV OFED_VERSION=5.9-0.5.6.0 +# Install MLNX_OFED (user-space only) when ofed_info is not already on PATH +RUN if ! command -v ofed_info >/dev/null 2>&1; then \ + echo "OFED not found. Installing OFED..."; \ + cd /tmp && UBUNTU_VERSION="${UBUNTU_VERSION:-$(lsb_release -r -s)}" && \ + wget -q http://content.mellanox.com/ofed/MLNX_OFED-${OFED_VERSION}/MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu${UBUNTU_VERSION}-x86_64.tgz && \ + tar xzf MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu${UBUNTU_VERSION}-x86_64.tgz && \ + PATH=/usr/bin:${PATH} MLNX_OFED_LINUX-${OFED_VERSION}-ubuntu${UBUNTU_VERSION}-x86_64/mlnxofedinstall --user-space-only --without-fw-update --force --all && \ + rm -rf MLNX_OFED_LINUX-${OFED_VERSION}* ; \ + fi + +# Add target file to help determine which device(s) to build for +ENV ROCM_PATH=/opt/rocm +RUN bash -c 'echo -e "gfx90a:xnack-\ngfx90a:xnack+\ngfx940\ngfx941\ngfx942:sramecc+:xnack-\n" >> ${ROCM_PATH}/bin/target.lst' + +# Install OpenMPI +ENV OPENMPI_VERSION=4.1.x +# Build Open MPI from source with ROCm support into /usr/local/mpi, then remove the clone +RUN cd /tmp && \ + git clone --recursive https://github.com/open-mpi/ompi.git -b v${OPENMPI_VERSION} && \ + cd ompi && \ + ./autogen.pl && \ + mkdir build && \ + cd build && \ + ../configure --prefix=/usr/local/mpi --enable-orterun-prefix-by-default --enable-mpirun-prefix-by-default --enable-prte-prefix-by-default --with-rocm=/opt/rocm && \ + make -j $(nproc) && \ + make -j $(nproc) install && \ + ldconfig && \ + cd / && \ + rm -rf /tmp/ompi + +# Install Intel MLC +RUN cd /tmp && \ + wget -q https://downloadmirror.intel.com/763324/mlc_v3.10.tgz -O mlc.tgz && \ + tar xzf mlc.tgz Linux/mlc && \ + cp ./Linux/mlc /usr/local/bin/ && \ + rm -rf ./Linux mlc.tgz + +# Install RCCL +RUN cd /opt/ && \ + git clone https://github.com/ROCmSoftwarePlatform/rccl.git && \ + cd rccl && \ + mkdir build && \ + cd build && \ + CXX=/opt/rocm/bin/hipcc cmake -DHIP_COMPILER=clang -DCMAKE_BUILD_TYPE=Release -DCMAKE_VERBOSE_MAKEFILE=1 \ + -DCMAKE_PREFIX_PATH="${ROCM_PATH}/hsa;${ROCM_PATH}/hip;${ROCM_PATH}/share/rocm/cmake/;${ROCM_PATH}" \ + .. 
&& \ + make -j${NUM_MAKE_JOBS} + +ENV PATH="/usr/local/mpi/bin:/opt/superbench/bin:/usr/local/bin/:/opt/rocm/hip/bin/:/opt/rocm/bin/:${PATH}" \ + LD_PRELOAD="/opt/rccl/build/librccl.so:$LD_PRELOAD" \ + LD_LIBRARY_PATH="/usr/local/mpi/lib:/usr/local/lib/:/opt/rocm/lib:${LD_LIBRARY_PATH}" \ + SB_HOME=/opt/superbench \ + SB_MICRO_PATH=/opt/superbench \ + ANSIBLE_DEPRECATION_WARNINGS=FALSE \ + ANSIBLE_COLLECTIONS_PATH=/usr/share/ansible/collections + +RUN echo PATH="$PATH" > /etc/environment && \ + echo LD_LIBRARY_PATH="$LD_LIBRARY_PATH" >> /etc/environment && \ + echo SB_MICRO_PATH="$SB_MICRO_PATH" >> /etc/environment + +RUN apt-get install -y rocm-cmake && \ + python3 -m pip install --upgrade pip wheel setuptools==65.7 + +WORKDIR ${SB_HOME} + +ADD third_party third_party +# Apply perftest_rocm6.patch to the perftest sources before the third-party build below +RUN cd third_party/perftest && \ + git apply ../perftest_rocm6.patch +RUN make RCCL_HOME=/opt/rccl/build/ ROCBLAS_BRANCH=release/rocm-rel-6.0 HIPBLASLT_BRANCH=release/rocm-rel-6.0 ROCM_VER=rocm-5.5.0 -C third_party rocm -o cpu_hpl -o cpu_stream -o megatron_lm +RUN cd third_party/Megatron/Megatron-DeepSpeed && \ + git apply ../megatron_deepspeed_rocm6.patch + +ADD . . +ENV USE_HIP_DATATYPE=1 +ENV USE_HIPBLAS_COMPUTETYPE=1 +RUN python3 -m pip install .[amdworker] && \ + CXX=/opt/rocm/bin/hipcc make cppbuild && \ + make postinstall diff --git a/third_party/Makefile b/third_party/Makefile index b69259da..1b965854 100755 --- a/third_party/Makefile +++ b/third_party/Makefile @@ -7,10 +7,12 @@ MPI_HOME ?= /usr/local/mpi HIP_HOME ?= /opt/rocm/hip RCCL_HOME ?= /opt/rocm/rccl HPCX_HOME ?= /opt/hpcx +ROCM_PATH ?= /opt/rocm CUDA_VER ?= $(shell nvcc --version | grep 'release' | awk '{print $$6}' | cut -c2- | cut -d '.' 
-f1-3) +ROCM_VER ?= $(shell hipconfig -R | grep -oP '\d+\.\d+\.\d+' || echo "0.0.0") .PHONY: all cuda_with_msccl cuda rocm common cuda_cutlass cuda_bandwidthTest cuda_nccl_tests cuda_perftest cuda_msccl rocm_perftest fio rocm_rccl_tests rocm_rocblas rocm_bandwidthTest gpcnet cuda_gpuburn cpu_stream cpu_hpl directx_amf_encoding_latency directx_amd rocm_hipblaslt megatron_lm megatron_deepspeed apex_rocm diff --git a/third_party/Megatron/megatron_deepspeed_rocm6.patch b/third_party/Megatron/megatron_deepspeed_rocm6.patch new file mode 100644 index 00000000..39a1dc27 --- /dev/null +++ b/third_party/Megatron/megatron_deepspeed_rocm6.patch @@ -0,0 +1,39 @@ +diff --git a/megatron/fused_kernels/scaled_masked_softmax_cuda.cu b/megatron/fused_kernels/scaled_masked_softmax_cuda.cu +index 76086de..1533648 100644 +--- a/megatron/fused_kernels/scaled_masked_softmax_cuda.cu ++++ b/megatron/fused_kernels/scaled_masked_softmax_cuda.cu +@@ -4,7 +4,7 @@ + #include + #include + #include +-#ifndef __HIP_PLATFORM_HCC__ ++#ifndef __HIP_PLATFORM_AMD__ + #include + #endif + #include +diff --git a/megatron/fused_kernels/scaled_softmax_cuda.cu b/megatron/fused_kernels/scaled_softmax_cuda.cu +index 90e1c9f..d217aec 100644 +--- a/megatron/fused_kernels/scaled_softmax_cuda.cu ++++ b/megatron/fused_kernels/scaled_softmax_cuda.cu +@@ -4,7 +4,7 @@ + #include + #include + #include +-#ifndef __HIP_PLATFORM_HCC__ ++#ifndef __HIP_PLATFORM_AMD__ + #include + #endif + #include +diff --git a/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu b/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu +index 74c9f3d..03b5fc8 100644 +--- a/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu ++++ b/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu +@@ -4,7 +4,7 @@ + #include + #include + #include +-#ifndef __HIP_PLATFORM_HCC__ ++#ifndef __HIP_PLATFORM_AMD__ + #include + #endif + #include diff --git a/third_party/perftest b/third_party/perftest index 
5fb4f10a..dffd1dd8 160000 --- a/third_party/perftest +++ b/third_party/perftest @@ -1 +1 @@ -Subproject commit 5fb4f10a7e7827ed15e53c25810a10be279d6e23 +Subproject commit dffd1dd8b8a26dad2634a546e7e4d082dc882fbc diff --git a/third_party/perftest_rocm6.patch b/third_party/perftest_rocm6.patch new file mode 100644 index 00000000..fe15bd25 --- /dev/null +++ b/third_party/perftest_rocm6.patch @@ -0,0 +1,28 @@ +diff --git a/configure.ac b/configure.ac +index 20eceda..c8f0c07 100755 +--- a/configure.ac ++++ b/configure.ac +@@ -237,7 +237,7 @@ AC_ARG_WITH([rocm], + ], + [AS_CASE([$with_rocm], + [yes|no], [], +- [CPPFLAGS="-I$with_rocm/include $CPPFLAGS" ++ [CPPFLAGS="-I$with_rocm/include -D__HIP_PLATFORM_AMD__=1 $CPPFLAGS" + LDFLAGS="-L$with_rocm/lib64 -Wl,-rpath=$with_rocm/lib64 -L$with_rocm/lib -Wl,-rpath=$with_rocm/lib -lamdhip64 $LDFLAGS"]) + ]) + +diff --git a/src/rocm_memory.c b/src/rocm_memory.c +index e9a9136..b6cb23a 100644 +--- a/src/rocm_memory.c ++++ b/src/rocm_memory.c +@@ -44,8 +44,8 @@ static int init_rocm(int device_id) { + + hipDeviceProp_t prop = {0}; + ROCM_CHECK(hipGetDeviceProperties(&prop, device_id)); +- printf("Using ROCm Device with ID: %d, Name: %s, PCI Bus ID: 0x%x, GCN Arch: %d\n", +- device_id, prop.name, prop.pciBusID, prop.gcnArch); ++ printf("Using ROCm Device with ID: %d, Name: %s, PCI Bus ID: 0x%x, GCN Arch: %s\n", ++ device_id, prop.name, prop.pciBusID, prop.gcnArchName); + + return SUCCESS; + }