From ce0bb4655c9e881f1037b28980a041a585da5eed Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Wed, 29 May 2024 07:54:28 -0700 Subject: [PATCH] util-docker,gpu,gpu-compute: Improve GCN-GPU Dockerfile (#1170) * The GCC used in the GCN-GPU images was increased from version 8 to version 10. This was necessary due to PR #1145 which made gem5 require GCC >=10. This patch was previously part of #1161 but has been merged into this PR. * A patch has been applied to ROCm-OpenCL-Runtime to fix a linking error in which there were multiple definitions of `ret_val`. This issue is highlighted here: https://github.com/ROCm/ROCm-OpenCL-Runtime/issues/113. This was previously part of #1161 but has been moved into this PR. * The Dockerfile's `RUN` commands (each built into a layer of the Docker image) have been refactored so sources and built objects are deleted in the same `RUN` command in which they are built and installed. This reduces the size of the image substantially: from 16.3GB down to 6.6GB. * The `apt upgrade` has been removed. This step (previously at the start of the file) did nothing of importance. Removing it both saves time building the image and reduces the size of the image by a small amount. * `--depth=1` is used when cloning repositories so the entire commit tree is not pulled each time. This saves some time when building the image. * `apt -y update` has been added where `apt -y install` is used so CACHED image layers do not become an issue in the future if the image were to be rebuilt. 
--- util/dockerfiles/gcn-gpu/Dockerfile | 204 +++++++++++++++++----------- 1 file changed, 128 insertions(+), 76 deletions(-) diff --git a/util/dockerfiles/gcn-gpu/Dockerfile b/util/dockerfiles/gcn-gpu/Dockerfile index 91bad99c8b..5aac3c28d8 100644 --- a/util/dockerfiles/gcn-gpu/Dockerfile +++ b/util/dockerfiles/gcn-gpu/Dockerfile @@ -23,108 +23,160 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + FROM --platform=${BUILDPLATFORM} ubuntu:20.04 ENV DEBIAN_FRONTEND=noninteractive -RUN apt -y update && apt -y upgrade && \ - apt -y install build-essential git m4 scons zlib1g zlib1g-dev \ + +RUN apt -y update && \ + apt -y install \ + # General gem5 requirements when building/running in Ubuntu 20.04, also + # some utilities needed to compile and obtain the right packages that + # make up this Docker image. + build-essential git m4 scons zlib1g zlib1g-dev \ libprotobuf-dev protobuf-compiler libprotoc-dev libgoogle-perftools-dev \ python3-dev python-is-python3 doxygen libboost-all-dev \ - libhdf5-serial-dev python3-pydot libpng-dev libelf-dev pkg-config gdb + libhdf5-serial-dev python3-pydot libpng-dev libelf-dev pkg-config gdb \ + cmake wget gnupg2 rpm \ + # Requirements for ROCm + mesa-common-dev libgflags-dev libgoogle-glog-dev \ + # ROCm dependencies and requirements to get ROCm repo and build packages. + # Upgrade to gcc 10: gem5 supports gcc 10+. 
+ gcc-10 g++-10 cpp-10 -# Requirements for ROCm -RUN apt -y install cmake mesa-common-dev libgflags-dev libgoogle-glog-dev - -# Needed to get ROCm repo, build packages -RUN apt -y install wget gnupg2 rpm +# Update the default compiler to gcc 10 +RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 100 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 && \ + update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++-10 100 && \ + update-alternatives --install /usr/bin/cc cc /usr/bin/gcc-10 100 +# Add the ROCm apt key. RUN wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - # ROCm webpage says to use debian main, but the individual versions -# only have xenial -RUN echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.0.1/ xenial main' | tee /etc/apt/sources.list.d/rocm.list +# only have xenial. We set the APT source here to obtain thecorrect +# ROCm packages. +RUN echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.0.1/ xenial main' | \ + tee /etc/apt/sources.list.d/rocm.list -RUN apt-get update && apt -y install hsakmt-roct hsakmt-roct-dev +# Install/apt/4.0.1 packages. +RUN apt -y update && apt -y install hsakmt-roct hsakmt-roct-dev + +# Create symbolic link: /opt/rocm -> /opt/rocm-4.0.1. RUN ln -s /opt/rocm-4.0.1 /opt/rocm -RUN git clone -b rocm-4.0.0 https://github.com/RadeonOpenCompute/ROCR-Runtime.git && \ - mkdir -p /ROCR-Runtime/src/build +# Download, build, and install the ROCR-Runtime. +# Sources and build files deleted after install to reduce the size of this +# layer. +RUN git clone -b rocm-4.0.0 --depth=1 \ + https://github.com/RadeonOpenCompute/ROCR-Runtime.git && \ + mkdir -p /ROCR-Runtime/src/build && \ + cd /ROCR-Runtime/src/build && \ + # We need MEMFD_CREATE=OFF as MEMFD_CREATE syscall isn't implemented. + cmake -DIMAGE_SUPPORT=OFF -DHAVE_MEMFD_CREATE=OFF -DCMAKE_BUILD_TYPE=Release .. 
&& \ + make -j$(nproc) && \ + make package && \ + apt -y install ./hsa-rocr-dev*.deb && \ + # Cleanup + cd / && \ + rm -rf /ROCR-Runtime -WORKDIR /ROCR-Runtime/src/build -# need MEMFD_CREATE=OFF as MEMFD_CREATE syscall isn't implemented -RUN cmake -DIMAGE_SUPPORT=OFF -DHAVE_MEMFD_CREATE=OFF \ - -DCMAKE_BUILD_TYPE=Release .. && make -j$(nproc) && make package -RUN apt -y install ./hsa-rocr-dev*.deb -WORKDIR / +# This giant run encapsulates the cloning, building, installation of HIP, +# ROCm-OpenCL and ROCclr. These all depend upon one another's sources and build +# files, ergo they are all pulled, built, installed in a single RUN command. +# Once all three are installed their sources and build files are deleted. +RUN git clone -b rocm-4.0.0 --depth=1 \ + https://github.com/ROCm-Developer-Tools/HIP.git && \ + git clone -b rocm-4.0.0 --depth=1 \ + https://github.com/RadeonOpenCompute/ROCm-OpenCL-Runtime.git && \ + git clone -b rocm-4.0.0 --depth=1 \ + https://github.com/ROCm-Developer-Tools/ROCclr.git && \ + # Get ROCclr dependencies + apt -y update && \ + apt -y install llvm-amdgpu libncurses5 libtinfo-dev rocm-device-libs \ + comgr && \ + mkdir -p ROCclr/build && \ + cd ROCclr && \ + # The patch allows us to avoid building blit kernels on-the-fly in gem5 + wget -q -O - dist.gem5.org/dist/develop/rocm_patches/ROCclr.patch | \ + git apply -v && \ + # Build and install ROCclr. + cd /ROCclr/build && \ + cmake \ + -DOPENCL_DIR="/ROCm-OpenCL-Runtime" \ + -DCMAKE_BUILD_TYPE=Release .. && \ + make -j$(nproc) && \ + make install && \ + # We apply a patch to avoid a linking error -- "multiple definition of + # 'ret_val'". + # Issue here: https://github.com/ROCm/ROCm-OpenCL-Runtime/issues/113 + cd /ROCm-OpenCL-Runtime/khronos/icd && \ + wget -q -O - https://github.com/KhronosGroup/OpenCL-ICD-Loader/pull/101/commits/319ba95eb08aa7c622efae50cb62c7cd7de14c1b.patch | \ + patch -p1 && \ + # Build and install ROCm OpenCL. 
+ cd /ROCm-OpenCL-Runtime && \ + mkdir build && \ + cd build && \ + cmake \ + -DUSE_COMGR_LIBRARY=ON \ + -DCMAKE_PREFIX_PATH="/opt/rocm" \ + -DCMAKE_BUILD_TYPE=Release .. && \ + make -j$(nproc) && \ + make package && \ + apt -y install ./rocm-opencl-2.0.0-amd64.deb \ + ./rocm-opencl-dev-2.0.0-amd64.deb && \ + # Build and install HIP. + mkdir -p /HIP/build && \ + cd /HIP/build && \ + cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DHSA_PATH=/usr/hsa \ + -DHIP_COMPILER=clang \ + -DHIP_PLATFORM=rocclr \ + -DCMAKE_PREFIX_PATH="/opt/rocm" .. && \ + make -j$(nproc) && \ + make package && \ + apt -y install ./hip-base*.deb ./hip-rocclr*.deb && \ + cp -r /HIP/build/rocclr/CMakeFiles/Export/_opt/rocm/hip/lib/cmake/hip/* \ + /opt/rocm/hip/lib/cmake/hip/ && \ + # Cleanup. + cd / && \ + rm -rf /HIP /ROCclr /ROCm-OpenCL-Runtime -# Dependencies for ROCclr -RUN apt -y install llvm-amdgpu libncurses5 libtinfo-dev rocm-device-libs comgr -RUN git clone -b rocm-4.0.0 \ - https://github.com/ROCm-Developer-Tools/ROCclr.git && \ - mkdir -p ROCclr/build - -RUN git clone -b rocm-4.0.0 \ - https://github.com/RadeonOpenCompute/ROCm-OpenCL-Runtime.git && \ - mkdir -p ROCm-OpenCL-Runtime/build - -WORKDIR /ROCclr -# The patch allows us to avoid building blit kernels on-the-fly in gem5 -RUN wget -q -O - dist.gem5.org/dist/develop/rocm_patches/ROCclr.patch | git apply -v - -WORKDIR /ROCclr/build -RUN cmake -DOPENCL_DIR="/ROCm-OpenCL-Runtime" \ - -DCMAKE_BUILD_TYPE=Release .. && \ - make -j$(nproc) && make install -WORKDIR / - -WORKDIR ROCm-OpenCL-Runtime/build -RUN cmake -DUSE_COMGR_LIBRARY=ON -DCMAKE_PREFIX_PATH="/opt/rocm" \ - -DCMAKE_BUILD_TYPE=Release .. 
&& \ - make -j$(nproc) && make package -RUN apt -y install ./rocm-opencl-2.0.0-amd64.deb ./rocm-opencl-dev-2.0.0-amd64.deb -WORKDIR / - -RUN git clone -b rocm-4.0.0 \ - https://github.com/ROCm-Developer-Tools/HIP.git && mkdir -p HIP/build - -WORKDIR HIP/build -RUN cmake -DCMAKE_BUILD_TYPE=Release -DHSA_PATH=/usr/hsa \ - -DHIP_COMPILER=clang -DHIP_PLATFORM=rocclr -DCMAKE_PREFIX_PATH="/opt/rocm"\ - .. && make -j$(nproc) && make package -RUN apt -y install ./hip-base*.deb ./hip-rocclr*.deb -# These files here are needed but don't get installed through the .deb file, -# even though they seem to be included in the packaging, so symlink them -RUN ln -s /HIP/build/rocclr/CMakeFiles/Export/_opt/rocm/hip/lib/cmake/hip/* /opt/rocm/hip/lib/cmake/hip/ -WORKDIR / - -# rocBLAS downloads the most recent rocm-cmake if it isn't installed before -# building -RUN apt install rocm-cmake - -RUN git clone -b rocm-4.0.0 \ - https://github.com/ROCmSoftwarePlatform/rocBLAS.git && mkdir rocBLAS/build - -ENV HCC_AMDGPU_TARGET=gfx900,gfx902 -WORKDIR rocBLAS +# Clone, build, and install rocBLAS. +# Sources and build files are deleted after the install. 
# rocBLAS needs to be built from source otherwise certain gfx versions get errors in HIP # about there being no GPU binary available -RUN ./install.sh -d -i -WORKDIR / +ENV HCC_AMDGPU_TARGET=gfx900,gfx902 +RUN git clone -b rocm-4.0.0 --depth=1 \ + https://github.com/ROCmSoftwarePlatform/rocBLAS.git && \ + # rocBLAS downloads the most recent rocm-cmake if it isn't installed before + # building + apt -y update && \ + apt -y install rocm-cmake && \ + mkdir rocBLAS/build && \ + cd rocBLAS && \ + ./install.sh -d -i && \ + cd / && \ + rm -rf rocBLAS # MIOpen dependencies + MIOpen -RUN apt install rocm-clang-ocl miopen-hip +RUN apt -y update && \ + apt -y install rocm-clang-ocl miopen-hip # Clone MIOpen repo so that we have the kernel sources available -RUN git clone -b rocm-4.0.1 https://github.com/ROCmSoftwarePlatform/MIOpen.git +RUN git clone -b rocm-4.0.1 --depth=1 \ + https://github.com/ROCmSoftwarePlatform/MIOpen.git # Make the MIOpen cache dir ahead of time and symlink for easier access # when linking in the database file -RUN mkdir -p /root/.cache/miopen/2.9.0.8252-rocm-rel-4.0-26-64506314 && \ - ln -s /root/.cache/miopen/2.9.0.8252-rocm-rel-4.0-26-64506314 /root/.cache/miopen/2.9.0 +RUN mkdir -p /root/.cache/miopen/2.9.0.8252-rocm-rel-4.0-26-64506314 +RUN ln -s /root/.cache/miopen/2.9.0.8252-rocm-rel-4.0-26-64506314 \ + /root/.cache/miopen/2.9.0 # Add commands from halofinder Dockerfile -RUN apt-get update && apt-get -y install libopenmpi-dev libomp-dev - +RUN apt -y update && \ + apt -y install libopenmpi-dev libomp-dev ENV HIPCC_BIN=/opt/rocm/bin ENV MPI_INCLUDE=/usr/lib/x86_64-linux-gnu/openmpi/include