util-docker,gpu,gpu-compute: Improve GCN-GPU Dockerfile (#1170)

* The GCC used in the GCN-GPU images was increased from version 8 to
  version 10. This was necessary due to PR #1145, which made gem5 require
  GCC >=10. This patch was previously part of #1161 but has been merged
  into this PR.
* A patch has been applied to ROCm-OpenCL-Runtime to fix a linking error
  in which there were multiple definitions of `ret_val`. This issue is
  highlighted here:
  https://github.com/ROCm/ROCm-OpenCL-Runtime/issues/113.
  This was previously part of #1161 but has been moved into this PR.
* The Dockerfile's `RUN` commands (each built into a layer of the Docker
  image) have been refactored so sources and built objects are deleted in
  the same `RUN` command in which they were built and installed. This
  reduces the size of the image substantially: from 16.3GB down to 6.6GB.
* The `apt upgrade` has been removed. This step (previously at the start
  of the file) did nothing of importance. Removing it both saves time
  when building the image and reduces the size of the image by a small
  amount.
* `--depth=1` is used when cloning repositories so the entire commit
  tree is not pulled each time. This saves some time when building the
  image.
* `apt -y update` has been added where `apt -y install` is used so
  CACHED image layers do not become an issue in the future if the image
  were to be rebuilt.
This commit is contained in:
Bobby R. Bruce
2024-05-29 07:54:28 -07:00
committed by GitHub
parent a54d3198a8
commit ce0bb4655c

View File

@@ -23,108 +23,160 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Base image: Ubuntu 20.04, pulled for the platform of the machine running
# the build (BUILDPLATFORM).
FROM --platform=${BUILDPLATFORM} ubuntu:20.04
# Keep apt/debconf from prompting for input while packages are installed.
# Because this is set with ENV it also stays set in containers started from
# the image.
ENV DEBIAN_FRONTEND=noninteractive
RUN apt -y update && apt -y upgrade && \
apt -y install build-essential git m4 scons zlib1g zlib1g-dev \
# Install the base APT packages in one layer. `apt -y update` runs in the
# same RUN as the install so the package index is never taken from an older
# cached layer.
RUN apt -y update && \
apt -y install \
# General gem5 requirements when building/running in Ubuntu 20.04, also
# some utilities needed to compile and obtain the right packages that
# make up this Docker image.
build-essential git m4 scons zlib1g zlib1g-dev \
libprotobuf-dev protobuf-compiler libprotoc-dev libgoogle-perftools-dev \
python3-dev python-is-python3 doxygen libboost-all-dev \
libhdf5-serial-dev python3-pydot libpng-dev libelf-dev pkg-config gdb
libhdf5-serial-dev python3-pydot libpng-dev libelf-dev pkg-config gdb \
cmake wget gnupg2 rpm \
# Requirements for ROCm
mesa-common-dev libgflags-dev libgoogle-glog-dev \
# ROCm dependencies and requirements to get ROCm repo and build packages.
# Upgrade to gcc 10: gem5 supports gcc 10+.
gcc-10 g++-10 cpp-10
# Requirements for ROCm
RUN apt -y install cmake mesa-common-dev libgflags-dev libgoogle-glog-dev
# Needed to get ROCm repo, build packages
RUN apt -y install wget gnupg2 rpm
# Make GCC 10 the default toolchain: register the version-10 binaries as the
# alternatives (priority 100) behind g++, gcc, c++ and cc, in that order.
# Each entry is written as <link name>=<versioned binary>.
RUN set -e; \
    for entry in g++=g++-10 gcc=gcc-10 c++=g++-10 cc=gcc-10; do \
        update-alternatives --install "/usr/bin/${entry%%=*}" "${entry%%=*}" \
            "/usr/bin/${entry#*=}" 100; \
    done
# Add the ROCm apt key: wget writes the key from repo.radeon.com to stdout
# (quietly) and apt-key reads it from stdin.
RUN wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
# ROCm webpage says to use debian main, but the individual versions
# only have xenial
RUN echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.0.1/ xenial main' | tee /etc/apt/sources.list.d/rocm.list
# only have xenial. We set the APT source here to obtain the correct
# ROCm packages.
RUN echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/4.0.1/ xenial main' | \
tee /etc/apt/sources.list.d/rocm.list
RUN apt-get update && apt -y install hsakmt-roct hsakmt-roct-dev
# Install the hsakmt-roct packages from the ROCm 4.0.1 APT repository.
# `apt -y update` runs first so the newly added ROCm source is indexed.
RUN apt -y update && apt -y install hsakmt-roct hsakmt-roct-dev
# Create symbolic link: /opt/rocm -> /opt/rocm-4.0.1.
RUN ln -s /opt/rocm-4.0.1 /opt/rocm
RUN git clone -b rocm-4.0.0 https://github.com/RadeonOpenCompute/ROCR-Runtime.git && \
mkdir -p /ROCR-Runtime/src/build
# ROCR-Runtime: fetch the rocm-4.0.0 sources, build the hsa-rocr-dev package
# and install it. The checkout and build tree are removed inside this same
# RUN so they never end up in the image layer.
RUN git clone --branch rocm-4.0.0 --depth=1 \
        https://github.com/RadeonOpenCompute/ROCR-Runtime.git && \
    mkdir -p /ROCR-Runtime/src/build && \
    cd /ROCR-Runtime/src/build && \
    # HAVE_MEMFD_CREATE is switched off because the MEMFD_CREATE syscall
    # isn't implemented.
    cmake \
        -DIMAGE_SUPPORT=OFF \
        -DHAVE_MEMFD_CREATE=OFF \
        -DCMAKE_BUILD_TYPE=Release \
        .. && \
    make -j"$(nproc)" && \
    make package && \
    apt -y install ./hsa-rocr-dev*.deb && \
    # Remove the sources and build files now that the package is installed.
    cd / && \
    rm -rf /ROCR-Runtime
WORKDIR /ROCR-Runtime/src/build
# need MEMFD_CREATE=OFF as MEMFD_CREATE syscall isn't implemented
RUN cmake -DIMAGE_SUPPORT=OFF -DHAVE_MEMFD_CREATE=OFF \
-DCMAKE_BUILD_TYPE=Release .. && make -j$(nproc) && make package
RUN apt -y install ./hsa-rocr-dev*.deb
WORKDIR /
# This giant run encapsulates the cloning, building, installation of HIP,
# ROCm-OpenCL and ROCclr. These all depend upon one another's sources and build
# files, ergo they are all pulled, built, installed in a single RUN command.
# Once all three are installed their sources and build files are deleted.
#
# Both patches are downloaded to a file before being applied rather than piped
# straight from wget: the RUN shell is not run with pipefail, so in a pipeline
# a failed download is hidden behind the exit status of the patch tool. With
# the download as its own `&&` step, a failed wget stops the build right away.
RUN git clone -b rocm-4.0.0 --depth=1 \
        https://github.com/ROCm-Developer-Tools/HIP.git && \
    git clone -b rocm-4.0.0 --depth=1 \
        https://github.com/RadeonOpenCompute/ROCm-OpenCL-Runtime.git && \
    git clone -b rocm-4.0.0 --depth=1 \
        https://github.com/ROCm-Developer-Tools/ROCclr.git && \
    # Get ROCclr dependencies
    apt -y update && \
    apt -y install llvm-amdgpu libncurses5 libtinfo-dev rocm-device-libs \
        comgr && \
    mkdir -p ROCclr/build && \
    cd ROCclr && \
    # The patch allows us to avoid building blit kernels on-the-fly in gem5
    wget -q -O /tmp/ROCclr.patch \
        dist.gem5.org/dist/develop/rocm_patches/ROCclr.patch && \
    git apply -v /tmp/ROCclr.patch && \
    # Build and install ROCclr.
    cd /ROCclr/build && \
    cmake \
        -DOPENCL_DIR="/ROCm-OpenCL-Runtime" \
        -DCMAKE_BUILD_TYPE=Release .. && \
    make -j$(nproc) && \
    make install && \
    # We apply a patch to avoid a linking error -- "multiple definition of
    # 'ret_val'".
    # Issue here: https://github.com/ROCm/ROCm-OpenCL-Runtime/issues/113
    cd /ROCm-OpenCL-Runtime/khronos/icd && \
    wget -q -O /tmp/icd-ret_val.patch \
        https://github.com/KhronosGroup/OpenCL-ICD-Loader/pull/101/commits/319ba95eb08aa7c622efae50cb62c7cd7de14c1b.patch && \
    patch -p1 < /tmp/icd-ret_val.patch && \
    # Build and install ROCm OpenCL.
    cd /ROCm-OpenCL-Runtime && \
    mkdir build && \
    cd build && \
    cmake \
        -DUSE_COMGR_LIBRARY=ON \
        -DCMAKE_PREFIX_PATH="/opt/rocm" \
        -DCMAKE_BUILD_TYPE=Release .. && \
    make -j$(nproc) && \
    make package && \
    apt -y install ./rocm-opencl-2.0.0-amd64.deb \
        ./rocm-opencl-dev-2.0.0-amd64.deb && \
    # Build and install HIP.
    mkdir -p /HIP/build && \
    cd /HIP/build && \
    cmake \
        -DCMAKE_BUILD_TYPE=Release \
        -DHSA_PATH=/usr/hsa \
        -DHIP_COMPILER=clang \
        -DHIP_PLATFORM=rocclr \
        -DCMAKE_PREFIX_PATH="/opt/rocm" .. && \
    make -j$(nproc) && \
    make package && \
    apt -y install ./hip-base*.deb ./hip-rocclr*.deb && \
    # These CMake export files are copied into /opt/rocm by hand, and copied
    # rather than symlinked because /HIP is deleted at the end of this RUN.
    cp -r /HIP/build/rocclr/CMakeFiles/Export/_opt/rocm/hip/lib/cmake/hip/* \
        /opt/rocm/hip/lib/cmake/hip/ && \
    # Cleanup: sources, build trees and the downloaded patch files.
    cd / && \
    rm -rf /HIP /ROCclr /ROCm-OpenCL-Runtime \
        /tmp/ROCclr.patch /tmp/icd-ret_val.patch
# Dependencies for ROCclr
RUN apt -y install llvm-amdgpu libncurses5 libtinfo-dev rocm-device-libs comgr
RUN git clone -b rocm-4.0.0 \
https://github.com/ROCm-Developer-Tools/ROCclr.git && \
mkdir -p ROCclr/build
RUN git clone -b rocm-4.0.0 \
https://github.com/RadeonOpenCompute/ROCm-OpenCL-Runtime.git && \
mkdir -p ROCm-OpenCL-Runtime/build
WORKDIR /ROCclr
# The patch allows us to avoid building blit kernels on-the-fly in gem5
RUN wget -q -O - dist.gem5.org/dist/develop/rocm_patches/ROCclr.patch | git apply -v
WORKDIR /ROCclr/build
RUN cmake -DOPENCL_DIR="/ROCm-OpenCL-Runtime" \
-DCMAKE_BUILD_TYPE=Release .. && \
make -j$(nproc) && make install
WORKDIR /
WORKDIR ROCm-OpenCL-Runtime/build
RUN cmake -DUSE_COMGR_LIBRARY=ON -DCMAKE_PREFIX_PATH="/opt/rocm" \
-DCMAKE_BUILD_TYPE=Release .. && \
make -j$(nproc) && make package
RUN apt -y install ./rocm-opencl-2.0.0-amd64.deb ./rocm-opencl-dev-2.0.0-amd64.deb
WORKDIR /
RUN git clone -b rocm-4.0.0 \
https://github.com/ROCm-Developer-Tools/HIP.git && mkdir -p HIP/build
WORKDIR HIP/build
RUN cmake -DCMAKE_BUILD_TYPE=Release -DHSA_PATH=/usr/hsa \
-DHIP_COMPILER=clang -DHIP_PLATFORM=rocclr -DCMAKE_PREFIX_PATH="/opt/rocm"\
.. && make -j$(nproc) && make package
RUN apt -y install ./hip-base*.deb ./hip-rocclr*.deb
# These files here are needed but don't get installed through the .deb file,
# even though they seem to be included in the packaging, so symlink them
RUN ln -s /HIP/build/rocclr/CMakeFiles/Export/_opt/rocm/hip/lib/cmake/hip/* /opt/rocm/hip/lib/cmake/hip/
WORKDIR /
# rocBLAS downloads the most recent rocm-cmake if it isn't installed before
# building
RUN apt install rocm-cmake
RUN git clone -b rocm-4.0.0 \
https://github.com/ROCmSoftwarePlatform/rocBLAS.git && mkdir rocBLAS/build
ENV HCC_AMDGPU_TARGET=gfx900,gfx902
WORKDIR rocBLAS
# Clone, build, and install rocBLAS.
# Sources and build files are deleted after the install.
# rocBLAS needs to be built from source otherwise certain gfx versions get errors in HIP
# about there being no GPU binary available
RUN ./install.sh -d -i
WORKDIR /
# GPU targets gfx900 and gfx902. NOTE(review): presumably read by the rocBLAS
# build to decide which GPU binaries to generate -- confirm against install.sh.
ENV HCC_AMDGPU_TARGET=gfx900,gfx902
# Shallow-clone rocBLAS (rocm-4.0.0), build and install it through its own
# install.sh, then delete the checkout in the same layer.
RUN git clone -b rocm-4.0.0 --depth=1 \
https://github.com/ROCmSoftwarePlatform/rocBLAS.git && \
# rocBLAS downloads the most recent rocm-cmake if it isn't installed before
# building
apt -y update && \
apt -y install rocm-cmake && \
mkdir rocBLAS/build && \
cd rocBLAS && \
# NOTE(review): -d / -i presumably mean "build dependencies" and "install";
# confirm against rocBLAS's install.sh.
./install.sh -d -i && \
# Cleanup: remove the sources and build files.
cd / && \
rm -rf rocBLAS
# MIOpen dependencies + MIOpen
RUN apt install rocm-clang-ocl miopen-hip
# Refresh the package index in the same layer as the install, then install
# rocm-clang-ocl and the prebuilt miopen-hip package.
RUN apt -y update && \
apt -y install rocm-clang-ocl miopen-hip
# Clone MIOpen repo so that we have the kernel sources available
RUN git clone -b rocm-4.0.1 https://github.com/ROCmSoftwarePlatform/MIOpen.git
# --depth=1: only the tip of the rocm-4.0.1 branch is fetched, not the full
# history. The checkout is kept in the image.
RUN git clone -b rocm-4.0.1 --depth=1 \
https://github.com/ROCmSoftwarePlatform/MIOpen.git
# Make the MIOpen cache dir ahead of time and symlink for easier access
# when linking in the database file
RUN mkdir -p /root/.cache/miopen/2.9.0.8252-rocm-rel-4.0-26-64506314 && \
ln -s /root/.cache/miopen/2.9.0.8252-rocm-rel-4.0-26-64506314 /root/.cache/miopen/2.9.0
# Create the versioned MIOpen cache directory and its short alias
# /root/.cache/miopen/2.9.0 in a single layer: the symlink only makes sense
# together with the directory it points at, and holding the versioned path in
# one shell variable keeps the two uses from drifting apart.
RUN miopen_cache=/root/.cache/miopen/2.9.0.8252-rocm-rel-4.0-26-64506314 && \
    mkdir -p "${miopen_cache}" && \
    ln -s "${miopen_cache}" /root/.cache/miopen/2.9.0
# Add commands from halofinder Dockerfile
RUN apt-get update && apt-get -y install libopenmpi-dev libomp-dev
# Install the OpenMPI and OpenMP development packages; the package index is
# refreshed in the same layer.
RUN apt -y update && \
apt -y install libopenmpi-dev libomp-dev
# NOTE(review): presumably read by the halofinder build -- the ROCm binary
# directory and the OpenMPI header directory. Confirm against its Makefile.
ENV HIPCC_BIN=/opt/rocm/bin
ENV MPI_INCLUDE=/usr/lib/x86_64-linux-gnu/openmpi/include