From 61648352301f98360b613619ba7bb64dba52b9bf Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 15 May 2024 17:00:37 -0700 Subject: [PATCH] configs: GPUFS: MI300X Add a config capable of simulating MI300X ISA (gfx942). This is similar to the mi200.py config and uses the same scripts followed by some tuneable parameters. This config optionally lets the user call the runMI300GPU function with gem5 resources. This allows for something like the following before a VIPER stdlib python is available: ``` import mi300 from gem5.resources.resource import obtain_resource disk = obtain_resource("x86-gpu-fs-img") kernel = obtain_resource("x86-linux-kernel-5.4.0-105-generic") app = obtain_resource("square-gpu-test") mi300.runMI300GPUFS("X86KvmCPU", disk, kernel, app) ``` Tested cold boot config, checkpoint create and restore, and using gem5 resources. Change-Id: I50a13d7a3d207786b779bf7fd47a5645256b1e6a --- configs/example/gpufs/mi300.py | 172 +++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 configs/example/gpufs/mi300.py diff --git a/configs/example/gpufs/mi300.py b/configs/example/gpufs/mi300.py new file mode 100644 index 0000000000..9e0e0da622 --- /dev/null +++ b/configs/example/gpufs/mi300.py @@ -0,0 +1,172 @@ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +""" This file creates an X86 system with a KVM CPU and GPU device capable of +running the MI300 ISA (gfx942). Most of this file sets up a runscript which +will load in a binary, shell script, or python file from the host and run that +within gem5. Jump to line 146 for list of system parameters to configure. +""" + +import argparse +import base64 +import os +import sys +import tempfile +from typing import Optional + +import runfs +from amd import AmdGPUOptions +from common import ( + GPUTLBOptions, + Options, +) +from ruby import Ruby + +import m5 + +from gem5.resources.resource import AbstractResource + +demo_runscript_without_checkpoint = """\ +export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH +export HSA_ENABLE_INTERRUPT=0 +export HCC_AMDGPU_TARGET=gfx942 +export HSA_OVERRIDE_GFX_VERSION="9.4.2" +dmesg -n8 +cat /proc/cpuinfo +dd if=/root/roms/mi200.rom of=/dev/mem bs=1k seek=768 count=128 +if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then + echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5." + /sbin/m5 exit +fi +modprobe -v amdgpu ip_block_mask=0x6f ppfeaturemask=0 dpm=0 audio=0 ras_enable=0 +echo "Running {} {}" +echo "{}" | base64 -d > myapp +chmod +x myapp +./myapp {} +/sbin/m5 exit +""" + +demo_runscript_with_checkpoint = """\ +export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH +export HSA_ENABLE_INTERRUPT=0 +export HCC_AMDGPU_TARGET=gfx942 +export HSA_OVERRIDE_GFX_VERSION="9.4.2" +dmesg -n8 +dd if=/root/roms/mi200.rom of=/dev/mem bs=1k seek=768 count=128 +if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then + echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5." + /sbin/m5 exit +fi +modprobe -v amdgpu ip_block_mask=0x6f ppfeaturemask=0 dpm=0 audio=0 ras_enable=0 +echo "Running {} {}" +echo "{}" | base64 -d > myapp +chmod +x myapp +/sbin/m5 checkpoint +./myapp {} +/sbin/m5 exit +""" + + +def addDemoOptions(parser): + parser.add_argument( + "-a", "--app", default=None, help="GPU application to run" + ) + parser.add_argument( + "-o", "--opts", default="", help="GPU application arguments" + ) + + +def runMI300GPUFS( + cpu_type, + disk: Optional[AbstractResource] = None, + kernel: Optional[AbstractResource] = None, + app: Optional[AbstractResource] = None, +): + parser = argparse.ArgumentParser() + runfs.addRunFSOptions(parser) + Options.addCommonOptions(parser) + AmdGPUOptions.addAmdGPUOptions(parser) + Ruby.define_options(parser) + GPUTLBOptions.tlb_options(parser) + addDemoOptions(parser) + + # Parse now so we can override options + args = parser.parse_args() + demo_runscript = "" + + if disk != None: + args.disk_image = disk.get_local_path() + if kernel != None: + args.kernel = kernel.get_local_path() + if app != None: + args.app = app.get_local_path() + + # Create temp script to run application + if not os.path.isfile(args.app): + print("Could not find applcation", args.app) + sys.exit(1) + + # Choose runscript Based on whether any checkpointing args are set + if args.checkpoint_dir is not None: + demo_runscript = demo_runscript_with_checkpoint + else: + demo_runscript = demo_runscript_without_checkpoint + + with open(os.path.abspath(args.app), "rb") as binfile: + encodedBin = base64.b64encode(binfile.read()).decode() + + _, tempRunscript = tempfile.mkstemp() + with open(tempRunscript, "w") as b64file: + runscriptStr = demo_runscript.format( + args.app, args.opts, encodedBin, args.opts + ) + b64file.write(runscriptStr) + + args.script = tempRunscript + + # Defaults for CPU + args.cpu_type = "X86KvmCPU" + args.mem_size = "8GB" + + # Defaults for MI300X + args.gpu_device = "MI300X" + args.dgpu_mem_size = "16GB" # GPU memory size, must be 16GB currently. + + # See: https://rocm.docs.amd.com/en/latest/conceptual/gpu-arch/mi300.html + # Topology for one XCD. Number of CUs is approximately 304 / 8, rounded + # up to 40 due to gem5 restriction of 4 CUs per SQC / scalar cache. + args.num_compute_units = 40 + args.gpu_topology = "Crossbar" + + # Run gem5 + runfs.runGpuFSSystem(args) + + +if __name__ == "__m5_main__": + runMI300GPUFS("X86KvmCPU")