From c644eae2ddd34cf449a9c4476730bd29703c4dd7 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Thu, 8 Jun 2023 10:50:23 -0500 Subject: [PATCH] configs,gpu-compute: Kernel dispatch-based exit events Add two kernel dispatch-based exit events that are useful for limiting the simulation and enabling debug flags at specific GPU kernels. Since the KVM CPU typically used with GPUFS is not deterministic, this helps with enabling debug flags when the Tick number may vary. The exit at GPU kernel option can also limit simulation by only simulating a few hundred kernels, for example, and exit at a determined point. Change-Id: I81bae92a80c25fc38c41e999aa662e1417b7a20d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71418 Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair --- configs/example/gpufs/runfs.py | 27 +++++++++++++++++++++++++++ src/gpu-compute/dispatcher.cc | 3 +++ 2 files changed, 30 insertions(+) diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py index f8ef70d5a2..01203bbcbb 100644 --- a/configs/example/gpufs/runfs.py +++ b/configs/example/gpufs/runfs.py @@ -137,6 +137,20 @@ def addRunFSOptions(parser): "MI200 (gfx90a)", ) + parser.add_argument( + "--debug-at-gpu-kernel", + type=int, + default=-1, + help="Turn on debug flags starting with this kernel", + ) + + parser.add_argument( + "--exit-at-gpu-kernel", + type=int, + default=-1, + help="Exit simulation after running this many kernels", + ) + def runGpuFSSystem(args): """ @@ -184,6 +198,9 @@ def runGpuFSSystem(args): print("Running the simulation") sim_ticks = args.abs_max_tick + kernels_launched = 0 + if args.debug_at_gpu_kernel != -1: + m5.trace.disable() exit_event = m5.simulate(sim_ticks) @@ -199,11 +216,21 @@ def runGpuFSSystem(args): assert args.checkpoint_dir is not None m5.checkpoint(args.checkpoint_dir) break + elif "GPU Kernel Completed" in exit_event.getCause(): + kernels_launched += 1 else: print( f"Unknown exit event: 
{exit_event.getCause()}. Continuing..." ) + if kernels_launched == args.debug_at_gpu_kernel: + m5.trace.enable() + if kernels_launched == args.exit_at_gpu_kernel: + print(f"Exiting @ GPU kernel {kernels_launched}") + break + + exit_event = m5.simulate(sim_ticks - m5.curTick()) + print( "Exiting @ tick %i because %s" % (m5.curTick(), exit_event.getCause()) ) diff --git a/src/gpu-compute/dispatcher.cc b/src/gpu-compute/dispatcher.cc index a76ba7c0be..b19bccca50 100644 --- a/src/gpu-compute/dispatcher.cc +++ b/src/gpu-compute/dispatcher.cc @@ -40,6 +40,7 @@ #include "gpu-compute/hsa_queue_entry.hh" #include "gpu-compute/shader.hh" #include "gpu-compute/wavefront.hh" +#include "sim/sim_exit.hh" #include "sim/syscall_emul_buf.hh" #include "sim/system.hh" @@ -330,6 +331,8 @@ GPUDispatcher::notifyWgCompl(Wavefront *wf) DPRINTF(GPUWgLatency, "Kernel Complete ticks:%d kernel:%d\n", curTick(), kern_id); DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id); + + exitSimLoop("GPU Kernel Completed"); } if (!tickEvent.scheduled()) {