diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py index 9dcc1187f3..fed155bc44 100644 --- a/configs/example/gpufs/runfs.py +++ b/configs/example/gpufs/runfs.py @@ -140,17 +140,35 @@ def addRunFSOptions(parser): ) parser.add_argument( - "--debug-at-gpu-kernel", + "--debug-at-gpu-task", type=int, default=-1, - help="Turn on debug flags starting with this kernel", + help="Turn on debug flags starting with this task (counting both blit" + " and non-blit kernels)", ) parser.add_argument( - "--exit-at-gpu-kernel", + "--exit-at-gpu-task", type=int, default=-1, - help="Exit simulation after running this many kernels", + help="Exit simulation after running this many tasks (counting both " + "blit and non-blit kernels)", + ) + + parser.add_argument( + "--exit-after-gpu-kernel", + type=int, + default=-1, + help="Exit simulation after completing this (non-blit) kernel", + ) + + parser.add_argument( + "--skip-until-gpu-kernel", + type=int, + default=0, + help="Skip (non-blit) kernels until reaching this kernel. Note that " + "this can impact correctness (the skipped kernels are completely " + "skipped, not fast forwarded)", ) parser.add_argument( @@ -230,8 +248,9 @@ def runGpuFSSystem(args): print("Running the simulation") sim_ticks = args.abs_max_tick - kernels_launched = 0 - if args.debug_at_gpu_kernel != -1: + kernels_completed = 0 + tasks_completed = 0 + if args.debug_at_gpu_task != -1: m5.trace.disable() exit_event = m5.simulate(sim_ticks) @@ -249,16 +268,27 @@ def runGpuFSSystem(args): m5.checkpoint(args.checkpoint_dir) break elif "GPU Kernel Completed" in exit_event.getCause(): - kernels_launched += 1 + if kernels_completed == args.exit_after_gpu_kernel: + print(f"Exiting after GPU kernel {kernels_completed}") + break + kernels_completed += 1 + tasks_completed += 1 + elif "GPU Blit Kernel Completed" in exit_event.getCause(): + tasks_completed += 1 + elif "Skipping GPU Kernel" in exit_event.getCause(): + print(f"Skipping GPU kernel {kernels_completed}") + kernels_completed += 1 + tasks_completed += 1 else: print( f"Unknown exit event: {exit_event.getCause()}. Continuing..." ) - if kernels_launched == args.debug_at_gpu_kernel: + if tasks_completed == args.debug_at_gpu_task: + print(f"Enabling debug flags @ GPU task {tasks_completed}") m5.trace.enable() - if kernels_launched == args.exit_at_gpu_kernel: - print(f"Exiting @ GPU kernel {kernels_launched}") + if tasks_completed == args.exit_at_gpu_task: + print(f"Exiting @ GPU task {tasks_completed}") break exit_event = m5.simulate(sim_ticks - m5.curTick()) diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py index 2803e10fb4..671d4efdc9 100644 --- a/configs/example/gpufs/system/system.py +++ b/configs/example/gpufs/system/system.py @@ -115,11 +115,19 @@ def makeGpuFSSystem(args): numHWQueues=args.num_hw_queues, walker=hsapp_pt_walker, ) - dispatcher_exit_events = True if args.exit_at_gpu_kernel > -1 else False + dispatcher_exit_events = False + if args.exit_at_gpu_task > -1: + dispatcher_exit_events = True + if args.exit_after_gpu_kernel > -1: + dispatcher_exit_events = True dispatcher = GPUDispatcher(kernel_exit_events=dispatcher_exit_events) cp_pt_walker = VegaPagetableWalker() + target_kernel = args.skip_until_gpu_kernel gpu_cmd_proc = GPUCommandProcessor( - hsapp=gpu_hsapp, dispatcher=dispatcher, walker=cp_pt_walker + hsapp=gpu_hsapp, + dispatcher=dispatcher, + walker=cp_pt_walker, + target_non_blit_kernel_id=target_kernel, ) shader.dispatcher = dispatcher shader.gpu_cmd_proc = gpu_cmd_proc diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py index b9a13dc85b..41ff9e7893 100644 --- a/src/gpu-compute/GPU.py +++ b/src/gpu-compute/GPU.py @@ -360,6 +360,10 @@ class GPUCommandProcessor(DmaVirtDevice): walker = Param.VegaPagetableWalker( VegaPagetableWalker(), "Page table walker" ) + target_non_blit_kernel_id = Param.Int( + 0, + "Skip kernels until reaching this kernel (counting only non-blit kernels)", + ) class StorageClassType(Enum): diff --git a/src/gpu-compute/dispatcher.cc b/src/gpu-compute/dispatcher.cc index 8a72fd73f4..e3cb53a342 100644 --- a/src/gpu-compute/dispatcher.cc +++ b/src/gpu-compute/dispatcher.cc @@ -324,7 +324,7 @@ GPUDispatcher::notifyWgCompl(Wavefront *wf) DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id); if (kernelExitEvents) { - shader->requestKernelExitEvent(); + shader->requestKernelExitEvent(task->completionSignal()); } } diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc index 3737f8a6ff..5093cc4ff2 100644 --- a/src/gpu-compute/gpu_command_processor.cc +++ b/src/gpu-compute/gpu_command_processor.cc @@ -36,6 +36,7 @@ #include "arch/amdgpu/vega/pagetable_walker.hh" #include "base/chunk_generator.hh" #include "debug/GPUCommandProc.hh" +#include "debug/GPUDisp.hh" #include "debug/GPUInitAbi.hh" #include "debug/GPUKernelInfo.hh" #include "dev/amdgpu/amdgpu_device.hh" @@ -48,6 +49,7 @@ #include "sim/full_system.hh" #include "sim/process.hh" #include "sim/proxy_ptr.hh" +#include "sim/sim_exit.hh" #include "sim/syscall_emul_buf.hh" namespace gem5 @@ -55,7 +57,8 @@ namespace gem5 GPUCommandProcessor::GPUCommandProcessor(const Params &p) : DmaVirtDevice(p), dispatcher(*p.dispatcher), _driver(nullptr), - walker(p.walker), hsaPP(p.hsapp) + walker(p.walker), hsaPP(p.hsapp), + target_non_blit_kernel_id(p.target_non_blit_kernel_id) { assert(hsaPP); hsaPP->setDevice(this); @@ -259,10 +262,13 @@ GPUCommandProcessor::dispatchKernelObject(AMDKernelCode *akc, void *raw_pkt, * APUs to implement asynchronous memcopy operations from 2 pointers in * host memory. I have no idea what BLIT stands for. * */ + bool is_blit_kernel; if (!disp_pkt->completion_signal) { kernel_name = "Some kernel"; + is_blit_kernel = false; } else { kernel_name = "Blit kernel"; + is_blit_kernel = true; } DPRINTF(GPUKernelInfo, "Kernel name: %s\n", kernel_name.c_str()); @@ -273,6 +279,38 @@ GPUCommandProcessor::dispatchKernelObject(AMDKernelCode *akc, void *raw_pkt, dynamic_task_id, raw_pkt, akc, host_pkt_addr, machine_code_addr, gfxVersion); + // The driver expects the start time to be in ns + Tick start_ts = curTick() / sim_clock::as_int::ns; + dispatchStartTime.insert({disp_pkt->completion_signal, start_ts}); + + // Potentially skip a non-blit kernel + if (!is_blit_kernel && (non_blit_kernel_id < target_non_blit_kernel_id)) { + DPRINTF(GPUCommandProc, "Skipping non-blit kernel %i (Task ID: %i)\n", + non_blit_kernel_id, dynamic_task_id); + + // Notify the HSA PP that this kernel is complete + hsaPacketProc().finishPkt(task->dispPktPtr(), task->queueId()); + if (task->completionSignal()) { + DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion " + "signal! Addr: %d\n", task->completionSignal()); + + sendCompletionSignal(task->completionSignal()); + } else { + DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion " + "signal\n"); + } + + ++dynamic_task_id; + ++non_blit_kernel_id; + + delete akc; + + // Notify the run script that a kernel has been skipped + exitSimLoop("Skipping GPU Kernel"); + + return; + } + DPRINTF(GPUCommandProc, "Task ID: %i Got AQL: wg size (%dx%dx%d), " "grid size (%dx%dx%d) kernarg addr: %#x, completion " "signal addr:%#x\n", dynamic_task_id, disp_pkt->workgroup_size_x, @@ -288,10 +326,7 @@ GPUCommandProcessor::dispatchKernelObject(AMDKernelCode *akc, void *raw_pkt, initABI(task); ++dynamic_task_id; - - // The driver expects the start time to be in ns - Tick start_ts = curTick() / sim_clock::as_int::ns; - dispatchStartTime.insert({disp_pkt->completion_signal, start_ts}); + if (!is_blit_kernel) ++non_blit_kernel_id; delete akc; } diff --git a/src/gpu-compute/gpu_command_processor.hh b/src/gpu-compute/gpu_command_processor.hh index ac73c179d7..38b5257334 100644 --- a/src/gpu-compute/gpu_command_processor.hh +++ b/src/gpu-compute/gpu_command_processor.hh @@ -155,6 +155,12 @@ class GPUCommandProcessor : public DmaVirtDevice // Running counter of dispatched tasks int dynamic_task_id = 0; + // Running counter of dispatched user (non-blit) kernels + int non_blit_kernel_id = 0; + + // Skip all user (non-blit) kernels until reaching this kernel + int target_non_blit_kernel_id = 0; + // Keep track of start times for task dispatches. std::unordered_map dispatchStartTime; diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc index b99950568e..6e3d556026 100644 --- a/src/gpu-compute/shader.cc +++ b/src/gpu-compute/shader.cc @@ -543,7 +543,11 @@ Shader::notifyCuSleep() { if (kernelExitRequested) { kernelExitRequested = false; - exitSimLoop("GPU Kernel Completed"); + if (blitKernel) { + exitSimLoop("GPU Blit Kernel Completed"); + } else { + exitSimLoop("GPU Kernel Completed"); + } } } } diff --git a/src/gpu-compute/shader.hh b/src/gpu-compute/shader.hh index 89541a8ff4..0287ddc169 100644 --- a/src/gpu-compute/shader.hh +++ b/src/gpu-compute/shader.hh @@ -101,6 +101,9 @@ class Shader : public ClockedObject // shader to complete before actually exiting so that stats are updated. bool kernelExitRequested = false; + // Set to true by the dispatcher if the current kernel is a blit kernel + bool blitKernel = false; + public: typedef ShaderParams Params; enum hsail_mode_e {SIMT,VECTOR_SCALAR}; @@ -321,9 +324,10 @@ class Shader : public ClockedObject } void - requestKernelExitEvent() + requestKernelExitEvent(bool is_blit_kernel) { kernelExitRequested = true; + blitKernel = is_blit_kernel; } protected: