gpu-compute: Add support for skipping GPU kernels (#940)

gpu-compute: Add support for skipping GPU kernels

This commit adds two new command-line options:

--skip-until-gpu-kernel N
Skips (non-blit) GPU kernels until the target kernel is reached.
Execution continues normally from there. Blit kernels are not skipped
because they are responsible for copying the kernel code and metadata
for the non-blit kernels. Note that skipping kernels can impact
correctness; this feature is only useful if the kernel of interest has
no data-dependent behavior, or its data-dependent behavior is not based
on data generated by the skipped kernels.

--exit-after-gpu-kernel N
Ends the simulation after completing (non-blit) GPU kernel N.

This commit also renames two existing command-line options:
--debug-at-gpu-kernel -> --debug-at-gpu-task
--exit-at-gpu-kernel  -> --exit-at-gpu-task

These were renamed because they count GPU tasks, which include both
kernels launched by the application and blit kernels.

Change-Id: If250b3fd2db05c1222e369e9e3f779c4422074bc
This commit is contained in:
Michael Boyer
2024-03-21 07:46:27 -07:00
committed by GitHub
parent ba2f5615ba
commit acd9d3ff94
8 changed files with 111 additions and 20 deletions

View File

@@ -36,6 +36,7 @@
#include "arch/amdgpu/vega/pagetable_walker.hh"
#include "base/chunk_generator.hh"
#include "debug/GPUCommandProc.hh"
#include "debug/GPUDisp.hh"
#include "debug/GPUInitAbi.hh"
#include "debug/GPUKernelInfo.hh"
#include "dev/amdgpu/amdgpu_device.hh"
@@ -48,6 +49,7 @@
#include "sim/full_system.hh"
#include "sim/process.hh"
#include "sim/proxy_ptr.hh"
#include "sim/sim_exit.hh"
#include "sim/syscall_emul_buf.hh"
namespace gem5
@@ -55,7 +57,8 @@ namespace gem5
GPUCommandProcessor::GPUCommandProcessor(const Params &p)
: DmaVirtDevice(p), dispatcher(*p.dispatcher), _driver(nullptr),
walker(p.walker), hsaPP(p.hsapp)
walker(p.walker), hsaPP(p.hsapp),
target_non_blit_kernel_id(p.target_non_blit_kernel_id)
{
assert(hsaPP);
hsaPP->setDevice(this);
@@ -259,10 +262,13 @@ GPUCommandProcessor::dispatchKernelObject(AMDKernelCode *akc, void *raw_pkt,
* APUs to implement asynchronous memcopy operations from 2 pointers in
* host memory. I have no idea what BLIT stands for.
* */
bool is_blit_kernel;
if (!disp_pkt->completion_signal) {
kernel_name = "Some kernel";
is_blit_kernel = false;
} else {
kernel_name = "Blit kernel";
is_blit_kernel = true;
}
DPRINTF(GPUKernelInfo, "Kernel name: %s\n", kernel_name.c_str());
@@ -273,6 +279,38 @@ GPUCommandProcessor::dispatchKernelObject(AMDKernelCode *akc, void *raw_pkt,
dynamic_task_id, raw_pkt, akc, host_pkt_addr, machine_code_addr,
gfxVersion);
// The driver expects the start time to be in ns
Tick start_ts = curTick() / sim_clock::as_int::ns;
dispatchStartTime.insert({disp_pkt->completion_signal, start_ts});
// Potentially skip a non-blit kernel
if (!is_blit_kernel && (non_blit_kernel_id < target_non_blit_kernel_id)) {
DPRINTF(GPUCommandProc, "Skipping non-blit kernel %i (Task ID: %i)\n",
non_blit_kernel_id, dynamic_task_id);
// Notify the HSA PP that this kernel is complete
hsaPacketProc().finishPkt(task->dispPktPtr(), task->queueId());
if (task->completionSignal()) {
DPRINTF(GPUDisp, "HSA AQL Kernel Complete with completion "
"signal! Addr: %d\n", task->completionSignal());
sendCompletionSignal(task->completionSignal());
} else {
DPRINTF(GPUDisp, "HSA AQL Kernel Complete! No completion "
"signal\n");
}
++dynamic_task_id;
++non_blit_kernel_id;
delete akc;
// Notify the run script that a kernel has been skipped
exitSimLoop("Skipping GPU Kernel");
return;
}
DPRINTF(GPUCommandProc, "Task ID: %i Got AQL: wg size (%dx%dx%d), "
"grid size (%dx%dx%d) kernarg addr: %#x, completion "
"signal addr:%#x\n", dynamic_task_id, disp_pkt->workgroup_size_x,
@@ -288,10 +326,7 @@ GPUCommandProcessor::dispatchKernelObject(AMDKernelCode *akc, void *raw_pkt,
initABI(task);
++dynamic_task_id;
// The driver expects the start time to be in ns
Tick start_ts = curTick() / sim_clock::as_int::ns;
dispatchStartTime.insert({disp_pkt->completion_signal, start_ts});
if (!is_blit_kernel) ++non_blit_kernel_id;
delete akc;
}