From 8722aef2e21620341c028e94bc1075d88ca9b989 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Tue, 20 Feb 2024 13:34:51 -0600 Subject: [PATCH] gpu-compute: Store accum_offset from code object in WF The accumulation offset is needed for some instructions. In order to access this value we need to place it somewhere instruction definitions can access. The most logical place is in the wavefront. This commit simply copies the value from the HSA task to the wavefront object. Change-Id: I44ef62ef32d2421953f096c431dd758e882245b4 --- src/gpu-compute/gpu_command_processor.cc | 1 - src/gpu-compute/hsa_queue_entry.hh | 13 +++++++++++++ src/gpu-compute/wavefront.cc | 3 +++ src/gpu-compute/wavefront.hh | 2 ++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc index dbb909f624..02b1bb174a 100644 --- a/src/gpu-compute/gpu_command_processor.cc +++ b/src/gpu-compute/gpu_command_processor.cc @@ -723,7 +723,6 @@ GPUCommandProcessor::sanityCheckAKC(AMDKernelCode *akc) warn_if(akc->kernarg_preload_spec_length || akc->kernarg_preload_spec_offset, "Kernarg preload not implemented\n"); - warn_if(akc->accum_offset, "ACC offset not implemented\n"); warn_if(akc->tg_split, "TG split not implemented\n"); } diff --git a/src/gpu-compute/hsa_queue_entry.hh b/src/gpu-compute/hsa_queue_entry.hh index a464e4882d..f015b091fc 100644 --- a/src/gpu-compute/hsa_queue_entry.hh +++ b/src/gpu-compute/hsa_queue_entry.hh @@ -122,6 +122,11 @@ class HSAQueueEntry } parseKernelCode(akc); + + // Offset of a first AccVGPR in the unified register file. + // Granularity 4. Value 0-63. 0 - accum-offset = 4, + // 1 - accum-offset = 8, ..., 63 - accum-offset = 256. + _accumOffset = (akc->accum_offset + 1) * 4; } const GfxVersion& @@ -394,6 +399,12 @@ class HSAQueueEntry assert(_outstandingWbs >= 0); } + unsigned + accumOffset() const + { + return _accumOffset; + } + private: void parseKernelCode(AMDKernelCode *akc) @@ -489,6 +500,8 @@ class HSAQueueEntry std::bitset initialVgprState; std::bitset initialSgprState; + + unsigned _accumOffset; }; } // namespace gem5 diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc index cb8b6220e7..98d882b20e 100644 --- a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -430,6 +430,9 @@ Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems) } } + // Save the offset to the first accumulation VGPR number from HSA task. + accumOffset = task->accumOffset(); + regInitIdx = 0; // VGPRs are initialized to the work item IDs for a given thread. There diff --git a/src/gpu-compute/wavefront.hh b/src/gpu-compute/wavefront.hh index 43ac3e9ffc..82035f7d47 100644 --- a/src/gpu-compute/wavefront.hh +++ b/src/gpu-compute/wavefront.hh @@ -131,6 +131,8 @@ class Wavefront : public SimObject uint32_t maxVgprs; // number of SGPRs required by WF uint32_t maxSgprs; + // first accumulation vgpr number + uint32_t accumOffset; void freeResources(); GPUDynInstPtr nextInstr(); void setStatus(status_e newStatus);