diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc index dbb909f624..02b1bb174a 100644 --- a/src/gpu-compute/gpu_command_processor.cc +++ b/src/gpu-compute/gpu_command_processor.cc @@ -723,7 +723,6 @@ GPUCommandProcessor::sanityCheckAKC(AMDKernelCode *akc) warn_if(akc->kernarg_preload_spec_length || akc->kernarg_preload_spec_offset, "Kernarg preload not implemented\n"); - warn_if(akc->accum_offset, "ACC offset not implemented\n"); warn_if(akc->tg_split, "TG split not implemented\n"); } diff --git a/src/gpu-compute/hsa_queue_entry.hh b/src/gpu-compute/hsa_queue_entry.hh index a464e4882d..f015b091fc 100644 --- a/src/gpu-compute/hsa_queue_entry.hh +++ b/src/gpu-compute/hsa_queue_entry.hh @@ -122,6 +122,11 @@ class HSAQueueEntry } parseKernelCode(akc); + + // Offset of the first AccVGPR in the unified register file. The + // encoded field has granularity 4 and range 0-63: 0 means + // accum-offset = 4, 1 means 8, ..., 63 means 256. + _accumOffset = (akc->accum_offset + 1) * 4; } const GfxVersion& @@ -394,6 +399,12 @@ class HSAQueueEntry assert(_outstandingWbs >= 0); } + unsigned + accumOffset() const + { + return _accumOffset; + } + private: void parseKernelCode(AMDKernelCode *akc) @@ -489,6 +500,8 @@ class HSAQueueEntry std::bitset initialVgprState; std::bitset initialSgprState; + + unsigned _accumOffset; }; } // namespace gem5 diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc index cb8b6220e7..98d882b20e 100644 --- a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -430,6 +430,9 @@ Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems) } } + // Save the offset to the first accumulation VGPR number from HSA task. + accumOffset = task->accumOffset(); + regInitIdx = 0; // VGPRs are initialized to the work item IDs for a given thread. 
There diff --git a/src/gpu-compute/wavefront.hh b/src/gpu-compute/wavefront.hh index 43ac3e9ffc..82035f7d47 100644 --- a/src/gpu-compute/wavefront.hh +++ b/src/gpu-compute/wavefront.hh @@ -131,6 +131,8 @@ class Wavefront : public SimObject uint32_t maxVgprs; // number of SGPRs required by WF uint32_t maxSgprs; + // first accumulation vgpr number + uint32_t accumOffset; void freeResources(); GPUDynInstPtr nextInstr(); void setStatus(status_e newStatus);