From 3ecd28a222d8548579220980a725e1f94c8fd915 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 12 Jan 2022 15:52:56 -0600 Subject: [PATCH] arch-vega: Update FLAT memory access helpers to support LDS This patch ports the changes from a similar patch for arch-gcn3: https://gem5-review.googlesource.com/c/public/gem5/+/48343. Vega already has a helper function to send to the correct pipe depending on the scope, however the initMem helpers currently always assume global scope. In addition, the MUBUF WBINVL1 instructions are updated similarly to the GCN3 patch. Change-Id: I612b9198cb56e226721a90e72bba64395c84ebcd Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/55465 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- src/arch/amdgpu/vega/insts/instructions.cc | 16 ++++- src/arch/amdgpu/vega/insts/op_encodings.hh | 82 ++++++++++++++++++++-- 2 files changed, 91 insertions(+), 7 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index bd7ef7051e..32d048e327 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -39848,7 +39848,13 @@ namespace VegaISA gpuDynInst->execUnitId = wf->execUnitId; gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + + if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { + gpuDynInst->computeUnit()->globalMemoryPipe.
+ issueRequest(gpuDynInst); + } else { + fatal("Unsupported scope for flat instruction.\n"); + } } // execute void @@ -39901,7 +39907,13 @@ namespace VegaISA gpuDynInst->execUnitId = wf->execUnitId; gpuDynInst->latency.init(gpuDynInst->computeUnit()); gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + + if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { + gpuDynInst->computeUnit()->globalMemoryPipe. + issueRequest(gpuDynInst); + } else { + fatal("Unsupported scope for flat instruction.\n"); + } } // execute void Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst) diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh index da16dbdb4b..a6dc58c0d0 100644 --- a/src/arch/amdgpu/vega/insts/op_encodings.hh +++ b/src/arch/amdgpu/vega/insts/op_encodings.hh @@ -800,35 +800,107 @@ namespace VegaISA void initMemRead(GPUDynInstPtr gpuDynInst) { - initMemReqHelper(gpuDynInst, MemCmd::ReadReq); + if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { + initMemReqHelper(gpuDynInst, MemCmd::ReadReq); + } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { + Wavefront *wf = gpuDynInst->wavefront(); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + Addr vaddr = gpuDynInst->addr[lane]; + (reinterpret_cast(gpuDynInst->d_data))[lane] + = wf->ldsChunk->read(vaddr); + } + } + } } template void initMemRead(GPUDynInstPtr gpuDynInst) { - initMemReqHelper(gpuDynInst, MemCmd::ReadReq); + if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { + initMemReqHelper(gpuDynInst, MemCmd::ReadReq); + } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { + Wavefront *wf = gpuDynInst->wavefront(); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + Addr vaddr = gpuDynInst->addr[lane]; + for (int i = 0; i < N; ++i) { + (reinterpret_cast(
gpuDynInst->d_data))[lane * N + i] + = wf->ldsChunk->read( + vaddr + i*sizeof(VecElemU32)); + } + } + } } template void initMemWrite(GPUDynInstPtr gpuDynInst) { - initMemReqHelper(gpuDynInst, MemCmd::WriteReq); + if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { + initMemReqHelper(gpuDynInst, MemCmd::WriteReq); + } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { + Wavefront *wf = gpuDynInst->wavefront(); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + Addr vaddr = gpuDynInst->addr[lane]; + wf->ldsChunk->write(vaddr, + (reinterpret_cast(gpuDynInst->d_data))[lane]); + } + } + } } template void initMemWrite(GPUDynInstPtr gpuDynInst) { - initMemReqHelper(gpuDynInst, MemCmd::WriteReq); + if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { + initMemReqHelper(gpuDynInst, MemCmd::WriteReq); + } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { + Wavefront *wf = gpuDynInst->wavefront(); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + Addr vaddr = gpuDynInst->addr[lane]; + for (int i = 0; i < N; ++i) { + wf->ldsChunk->write( + vaddr + i*sizeof(VecElemU32), + (reinterpret_cast( + gpuDynInst->d_data))[lane * N + i]); + } + } + } + } } template void initAtomicAccess(GPUDynInstPtr gpuDynInst) { - initMemReqHelper(gpuDynInst, MemCmd::SwapReq, true); + if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { + initMemReqHelper(gpuDynInst, MemCmd::SwapReq, true); + } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { + Wavefront *wf = gpuDynInst->wavefront(); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + Addr vaddr = gpuDynInst->addr[lane]; + auto amo_op = + gpuDynInst->makeAtomicOpFunctor( + &(reinterpret_cast( + gpuDynInst->a_data))[lane], + &(reinterpret_cast( + gpuDynInst->x_data))[lane]); + + T tmp = wf->ldsChunk->read(vaddr); + (*amo_op)(reinterpret_cast(&tmp)); + wf->ldsChunk->write(vaddr, tmp);
(reinterpret_cast(gpuDynInst->d_data))[lane] = tmp; + } + } + } } void