From d7516a26dc004892ce03c9784222da5944ea2489 Mon Sep 17 00:00:00 2001 From: Alexandru Dutu Date: Fri, 7 Oct 2022 17:11:35 -0700 Subject: [PATCH] arch-vega: Implementing global_atomic_or Change-Id: I13065186313ca784054956e1165b1b2fd8ce4a19 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64511 Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/arch/amdgpu/vega/insts/instructions.cc | 54 +++++++++++++++++++++- src/arch/amdgpu/vega/insts/instructions.hh | 2 + 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index f019dfd75e..987474fbfb 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -45112,8 +45112,60 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->globalMemoryPipe. + issueRequest(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_OR::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_OR::completeAcc(GPUDynInstPtr gpuDynInst) + { + if (isAtomicRet()) { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_XOR class methods --- Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt) diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index dc2ee08f08..ddf228a76a 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -42800,6 +42800,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_OR class Inst_FLAT__FLAT_ATOMIC_XOR : public Inst_FLAT