From b83457df0bee2cec66bf377d17b95c867d8ef025 Mon Sep 17 00:00:00 2001
From: Matthew Poremba
Date: Mon, 26 Dec 2022 09:54:20 -0800
Subject: [PATCH] arch-vega: Implement ds_add_u64

This instruction atomically adds an unsigned 64-bit VGPR operand to a
value in LDS and does not return a result.

Change-Id: I6a7d6713b256607c4e69ddbdef5c83172493c077
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67075
Reviewed-by: Matt Sinclair
Maintainer: Matt Sinclair
Tested-by: kokoro
---
 src/arch/amdgpu/vega/insts/instructions.cc | 49 ++++++++++++++++++++--
 src/arch/amdgpu/vega/insts/instructions.hh |  2 +
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 3d9808ac7c..a54f426837 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -36088,6 +36088,10 @@ namespace VegaISA
     Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt)
         : Inst_DS(iFmt, "ds_add_u64")
     {
+        setFlag(MemoryRef);
+        setFlag(GroupSegment);
+        setFlag(AtomicAdd);
+        setFlag(AtomicNoReturn);
     } // Inst_DS__DS_ADD_U64
 
     Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64()
@@ -36096,14 +36100,53 @@ namespace VegaISA
 
     // --- description from .arch file ---
     // 64b:
-    // tmp = MEM[ADDR];
     // MEM[ADDR] += DATA[0:1];
-    // RETURN_DATA[0:1] = tmp.
void Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU64 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); } // execute + + void + Inst_DS__DS_ADD_U64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initAtomicAccess(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_ADD_U64::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc // --- Inst_DS__DS_SUB_U64 class methods --- Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt) diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index 05a0002b25..f8fc98b647 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -33079,6 +33079,8 @@ namespace VegaISA } } // getOperandSize + void initiateAcc(GPUDynInstPtr gpuDynInst) override; + void completeAcc(GPUDynInstPtr gpuDynInst) override; void execute(GPUDynInstPtr) override; }; // Inst_DS__DS_ADD_U64