diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index 510d6d924b..74b6abee62 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -40585,8 +40585,87 @@ namespace VegaISA void Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + ConstVecOperandU32 src(gpuDynInst, extData.VDATA); + ConstVecOperandU32 cmp(gpuDynInst, extData.VDATA + 1); + + rsrcDesc.read(); + offset.read(); + src.read(); + cmp.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->x_data))[lane] + = src[lane]; + (reinterpret_cast(gpuDynInst->a_data))[lane] + = cmp[lane]; + } + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); } // execute + + void + Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst) + { + if (isAtomicRet()) { + VecOperandU32 vdst(gpuDynInst, extData.VDATA); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } + } // completeAcc // --- Inst_MUBUF__BUFFER_ATOMIC_ADD class methods --- Inst_MUBUF__BUFFER_ATOMIC_ADD diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index 0e4ec04764..ca349c365f 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -37220,6 +37220,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP class Inst_MUBUF__BUFFER_ATOMIC_ADD : public Inst_MUBUF diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh index 613d78b25e..a1c5e99c91 100644 --- a/src/arch/amdgpu/vega/insts/op_encodings.hh +++ b/src/arch/amdgpu/vega/insts/op_encodings.hh @@ -713,6 +713,19 @@ namespace VegaISA gpuDynInst->exec_mask = old_exec_mask; } + template + void + initAtomicAccess(GPUDynInstPtr gpuDynInst) + { + // temporarily modify exec_mask to supress memory accesses to oob + // regions. Only issue memory requests for lanes that have their + // exec_mask set and are not out of bounds. + VectorMask old_exec_mask = gpuDynInst->exec_mask; + gpuDynInst->exec_mask &= ~oobMask; + initMemReqHelper(gpuDynInst, MemCmd::SwapReq, true); + gpuDynInst->exec_mask = old_exec_mask; + } + void injectGlobalMemFence(GPUDynInstPtr gpuDynInst) {