diff --git a/src/arch/amdgpu/vega/decoder.cc b/src/arch/amdgpu/vega/decoder.cc index 7d3d707c56..e7bea7c33b 100644 --- a/src/arch/amdgpu/vega/decoder.cc +++ b/src/arch/amdgpu/vega/decoder.cc @@ -1679,9 +1679,9 @@ namespace VegaISA &Decoder::decode_OP_FLAT__FLAT_ATOMIC_DEC, &Decoder::decode_invalid, &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, + &Decoder::decode_OP_FLAT__FLAT_ATOMIC_ADD_F64, + &Decoder::decode_OP_FLAT__FLAT_ATOMIC_MIN_F64, + &Decoder::decode_OP_FLAT__FLAT_ATOMIC_MAX_F64, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, @@ -1808,11 +1808,11 @@ namespace VegaISA &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_XOR, &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_INC, &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_DEC, + &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_F32, &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, + &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_F64, + &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_MIN_F64, + &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_MAX_F64, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, @@ -8441,6 +8441,24 @@ namespace VegaISA return new Inst_FLAT__FLAT_ATOMIC_DEC(&iFmt->iFmt_FLAT); } // decode_OP_FLAT__FLAT_ATOMIC_DEC + GPUStaticInst* + Decoder::decode_OP_FLAT__FLAT_ATOMIC_ADD_F64(MachInst iFmt) + { + return new Inst_FLAT__FLAT_ATOMIC_ADD_F64(&iFmt->iFmt_FLAT); + } // decode_OP_FLAT__FLAT_ATOMIC_ADD_F64 + + GPUStaticInst* + Decoder::decode_OP_FLAT__FLAT_ATOMIC_MIN_F64(MachInst iFmt) + { + return new Inst_FLAT__FLAT_ATOMIC_MIN_F64(&iFmt->iFmt_FLAT); + } // decode_OP_FLAT__FLAT_ATOMIC_MIN_F64 + + GPUStaticInst* + Decoder::decode_OP_FLAT__FLAT_ATOMIC_MAX_F64(MachInst iFmt) + { + return new Inst_FLAT__FLAT_ATOMIC_MAX_F64(&iFmt->iFmt_FLAT); + } // decode_OP_FLAT__FLAT_ATOMIC_MAX_F64 + GPUStaticInst* 
Decoder::decode_OP_FLAT__FLAT_ATOMIC_SWAP_X2(MachInst iFmt) { @@ -8738,6 +8756,40 @@ namespace VegaISA return new Inst_FLAT__FLAT_ATOMIC_DEC(&iFmt->iFmt_FLAT); } + GPUStaticInst* + Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_F32(MachInst iFmt) + { + // Note: There is no flat_atomic_add_f32 as of MI200. However, gem5 + // implements all global and scratch instructions as Inst_FLAT. + return new Inst_FLAT__FLAT_ATOMIC_ADD_F32(&iFmt->iFmt_FLAT); + } + + GPUStaticInst* + Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_PK_ADD_F16(MachInst iFmt) + { + // Note: There is no flat_atomic_pk_add_f16 as of MI200. However, gem5 + // implements all global and scratch instructions as Inst_FLAT. + return new Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16(&iFmt->iFmt_FLAT); + } + + GPUStaticInst* + Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_F64(MachInst iFmt) + { + return new Inst_FLAT__FLAT_ATOMIC_ADD_F64(&iFmt->iFmt_FLAT); + } + + GPUStaticInst* + Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_MIN_F64(MachInst iFmt) + { + return new Inst_FLAT__FLAT_ATOMIC_MIN_F64(&iFmt->iFmt_FLAT); + } + + GPUStaticInst* + Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_MAX_F64(MachInst iFmt) + { + return new Inst_FLAT__FLAT_ATOMIC_MAX_F64(&iFmt->iFmt_FLAT); + } + GPUStaticInst* Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_SWAP_X2(MachInst iFmt) { diff --git a/src/arch/amdgpu/vega/gpu_decoder.hh b/src/arch/amdgpu/vega/gpu_decoder.hh index 11858f0375..4ebb95e5f4 100644 --- a/src/arch/amdgpu/vega/gpu_decoder.hh +++ b/src/arch/amdgpu/vega/gpu_decoder.hh @@ -698,6 +698,9 @@ namespace VegaISA GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_XOR(MachInst); GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_INC(MachInst); GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_DEC(MachInst); + GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_ADD_F64(MachInst); + GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_MIN_F64(MachInst); + GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_MAX_F64(MachInst); GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_SWAP_X2(MachInst); GPUStaticInst*
decode_OP_FLAT__FLAT_ATOMIC_CMPSWAP_X2(MachInst); GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_ADD_X2(MachInst); @@ -746,6 +749,11 @@ namespace VegaISA GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_XOR(MachInst); GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_INC(MachInst); GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_DEC(MachInst); + GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_F32(MachInst); + GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_PK_ADD_F16(MachInst); + GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_F64(MachInst); + GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_MIN_F64(MachInst); + GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_MAX_F64(MachInst); GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_SWAP_X2(MachInst); GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_CMPSWAP_X2(MachInst); GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_X2(MachInst); diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index f0472835dd..9104f29228 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -44782,34 +44782,7 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); + atomicExecute(gpuDynInst); } // execute void @@ -44821,18 +44794,7 @@ 
namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst) { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } + atomicComplete(gpuDynInst); } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods --- @@ -44864,38 +44826,7 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1); - - data.read(); - cmp.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->x_data))[lane] - = data[lane]; - (reinterpret_cast(gpuDynInst->a_data))[lane] - = cmp[lane]; - } - } - - issueRequestHelper(gpuDynInst); + atomicExecute(gpuDynInst); } // execute void @@ -44907,18 +44838,7 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst) { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } + atomicComplete(gpuDynInst); } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_ADD class methods --- @@ -44946,34 +44866,7 @@ namespace VegaISA void 
Inst_FLAT__FLAT_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); + atomicExecute(gpuDynInst); } // execute void @@ -44985,18 +44878,7 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_ADD::completeAcc(GPUDynInstPtr gpuDynInst) { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } + atomicComplete(gpuDynInst); } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_SUB class methods --- @@ -45024,8 +44906,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SUB::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SUB::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_SMIN class methods --- Inst_FLAT__FLAT_ATOMIC_SMIN::Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *iFmt) @@ -45052,55 +44946,19 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) { - Wavefront *wf = 
gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); + atomicExecute(gpuDynInst); } // execute void Inst_FLAT__FLAT_ATOMIC_SMIN::initiateAcc(GPUDynInstPtr gpuDynInst) { - initAtomicAccess(gpuDynInst); + initAtomicAccess(gpuDynInst); } // initiateAcc void Inst_FLAT__FLAT_ATOMIC_SMIN::completeAcc(GPUDynInstPtr gpuDynInst) { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } + atomicComplete(gpuDynInst); } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_UMIN class methods --- @@ -45128,8 +44986,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_UMIN::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_UMIN::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_SMAX class methods --- Inst_FLAT__FLAT_ATOMIC_SMAX::Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *iFmt) @@ -45156,55 +45026,19 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) { - Wavefront *wf = 
gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); + atomicExecute(gpuDynInst); } // execute void Inst_FLAT__FLAT_ATOMIC_SMAX::initiateAcc(GPUDynInstPtr gpuDynInst) { - initAtomicAccess(gpuDynInst); + initAtomicAccess(gpuDynInst); } // initiateAcc void Inst_FLAT__FLAT_ATOMIC_SMAX::completeAcc(GPUDynInstPtr gpuDynInst) { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } + atomicComplete(gpuDynInst); } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_UMAX class methods --- @@ -45232,8 +45066,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_UMAX::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_UMAX::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_AND class methods --- Inst_FLAT__FLAT_ATOMIC_AND::Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *iFmt) @@ -45260,8 +45106,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + 
atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_AND::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_AND::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_OR class methods --- Inst_FLAT__FLAT_ATOMIC_OR::Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *iFmt) @@ -45288,32 +45146,7 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); + atomicExecute(gpuDynInst); } // execute void @@ -45325,18 +45158,7 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_OR::completeAcc(GPUDynInstPtr gpuDynInst) { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } + atomicComplete(gpuDynInst); } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_XOR class methods --- @@ -45365,8 +45187,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_XOR::initiateAcc(GPUDynInstPtr gpuDynInst) + { + 
initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_XOR::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_INC class methods --- Inst_FLAT__FLAT_ATOMIC_INC::Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *iFmt) @@ -45393,8 +45227,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_INC::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_INC::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_DEC class methods --- Inst_FLAT__FLAT_ATOMIC_DEC::Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *iFmt) @@ -45421,8 +45267,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_DEC::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_DEC::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_SWAP_X2 class methods --- Inst_FLAT__FLAT_ATOMIC_SWAP_X2::Inst_FLAT__FLAT_ATOMIC_SWAP_X2( @@ -45450,8 +45308,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 class methods --- 
Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2( @@ -45481,38 +45351,7 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst) { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 data(gpuDynInst, extData.DATA); - ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2); - - data.read(); - cmp.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->x_data))[lane] - = data[lane]; - (reinterpret_cast(gpuDynInst->a_data))[lane] - = cmp[lane]; - } - } - - issueRequestHelper(gpuDynInst); + atomicExecute(gpuDynInst); } // execute void @@ -45524,18 +45363,7 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst) { - if (isAtomicRet()) { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } + atomicComplete(gpuDynInst); } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_ADD_X2 class methods --- @@ -45564,34 +45392,7 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst) { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - 
gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); + atomicExecute(gpuDynInst); } // execute void @@ -45603,19 +45404,7 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_ADD_X2::completeAcc(GPUDynInstPtr gpuDynInst) { - if (isAtomicRet()) { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } + atomicComplete(gpuDynInst); } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_SUB_X2 class methods --- @@ -45644,8 +45433,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SUB_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SUB_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_SMIN_X2 class methods --- Inst_FLAT__FLAT_ATOMIC_SMIN_X2::Inst_FLAT__FLAT_ATOMIC_SMIN_X2( @@ -45673,8 +45474,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SMIN_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SMIN_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- 
Inst_FLAT__FLAT_ATOMIC_UMIN_X2 class methods --- Inst_FLAT__FLAT_ATOMIC_UMIN_X2::Inst_FLAT__FLAT_ATOMIC_UMIN_X2( @@ -45702,8 +45515,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_UMIN_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_UMIN_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_SMAX_X2 class methods --- Inst_FLAT__FLAT_ATOMIC_SMAX_X2::Inst_FLAT__FLAT_ATOMIC_SMAX_X2( @@ -45731,8 +45556,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SMAX_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SMAX_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_UMAX_X2 class methods --- Inst_FLAT__FLAT_ATOMIC_UMAX_X2::Inst_FLAT__FLAT_ATOMIC_UMAX_X2( @@ -45760,8 +45597,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_UMAX_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_UMAX_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_AND_X2 class methods --- Inst_FLAT__FLAT_ATOMIC_AND_X2::Inst_FLAT__FLAT_ATOMIC_AND_X2( @@ -45789,8 +45638,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // 
execute + + void + Inst_FLAT__FLAT_ATOMIC_AND_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_AND_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_OR_X2 class methods --- Inst_FLAT__FLAT_ATOMIC_OR_X2::Inst_FLAT__FLAT_ATOMIC_OR_X2( @@ -45818,8 +45679,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_OR_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_OR_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_XOR_X2 class methods --- Inst_FLAT__FLAT_ATOMIC_XOR_X2::Inst_FLAT__FLAT_ATOMIC_XOR_X2( @@ -45847,8 +45720,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_XOR_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_XOR_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_FLAT__FLAT_ATOMIC_INC_X2 class methods --- Inst_FLAT__FLAT_ATOMIC_INC_X2::Inst_FLAT__FLAT_ATOMIC_INC_X2( @@ -45876,8 +45761,20 @@ namespace VegaISA void Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst) { - panicUnimplemented(); + atomicExecute(gpuDynInst); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_INC_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_INC_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- 
Inst_FLAT__FLAT_ATOMIC_DEC_X2 class methods --- Inst_FLAT__FLAT_ATOMIC_DEC_X2::Inst_FLAT__FLAT_ATOMIC_DEC_X2( @@ -45905,9 +45802,199 @@ namespace VegaISA // RETURN_DATA[0:1] = tmp. void Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_DEC_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_DEC_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_ADD_F32 class methods --- + + Inst_FLAT__FLAT_ATOMIC_ADD_F32::Inst_FLAT__FLAT_ATOMIC_ADD_F32( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_add_f32") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_ADD_F32 + + Inst_FLAT__FLAT_ATOMIC_ADD_F32::~Inst_FLAT__FLAT_ATOMIC_ADD_F32() + { + } // ~Inst_FLAT__FLAT_ATOMIC_ADD_F32 + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F32::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F32::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 class methods --- + + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_pk_add_f16") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 + + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::~Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16() + { + } // ~Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 + + void + 
Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::execute(GPUDynInstPtr gpuDynInst) { panicUnimplemented(); } // execute + + void + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_ADD_F64 class methods --- + + Inst_FLAT__FLAT_ATOMIC_ADD_F64::Inst_FLAT__FLAT_ATOMIC_ADD_F64( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_add_f64") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_ADD_F64 + + Inst_FLAT__FLAT_ATOMIC_ADD_F64::~Inst_FLAT__FLAT_ATOMIC_ADD_F64() + { + } // ~Inst_FLAT__FLAT_ATOMIC_ADD_F64 + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F64::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F64::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_MIN_F64 class methods --- + + Inst_FLAT__FLAT_ATOMIC_MIN_F64::Inst_FLAT__FLAT_ATOMIC_MIN_F64( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_min_f64") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_MIN_F64 + + Inst_FLAT__FLAT_ATOMIC_MIN_F64::~Inst_FLAT__FLAT_ATOMIC_MIN_F64() + { + } // ~Inst_FLAT__FLAT_ATOMIC_MIN_F64 + + void + Inst_FLAT__FLAT_ATOMIC_MIN_F64::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_MIN_F64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_MIN_F64::completeAcc(GPUDynInstPtr 
gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_MAX_F64 class methods --- + + Inst_FLAT__FLAT_ATOMIC_MAX_F64::Inst_FLAT__FLAT_ATOMIC_MAX_F64( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_max_f64") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_MAX_F64 + + Inst_FLAT__FLAT_ATOMIC_MAX_F64::~Inst_FLAT__FLAT_ATOMIC_MAX_F64() + { + } // ~Inst_FLAT__FLAT_ATOMIC_MAX_F64 + + void + Inst_FLAT__FLAT_ATOMIC_MAX_F64::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_MAX_F64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_MAX_F64::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc // --- Inst_VOP3P__V_PK_MOV_B32 class methods --- Inst_VOP3P__V_PK_MOV_B32::Inst_VOP3P__V_PK_MOV_B32(InFmt_VOP3P *iFmt) diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index c41569f193..c65f4c1609 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -42580,6 +42580,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_SUB class Inst_FLAT__FLAT_ATOMIC_SMIN : public Inst_FLAT @@ -42656,6 +42658,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_UMIN class Inst_FLAT__FLAT_ATOMIC_SMAX : public Inst_FLAT @@ -42732,6 +42736,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void 
completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_UMAX class Inst_FLAT__FLAT_ATOMIC_AND : public Inst_FLAT @@ -42769,6 +42775,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_AND class Inst_FLAT__FLAT_ATOMIC_OR : public Inst_FLAT @@ -42845,6 +42853,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_XOR class Inst_FLAT__FLAT_ATOMIC_INC : public Inst_FLAT @@ -42882,6 +42892,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_INC class Inst_FLAT__FLAT_ATOMIC_DEC : public Inst_FLAT @@ -42919,6 +42931,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_DEC class Inst_FLAT__FLAT_ATOMIC_SWAP_X2 : public Inst_FLAT @@ -42956,6 +42970,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_SWAP_X2 class Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 : public Inst_FLAT @@ -43071,6 +43087,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_SUB_X2 class Inst_FLAT__FLAT_ATOMIC_SMIN_X2 : public Inst_FLAT @@ -43108,6 +43126,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // 
Inst_FLAT__FLAT_ATOMIC_SMIN_X2 class Inst_FLAT__FLAT_ATOMIC_UMIN_X2 : public Inst_FLAT @@ -43145,6 +43165,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_UMIN_X2 class Inst_FLAT__FLAT_ATOMIC_SMAX_X2 : public Inst_FLAT @@ -43182,6 +43204,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_SMAX_X2 class Inst_FLAT__FLAT_ATOMIC_UMAX_X2 : public Inst_FLAT @@ -43219,6 +43243,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_UMAX_X2 class Inst_FLAT__FLAT_ATOMIC_AND_X2 : public Inst_FLAT @@ -43256,6 +43282,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_AND_X2 class Inst_FLAT__FLAT_ATOMIC_OR_X2 : public Inst_FLAT @@ -43293,6 +43321,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_OR_X2 class Inst_FLAT__FLAT_ATOMIC_XOR_X2 : public Inst_FLAT @@ -43330,6 +43360,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_XOR_X2 class Inst_FLAT__FLAT_ATOMIC_INC_X2 : public Inst_FLAT @@ -43367,6 +43399,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_INC_X2 class 
Inst_FLAT__FLAT_ATOMIC_DEC_X2 : public Inst_FLAT @@ -43404,8 +43438,205 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_DEC_X2 + class Inst_FLAT__FLAT_ATOMIC_ADD_F32 : public Inst_FLAT + { + public: + Inst_FLAT__FLAT_ATOMIC_ADD_F32(InFmt_FLAT*); + ~Inst_FLAT__FLAT_ATOMIC_ADD_F32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return isFlat() ? 2 : 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_addr + return vgprIsOffset() ? 4 : 8; + case 1: //vgpr_src + return 4; + case 2: //vgpr_dst or saddr + return isFlat() ? 4 : 8; + case 3: //vgpr_dst + assert(!isFlat()); + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_FLAT__FLAT_ATOMIC_ADD_F32 + + class Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 : public Inst_FLAT + { + public: + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16(InFmt_FLAT*); + ~Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return isFlat() ? 2 : 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_addr + return vgprIsOffset() ? 4 : 8; + case 1: //vgpr_src + return 4; + case 2: //vgpr_dst or saddr + return isFlat() ? 
4 : 8; + case 3: //vgpr_dst + assert(!isFlat()); + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 + + class Inst_FLAT__FLAT_ATOMIC_ADD_F64 : public Inst_FLAT + { + public: + Inst_FLAT__FLAT_ATOMIC_ADD_F64(InFmt_FLAT*); + ~Inst_FLAT__FLAT_ATOMIC_ADD_F64(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return isFlat() ? 2 : 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_addr + return vgprIsOffset() ? 4 : 8; + case 1: //vgpr_src + return 8; + case 2: //vgpr_dst or saddr + return isFlat() ? 8 : 8; + case 3: //vgpr_dst + assert(!isFlat()); + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_FLAT__FLAT_ATOMIC_ADD_F64 + + class Inst_FLAT__FLAT_ATOMIC_MIN_F64 : public Inst_FLAT + { + public: + Inst_FLAT__FLAT_ATOMIC_MIN_F64(InFmt_FLAT*); + ~Inst_FLAT__FLAT_ATOMIC_MIN_F64(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return isFlat() ? 2 : 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_addr + return vgprIsOffset() ? 4 : 8; + case 1: //vgpr_src + return 8; + case 2: //vgpr_dst or saddr + return isFlat() ? 
8 : 8; + case 3: //vgpr_dst + assert(!isFlat()); + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_FLAT__FLAT_ATOMIC_MIN_F64 + + class Inst_FLAT__FLAT_ATOMIC_MAX_F64 : public Inst_FLAT + { + public: + Inst_FLAT__FLAT_ATOMIC_MAX_F64(InFmt_FLAT*); + ~Inst_FLAT__FLAT_ATOMIC_MAX_F64(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return isFlat() ? 2 : 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_addr + return vgprIsOffset() ? 4 : 8; + case 1: //vgpr_src + return 8; + case 2: //vgpr_dst or saddr + return isFlat() ? 8 : 8; + case 3: //vgpr_dst + assert(!isFlat()); + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_FLAT__FLAT_ATOMIC_MAX_F64 + class Inst_VOP3P__V_PK_MOV_B32 : public Inst_VOP3P { public: diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh index bf4ee2b0c9..d980eb90bc 100644 --- a/src/arch/amdgpu/vega/insts/op_encodings.hh +++ b/src/arch/amdgpu/vega/insts/op_encodings.hh @@ -1256,6 +1256,77 @@ namespace VegaISA } } + // Execute for atomics is identical besides the flag set in the + // constructor, except cmpswap. For cmpswap, the offset to the "cmp" + // register is needed. For all other operations this offset is zero + // and implies the atomic is not a cmpswap. 
+        // RegT defines the type of GPU register (e.g., ConstVecOperandU32)
+        // LaneT defines the type of the register elements (e.g., VecElemU32)
+        template<typename RegT, typename LaneT, int CmpRegOffset = 0>
+        void
+        atomicExecute(GPUDynInstPtr gpuDynInst)
+        {
+            Wavefront *wf = gpuDynInst->wavefront();
+
+            if (gpuDynInst->exec_mask.none()) {
+                wf->decVMemInstsIssued();
+                if (isFlat()) {
+                    wf->decLGKMInstsIssued();
+                }
+                return;
+            }
+
+            gpuDynInst->execUnitId = wf->execUnitId;
+            gpuDynInst->latency.init(gpuDynInst->computeUnit());
+            gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+
+            RegT data(gpuDynInst, extData.DATA);
+            RegT cmp(gpuDynInst, extData.DATA + CmpRegOffset);
+
+            data.read();
+            if constexpr (CmpRegOffset) {
+                cmp.read();
+            }
+
+            calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
+
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (gpuDynInst->exec_mask[lane]) {
+                    if constexpr (CmpRegOffset) {
+                        (reinterpret_cast<LaneT*>(
+                            gpuDynInst->x_data))[lane] = data[lane];
+                        (reinterpret_cast<LaneT*>(
+                            gpuDynInst->a_data))[lane] = cmp[lane];
+                    } else {
+                        (reinterpret_cast<LaneT*>(gpuDynInst->a_data))[lane]
+                            = data[lane];
+                    }
+                }
+            }
+
+            issueRequestHelper(gpuDynInst);
+        }
+
+        // RegT defines the type of GPU register (e.g., ConstVecOperandU32)
+        // LaneT defines the type of the register elements (e.g., VecElemU32)
+        template<typename RegT, typename LaneT>
+        void
+        atomicComplete(GPUDynInstPtr gpuDynInst)
+        {
+            if (isAtomicRet()) {
+                RegT vdst(gpuDynInst, extData.VDST);
+
+                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                    if (gpuDynInst->exec_mask[lane]) {
+                        vdst[lane] = (reinterpret_cast<LaneT*>(
+                            gpuDynInst->d_data))[lane];
+                    }
+                }
+
+                vdst.write();
+            }
+        }
+
         bool
         vgprIsOffset()
         {