From 6558821e2d15d177507ab56c629679c8edfc06ed Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 17 Jul 2024 15:23:54 -0700 Subject: [PATCH] arch-vega: Add SDWAB for v_cmp_{eq,ne}_u16 This shows an example of how to use the previous commit which adds an SDWAB helper. The execute() methods of both instructions are the same with the exception of the lambda function passed to the helper method. Change-Id: I5ffe361440b4020b9f7669c0ed946aa6b3bbec25 --- src/arch/amdgpu/vega/insts/vopc.cc | 36 ++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/vopc.cc b/src/arch/amdgpu/vega/insts/vopc.cc index 2c386fec74..0e1fb04f75 100644 --- a/src/arch/amdgpu/vega/insts/vopc.cc +++ b/src/arch/amdgpu/vega/insts/vopc.cc @@ -3782,13 +3782,21 @@ namespace VegaISA src0.readSrc(); src1.read(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } + auto cmpImpl = [](uint16_t a, uint16_t b) { return a == b ? 1 : 0; }; - vcc.write(); + if (isSDWAInst()) { + sdwabHelper(gpuDynInst, cmpImpl); + } else if (isDPPInst()) { + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, cmpImpl(src0[lane], src1[lane])); + } + } + + vcc.write(); + } } // execute // --- Inst_VOPC__V_CMP_LE_U16 class methods --- @@ -3881,10 +3889,20 @@ namespace VegaISA src0.readSrc(); src1.read(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + auto cmpImpl = [](uint16_t a, uint16_t b) { return a != b ? 
1 : 0; }; + + if (isSDWAInst()) { + sdwabHelper(gpuDynInst, cmpImpl); + } else if (isDPPInst()) { + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, cmpImpl(src0[lane], src1[lane])); + } } + + vcc.write(); } vcc.write();