diff --git a/src/arch/amdgpu/vega/insts/vopc.cc b/src/arch/amdgpu/vega/insts/vopc.cc index 2c386fec74..0e1fb04f75 100644 --- a/src/arch/amdgpu/vega/insts/vopc.cc +++ b/src/arch/amdgpu/vega/insts/vopc.cc @@ -3782,13 +3782,21 @@ namespace VegaISA src0.readSrc(); src1.read(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } + auto cmpImpl = [](uint16_t a, uint16_t b) { return a == b ? 1 : 0; }; - vcc.write(); + if (isSDWAInst()) { + sdwabHelper(gpuDynInst, cmpImpl); + } else if (isDPPInst()) { + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, cmpImpl(src0[lane], src1[lane])); + } + } + + vcc.write(); + } } // execute // --- Inst_VOPC__V_CMP_LE_U16 class methods --- @@ -3881,10 +3889,20 @@ namespace VegaISA src0.readSrc(); src1.read(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + auto cmpImpl = [](uint16_t a, uint16_t b) { return a != b ? 1 : 0; }; + + if (isSDWAInst()) { + sdwabHelper(gpuDynInst, cmpImpl); + } else if (isDPPInst()) { + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, cmpImpl(src0[lane], src1[lane])); + } } + + vcc.write(); } vcc.write();