From 69338703e7913f04576efe2f4d0f29c822aa5079 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 17 Jul 2024 15:01:10 -0700 Subject: [PATCH 1/4] arch-vega: Implement SDWAB helper Implement an SDWAB helper which accepts a dynamic instruction and a lambda function defining a comparison function taking two values and returning a comparison result of 0 or 1 for false or true. Current instructions which implement SDWA do so on a per-instruction basis which adds a lot of redundant code. This allows for generic SDWAB implementations for VOPC instructions. All modifiers are implemented assuming that SDWAB VOPC instruction comparison types may be U32, I32, F32, U16, I16, or F16 (which exist), but the implementation is extensible to I8, U8, or F8. Change-Id: Idab58a327c29dd19a1a5457237f3799a04f2031b --- src/arch/amdgpu/vega/insts/op_encodings.hh | 154 +++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh index 504946534f..c8ad4ef3d3 100644 --- a/src/arch/amdgpu/vega/insts/op_encodings.hh +++ b/src/arch/amdgpu/vega/insts/op_encodings.hh @@ -35,6 +35,7 @@ #include "arch/amdgpu/vega/gpu_decoder.hh" #include "arch/amdgpu/vega/gpu_mem_helpers.hh" #include "arch/amdgpu/vega/insts/gpu_static_inst.hh" +#include "arch/amdgpu/vega/insts/inst_util.hh" #include "arch/amdgpu/vega/operand.hh" #include "debug/GPUExec.hh" #include "debug/VEGA.hh" @@ -421,6 +422,159 @@ namespace VegaISA InstFormat extData; uint32_t varSize; + template + uint32_t + sdwabSelect(uint32_t dword, const SDWASelVals sel, + bool sign_ext, bool neg, bool abs) + { + // Use the gem5 bits() helper to select a sub region from the + // dword based on the select. Return a 32-bit unsigned which will + // be cast to the appropriate compare type in the lambda passed to + // sdwabHelper. 
+ int low_bit = 0, high_bit = 0; + uint32_t rv = dword; + + if (sel < SDWA_WORD_0) { + // Selecting a sub-dword value smaller than a word (i.e., a + // byte). These values are 0-3 so multiplying by BITS_PER_BYTE + // gives the lower and upper bit easily. + low_bit = sel * VegaISA::BITS_PER_BYTE; + high_bit = low_bit + VegaISA::BITS_PER_BYTE - 1; + } else if (sel < SDWA_DWORD) { + // Selecting a sub-dword value of word size. Enum value is 4 + // or 5, so selecting the LSb and multiplying gives the lower + // and upper bit. + low_bit = (sel & 1) * VegaISA::BITS_PER_WORD; + high_bit = low_bit + VegaISA::MSB_PER_WORD - 1; + } else { + // We are selecting the whole dword. Assert that is true and + // set the bit locations for lower and upper based on dword + // size. + assert(sel == SDWA_DWORD); + low_bit = 0; + high_bit = sizeof(uint32_t) * VegaISA::BITS_PER_BYTE - 1; + } + + rv = bits(dword, high_bit, low_bit); + + uint32_t sign_bit = 1 << high_bit; + + // Panic on combinations which do not make sense. + if (std::is_integral_v && std::is_unsigned_v) { + panic_if(neg, "SWDAB negation operation on unsigned type!\n"); + panic_if(sign_ext, "SWDAB sign extend on unsigned type!\n"); + } + + // Apply ABS, then NEG, then SEXT. + if (abs) { + if (std::is_integral_v) { + // If sign is set, sign extend first then call std::abs. + if ((rv & sign_bit) && std::is_signed_v) { + rv = sext(rv, high_bit + 1) & 0xFFFFFFFF; + rv = std::abs(static_cast(rv)) & 0xFFFFFFFF; + } + } else { + // Clear sign bit for FP types. + rv = rv & mask(high_bit); + } + } + + if (neg) { + if (std::is_integral_v) { + // If sign is set, sign extend first then call unary-. + if (rv & sign_bit) { + rv = sext(rv, high_bit + 1) & 0xFFFFFFFF; + rv = -rv; + } + } else { + // Flip sign bit for FP types. + rv = rv ^ mask(high_bit); + } + } + + if (sign_ext) { + if (std::is_integral_v) { + if (rv & sign_bit) { + rv = sext(rv, high_bit + 1) & 0xFFFFFFFF; + } + } else { + // It is not entirely clear what to do here. 
Literal + // extensions for FP operands append zeros to mantissa + // but specification does not state anything for SDWAB. + panic("SDWAB sign extend set for non-integral type!\n"); + } + } + + return rv; + } + + template + void + sdwabHelper(GPUDynInstPtr gpuDynInst, int (*cmpFunc)(T, T)) + { + DPRINTF(VEGA, "Handling %s SRC SDWA. SRC0: register %s[%d], " + "sDst s[%d], sDst type %s, SRC0_SEL: %d, SRC0_SEXT: %d " + "SRC0_NEG: %d, SRC0_ABS: %d, SRC1: register %s[%d], " + "SRC1_SEL: %d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: " + "%d\n", _opcode.c_str(), + (extData.iFmt_VOP_SDWAB.S0 ? "s" : "v"), + extData.iFmt_VOP_SDWAB.SRC0, + extData.iFmt_VOP_SDWAB.SDST, + (extData.iFmt_VOP_SDWAB.SD ? "SGPR" : "VCC"), + extData.iFmt_VOP_SDWAB.SRC0_SEL, + extData.iFmt_VOP_SDWAB.SRC0_SEXT, + extData.iFmt_VOP_SDWAB.SRC0_NEG, + extData.iFmt_VOP_SDWAB.SRC0_ABS, + (extData.iFmt_VOP_SDWAB.S1 ? "s" : "v"), + instData.VSRC1, + extData.iFmt_VOP_SDWAB.SRC1_SEL, + extData.iFmt_VOP_SDWAB.SRC1_SEXT, + extData.iFmt_VOP_SDWAB.SRC1_NEG, + extData.iFmt_VOP_SDWAB.SRC1_ABS); + + // Start with SRC0 and insert 9th bit for VGPR source (S0 == 0). + int src0_idx = extData.iFmt_VOP_SDWAB.SRC0; + src0_idx += (extData.iFmt_VOP_SDWAB.S0 == 0) ? 0x100 : 0; + + // Start with VSRC1[7:0], insert 9th bit for VGPR source (S1 == 0). + int src1_idx = instData.VSRC1; + src1_idx += (extData.iFmt_VOP_SDWAB.S1 == 0) ? 0x100 : 0; + + // SD == 0 if VCC is dest, else use SDST index. + int sdst_idx = (extData.iFmt_VOP_SDWAB.SD == 1) ? + int(extData.iFmt_VOP_SDWAB.SDST) : REG_VCC_LO; + + ConstVecOperandU32 src0(gpuDynInst, src0_idx); + ConstVecOperandU32 src1(gpuDynInst, src1_idx); + ScalarOperandU64 sdst(gpuDynInst, sdst_idx); + + // Use readSrc in case of scalar const register. + src0.readSrc(); + src1.readSrc(); + + // Select bits first, then cast to type, then apply modifiers. 
+ const SDWASelVals src0_sel = + (SDWASelVals)extData.iFmt_VOP_SDWAB.SRC0_SEL; + const SDWASelVals src1_sel = + (SDWASelVals)extData.iFmt_VOP_SDWAB.SRC1_SEL; + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->wavefront()->execMask(lane)) { + T a = sdwabSelect(src0[lane], src0_sel, + extData.iFmt_VOP_SDWAB.SRC0_SEXT, + extData.iFmt_VOP_SDWAB.SRC0_NEG, + extData.iFmt_VOP_SDWAB.SRC0_ABS); + T b = sdwabSelect(src1[lane], src1_sel, + extData.iFmt_VOP_SDWAB.SRC1_SEXT, + extData.iFmt_VOP_SDWAB.SRC1_NEG, + extData.iFmt_VOP_SDWAB.SRC1_ABS); + sdst.setBit(lane, cmpFunc(a, b)); + } + } + + sdst.write(); + } + private: bool hasSecondDword(InFmt_VOPC *); }; // Inst_VOPC From 6558821e2d15d177507ab56c629679c8edfc06ed Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 17 Jul 2024 15:23:54 -0700 Subject: [PATCH 2/4] arch-vega: Add SDWAB for v_cmp_{eq,ne}_u16 This shows an example of how to use the previous commit which adds an SDWAB helper. The execute() method of both are the same with the exception of the lambda function passed to the helper method. Change-Id: I5ffe361440b4020b9f7669c0ed946aa6b3bbec25 --- src/arch/amdgpu/vega/insts/vopc.cc | 36 ++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/vopc.cc b/src/arch/amdgpu/vega/insts/vopc.cc index 2c386fec74..0e1fb04f75 100644 --- a/src/arch/amdgpu/vega/insts/vopc.cc +++ b/src/arch/amdgpu/vega/insts/vopc.cc @@ -3782,13 +3782,21 @@ namespace VegaISA src0.readSrc(); src1.read(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } + auto cmpImpl = [](uint16_t a, uint16_t b) { return a == b ? 
1 : 0; }; - vcc.write(); + if (isSDWAInst()) { + sdwabHelper(gpuDynInst, cmpImpl); + } else if (isDPPInst()) { + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, cmpImpl(src0[lane], src1[lane])); + } + } + + vcc.write(); + } } // execute // --- Inst_VOPC__V_CMP_LE_U16 class methods --- @@ -3881,10 +3889,20 @@ namespace VegaISA src0.readSrc(); src1.read(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + auto cmpImpl = [](uint16_t a, uint16_t b) { return a != b ? 1 : 0; }; + + if (isSDWAInst()) { + sdwabHelper(gpuDynInst, cmpImpl); + } else if (isDPPInst()) { + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, cmpImpl(src0[lane], src1[lane])); + } } + + vcc.write(); } vcc.write(); From b75fe56da5a92d703b5f3c841435ff87fe1f2b61 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 24 Jul 2024 17:32:37 -0700 Subject: [PATCH 3/4] arch-vega: Panic unimplemented SDWA/DPP for VOP1/VOP2 Add a panic if SDWA or DPP is used for an instruction which does not implement support for it. If an application uses SDWA or DPP it likely does not operate in the same way as the base instruction and therefore gem5 should panic rather than continue. It is likely data is incorrect which will make it more difficult to debug an application. 
Change-Id: I68ac448b0d62941761ef4efa0169f95796270f48 --- src/arch/amdgpu/vega/insts/vop1.cc | 162 ++++++++++++++++++++++++++++- src/arch/amdgpu/vega/insts/vop2.cc | 131 +++++++++++++++++++++++ 2 files changed, 291 insertions(+), 2 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/vop1.cc b/src/arch/amdgpu/vega/insts/vop1.cc index f970923951..f3744b52f2 100644 --- a/src/arch/amdgpu/vega/insts/vop1.cc +++ b/src/arch/amdgpu/vega/insts/vop1.cc @@ -80,6 +80,8 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src_dpp.read(); @@ -148,6 +150,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + if (exec_mask) { src_lane = findLsbSet(exec_mask); } @@ -182,6 +187,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -226,6 +234,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF64)src[lane]; @@ -258,6 +269,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)src[lane]; @@ -290,6 +304,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < 
NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)src[lane]; @@ -324,6 +341,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -372,6 +392,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -439,6 +462,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { float tmp = src[lane]; @@ -475,6 +501,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { AMDGPU::mxfloat16 tmp(src[lane]); @@ -509,6 +538,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5); @@ -542,6 +574,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemI32)std::floor(src[lane]); @@ -595,6 +630,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + 
panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)src[lane]; @@ -627,6 +665,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF64)src[lane]; @@ -659,6 +700,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0)); @@ -691,6 +735,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8)); @@ -723,6 +770,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16)); @@ -755,6 +805,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24)); @@ -789,6 +842,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { 
if (wf->execMask(lane)) { int exp; @@ -835,6 +891,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF64)src[lane]; @@ -867,6 +926,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::trunc(src[lane]); @@ -900,6 +962,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::ceil(src[lane]); @@ -932,6 +997,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = roundNearestEven(src[lane]); @@ -965,6 +1033,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::floor(src[lane]); @@ -997,6 +1068,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemF32 int_part(0.0); @@ -1030,6 +1104,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not 
implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::trunc(src[lane]); @@ -1063,6 +1140,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::ceil(src[lane]); @@ -1095,6 +1175,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = roundNearestEven(src[lane]); @@ -1128,6 +1211,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::floor(src[lane]); @@ -1160,6 +1246,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::pow(2.0, src[lane]); @@ -1192,6 +1281,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::log2(src[lane]); @@ -1224,6 +1316,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 1.0 / src[lane]; @@ -1258,6 
+1353,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 1.0 / src[lane]; @@ -1290,6 +1388,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 1.0 / std::sqrt(src[lane]); @@ -1322,6 +1423,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::fpclassify(src[lane]) == FP_ZERO) { @@ -1366,6 +1470,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::fpclassify(src[lane]) == FP_ZERO) { @@ -1409,6 +1516,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::sqrt(src[lane]); @@ -1441,6 +1551,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::sqrt(src[lane]); @@ -1477,6 +1590,9 @@ namespace VegaISA src.readSrc(); pi.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not 
implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (src[lane] < -256.0 || src[lane] > 256.0) { @@ -1517,6 +1633,9 @@ namespace VegaISA src.readSrc(); pi.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (src[lane] < -256.0 || src[lane] > 256.0) { @@ -1553,6 +1672,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = ~src[lane]; @@ -1585,6 +1707,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = reverseBits(src[lane]); @@ -1617,6 +1742,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = findFirstOneMsb(src[lane]); @@ -1649,6 +1777,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = findFirstOne(src[lane]); @@ -1681,6 +1812,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 
firstOppositeSignBit(src[lane]); @@ -1714,6 +1848,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1752,6 +1889,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1789,6 +1929,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemF64 int_part(0.0); @@ -1827,6 +1970,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1870,6 +2016,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1926,8 +2075,8 @@ namespace VegaISA src.readSrc(); - panic_if(isDPPInst(), "DPP unimplemented for v_mov_b64"); - panic_if(isSDWAInst(), "SDWA unimplemented for v_mov_b64"); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if 
(wf->execMask(lane)) { @@ -2359,6 +2508,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::pow(2.0, src[lane]); @@ -2391,6 +2543,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::log2(src[lane]); @@ -2423,6 +2578,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src[lane]; diff --git a/src/arch/amdgpu/vega/insts/vop2.cc b/src/arch/amdgpu/vega/insts/vop2.cc index 55146711b6..f6eec253a3 100644 --- a/src/arch/amdgpu/vega/insts/vop2.cc +++ b/src/arch/amdgpu/vega/insts/vop2.cc @@ -67,6 +67,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] @@ -102,6 +105,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src0_dpp.read(); @@ -163,6 +168,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -198,6 +206,9 @@ 
namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -232,6 +243,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] * src1[lane]; @@ -266,6 +280,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isnan(src0[lane]) || @@ -344,6 +361,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = sext<24>(bits(src0[lane], 23, 0)) @@ -378,6 +398,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemI64 tmp_src0 @@ -445,6 +468,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); @@ -481,6 +507,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), 
"SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fmin(src0[lane], src1[lane]); @@ -515,6 +544,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fmax(src0[lane], src1[lane]); @@ -548,6 +580,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -581,6 +616,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -614,6 +652,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -647,6 +688,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -682,6 +726,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented 
for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); @@ -717,6 +764,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); @@ -751,6 +801,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and vdst during selecting @@ -826,6 +878,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src0_dpp.read(); @@ -886,6 +940,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and dest during selecting @@ -961,6 +1017,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] ^ src1[lane]; @@ -998,6 +1057,8 @@ namespace VegaISA src1.read(); vdst.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src0_dpp.read(); @@ -1064,6 +1125,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), 
"SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fma(src0[lane], k, src1[lane]); @@ -1103,6 +1167,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fma(src0[lane], src1[lane], k); @@ -1141,6 +1208,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and dest during selecting @@ -1225,6 +1294,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -1265,6 +1337,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -1308,6 +1383,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] + src1[lane] @@ -1355,6 +1433,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for 
%s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] @@ -1401,6 +1482,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] @@ -1598,6 +1682,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] + src1[lane]; @@ -1632,6 +1719,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -1667,6 +1757,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -1701,6 +1794,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] * src1[lane]; @@ -1735,6 +1831,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] 
<< bits(src0[lane], 3, 0); @@ -1770,6 +1869,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> src0[lane]; @@ -1805,6 +1907,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> src0[lane]; @@ -1882,6 +1987,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -1915,6 +2023,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -1948,6 +2059,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -1981,6 +2095,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ 
-2034,6 +2151,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and dest during selecting @@ -2108,6 +2227,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -2141,6 +2263,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -2175,6 +2300,9 @@ namespace VegaISA src1.read(); vdst.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]); @@ -2209,6 +2337,9 @@ namespace VegaISA src1.read(); vdst.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = ~(src0[lane] ^ src1[lane]); From 21f6e166b7f8fe9dcf9c23e4c890773ed6798a26 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 17 Jul 2024 14:57:52 -0700 Subject: [PATCH 4/4] arch-vega: Panic on SDWAB / DPP VOPC unimplemented If SDWAB or DPP are used on a VOPC instruction and those are not implemented, it is highly likely to be a problem for the application. 
Rather than continue to execute and cause undefined behavior, exit the simulation with a panic showing the line of the instruction causing the issue. Change-Id: Ib3f94df7445d068b26907470c1f733be16cd2fc2 --- src/arch/amdgpu/vega/insts/vopc.cc | 390 +++++++++++++++++++++++++++++ 1 file changed, 390 insertions(+) diff --git a/src/arch/amdgpu/vega/insts/vopc.cc b/src/arch/amdgpu/vega/insts/vopc.cc index 0e1fb04f75..9361e68b67 100644 --- a/src/arch/amdgpu/vega/insts/vopc.cc +++ b/src/arch/amdgpu/vega/insts/vopc.cc @@ -74,6 +74,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (bits(src1[lane], 0) || bits(src1[lane], 1)) { @@ -189,6 +192,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (bits(src1[lane], 0) || bits(src1[lane], 1)) { @@ -304,6 +310,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (bits(src1[lane], 0) || bits(src1[lane], 1)) { @@ -420,6 +429,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (bits(src1[lane], 0) || bits(src1[lane], 1)) { @@ -1277,6 +1289,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not 
supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -1311,6 +1326,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -1345,6 +1363,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -1379,6 +1400,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -1413,6 +1437,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (src0[lane] < src1[lane] @@ -1448,6 +1475,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); @@ -1482,6 +1512,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (!std::isnan(src0[lane]) @@ -1517,6 +1550,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (std::isnan(src0[lane]) @@ -1552,6 +1588,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); @@ -1586,6 +1625,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane] @@ -1621,6 +1663,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); @@ -1655,6 +1700,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 
1 : 0); @@ -1689,6 +1737,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -1723,6 +1774,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); @@ -1818,6 +1872,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -1854,6 +1911,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -1890,6 +1950,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -1926,6 +1989,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -1962,6 +2028,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (src0[lane] < src1[lane] @@ -1999,6 +2068,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -2036,6 +2108,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (!std::isnan(src0[lane]) @@ -2074,6 +2149,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (std::isnan(src0[lane]) @@ -2111,6 +2189,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); @@ -2147,6 +2228,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane] @@ -2184,6 +2268,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); @@ -2220,6 +2307,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); @@ -2256,6 +2346,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0); @@ -2292,6 +2385,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); @@ -2387,6 +2483,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); @@ -2421,6 +2520,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -2455,6 +2557,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -2489,6 +2594,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -2523,6 +2631,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (src0[lane] < src1[lane] @@ -2558,6 +2669,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); @@ -2592,6 +2706,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (!std::isnan(src0[lane]) @@ -2627,6 +2744,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (std::isnan(src0[lane]) @@ -2662,6 +2782,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); @@ -2696,6 +2819,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane] @@ -2731,6 +2857,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); @@ -2765,6 +2894,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 
1 : 0); @@ -2799,6 +2931,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -2833,6 +2968,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); @@ -2928,6 +3066,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -2964,6 +3105,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -3000,6 +3144,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -3036,6 +3183,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -3072,6 +3222,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (src0[lane] < src1[lane] @@ -3109,6 +3262,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -3146,6 +3302,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (!std::isnan(src0[lane]) @@ -3184,6 +3343,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (std::isnan(src0[lane]) @@ -3221,6 +3383,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); @@ -3257,6 +3422,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane] @@ -3294,6 +3462,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); @@ -3330,6 +3501,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); @@ -3366,6 +3540,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -3402,6 +3579,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); @@ -3495,6 +3675,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); @@ -3528,6 +3711,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -3561,6 +3747,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -3594,6 +3783,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -3627,6 +3819,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -3660,6 +3855,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -3749,6 +3947,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); @@ -3823,6 +4024,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -3856,6 +4060,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -3932,6 +4139,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -4024,6 +4234,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -4059,6 +4272,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -4094,6 +4310,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); @@ -4129,6 +4348,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -4164,6 +4386,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -4199,6 +4424,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -4294,6 +4522,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -4329,6 +4560,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -4364,6 +4598,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); @@ -4399,6 +4636,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -4434,6 +4674,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -4469,6 +4712,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -4561,6 +4807,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -4594,6 +4843,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -4627,6 +4879,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); @@ -4660,6 +4915,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -4693,6 +4951,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -4726,6 +4987,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -4815,6 +5079,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -4848,6 +5115,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -4881,6 +5151,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); @@ -4914,6 +5187,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -4947,6 +5223,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -4980,6 +5259,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -5072,6 +5354,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -5107,6 +5392,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -5142,6 +5430,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); @@ -5177,6 +5468,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -5212,6 +5506,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -5247,6 +5544,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -5342,6 +5642,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -5377,6 +5680,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -5412,6 +5718,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); @@ -5447,6 +5756,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -5482,6 +5794,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -5517,6 +5832,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -5609,6 +5927,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -5642,6 +5963,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -5675,6 +5999,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); @@ -5708,6 +6035,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -5741,6 +6071,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -5774,6 +6107,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -5863,6 +6199,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -5896,6 +6235,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -5929,6 +6271,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); @@ -5962,6 +6307,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -5995,6 +6343,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -6028,6 +6379,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -6120,6 +6474,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -6155,6 +6512,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -6190,6 +6550,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); @@ -6225,6 +6588,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -6260,6 +6626,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -6295,6 +6664,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -6390,6 +6762,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -6425,6 +6800,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -6460,6 +6838,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); @@ -6495,6 +6876,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -6530,6 +6914,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -6565,6 +6952,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);