diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh index 504946534f..c8ad4ef3d3 100644 --- a/src/arch/amdgpu/vega/insts/op_encodings.hh +++ b/src/arch/amdgpu/vega/insts/op_encodings.hh @@ -35,6 +35,7 @@ #include "arch/amdgpu/vega/gpu_decoder.hh" #include "arch/amdgpu/vega/gpu_mem_helpers.hh" #include "arch/amdgpu/vega/insts/gpu_static_inst.hh" +#include "arch/amdgpu/vega/insts/inst_util.hh" #include "arch/amdgpu/vega/operand.hh" #include "debug/GPUExec.hh" #include "debug/VEGA.hh" @@ -421,6 +422,159 @@ namespace VegaISA InstFormat extData; uint32_t varSize; + template + uint32_t + sdwabSelect(uint32_t dword, const SDWASelVals sel, + bool sign_ext, bool neg, bool abs) + { + // Use the gem5 bits() helper to select a sub region from the + // dword based on the select. Return a 32-bit unsigned which will + // be cast to the appropriate compare type in the lambda passed to + // sdwabHelper. + int low_bit = 0, high_bit = 0; + uint32_t rv = dword; + + if (sel < SDWA_WORD_0) { + // Selecting a sub-dword value smaller than a word (i.e., a + // byte). These values are 0-3 so multiplying by BITS_PER_BYTE + // gives the lower and upper bit easily. + low_bit = sel * VegaISA::BITS_PER_BYTE; + high_bit = low_bit + VegaISA::BITS_PER_BYTE - 1; + } else if (sel < SDWA_DWORD) { + // Selecting a sub-dword value of word size. Enum value is 4 + // or 5, so selecting the LSb and multiplying gives the lower + // and upper bit. + low_bit = (sel & 1) * VegaISA::BITS_PER_WORD; + high_bit = low_bit + VegaISA::MSB_PER_WORD - 1; + } else { + // We are selecting the whole dword. Assert that is true and + // set the bit locations for lower and upper based on dword + // size. + assert(sel == SDWA_DWORD); + low_bit = 0; + high_bit = sizeof(uint32_t) * VegaISA::BITS_PER_BYTE - 1; + } + + rv = bits(dword, high_bit, low_bit); + + uint32_t sign_bit = 1 << high_bit; + + // Panic on combinations which do not make sense. 
+ if (std::is_integral_v && std::is_unsigned_v) { + panic_if(neg, "SWDAB negation operation on unsigned type!\n"); + panic_if(sign_ext, "SWDAB sign extend on unsigned type!\n"); + } + + // Apply ABS, then NEG, then SEXT. + if (abs) { + if (std::is_integral_v) { + // If sign is set, sign extend first then call std::abs. + if ((rv & sign_bit) && std::is_signed_v) { + rv = sext(rv, high_bit + 1) & 0xFFFFFFFF; + rv = std::abs(static_cast(rv)) & 0xFFFFFFFF; + } + } else { + // Clear sign bit for FP types. + rv = rv & mask(high_bit); + } + } + + if (neg) { + if (std::is_integral_v) { + // If sign is set, sign extend first then call unary-. + if (rv & sign_bit) { + rv = sext(rv, high_bit + 1) & 0xFFFFFFFF; + rv = -rv; + } + } else { + // Flip sign bit for FP types. + rv = rv ^ mask(high_bit); + } + } + + if (sign_ext) { + if (std::is_integral_v) { + if (rv & sign_bit) { + rv = sext(rv, high_bit + 1) & 0xFFFFFFFF; + } + } else { + // It is not entirely clear what to do here. Literal + // extensions for FP operands append zeros to mantissa + // but specification does not state anything for SDWAB. + panic("SDWAB sign extend set for non-integral type!\n"); + } + } + + return rv; + } + + template + void + sdwabHelper(GPUDynInstPtr gpuDynInst, int (*cmpFunc)(T, T)) + { + DPRINTF(VEGA, "Handling %s SRC SDWA. SRC0: register %s[%d], " + "sDst s[%d], sDst type %s, SRC0_SEL: %d, SRC0_SEXT: %d " + "SRC0_NEG: %d, SRC0_ABS: %d, SRC1: register %s[%d], " + "SRC1_SEL: %d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: " + "%d\n", _opcode.c_str(), + (extData.iFmt_VOP_SDWAB.S0 ? "s" : "v"), + extData.iFmt_VOP_SDWAB.SRC0, + extData.iFmt_VOP_SDWAB.SDST, + (extData.iFmt_VOP_SDWAB.SD ? "SGPR" : "VCC"), + extData.iFmt_VOP_SDWAB.SRC0_SEL, + extData.iFmt_VOP_SDWAB.SRC0_SEXT, + extData.iFmt_VOP_SDWAB.SRC0_NEG, + extData.iFmt_VOP_SDWAB.SRC0_ABS, + (extData.iFmt_VOP_SDWAB.S1 ? 
"s" : "v"), + instData.VSRC1, + extData.iFmt_VOP_SDWAB.SRC1_SEL, + extData.iFmt_VOP_SDWAB.SRC1_SEXT, + extData.iFmt_VOP_SDWAB.SRC1_NEG, + extData.iFmt_VOP_SDWAB.SRC1_ABS); + + // Start with SRC0 and insert 9th bit for VGPR source (S0 == 0). + int src0_idx = extData.iFmt_VOP_SDWAB.SRC0; + src0_idx += (extData.iFmt_VOP_SDWAB.S0 == 0) ? 0x100 : 0; + + // Start with VSRC1[7:0], insert 9th bit for VGPR source (S1 == 0). + int src1_idx = instData.VSRC1; + src1_idx += (extData.iFmt_VOP_SDWAB.S1 == 0) ? 0x100 : 0; + + // SD == 0 if VCC is dest, else use SDST index. + int sdst_idx = (extData.iFmt_VOP_SDWAB.SD == 1) ? + int(extData.iFmt_VOP_SDWAB.SDST) : REG_VCC_LO; + + ConstVecOperandU32 src0(gpuDynInst, src0_idx); + ConstVecOperandU32 src1(gpuDynInst, src1_idx); + ScalarOperandU64 sdst(gpuDynInst, sdst_idx); + + // Use readSrc in case of scalar const register. + src0.readSrc(); + src1.readSrc(); + + // Select bits first, then cast to type, then apply modifiers. + const SDWASelVals src0_sel = + (SDWASelVals)extData.iFmt_VOP_SDWAB.SRC0_SEL; + const SDWASelVals src1_sel = + (SDWASelVals)extData.iFmt_VOP_SDWAB.SRC1_SEL; + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->wavefront()->execMask(lane)) { + T a = sdwabSelect(src0[lane], src0_sel, + extData.iFmt_VOP_SDWAB.SRC0_SEXT, + extData.iFmt_VOP_SDWAB.SRC0_NEG, + extData.iFmt_VOP_SDWAB.SRC0_ABS); + T b = sdwabSelect(src1[lane], src1_sel, + extData.iFmt_VOP_SDWAB.SRC1_SEXT, + extData.iFmt_VOP_SDWAB.SRC1_NEG, + extData.iFmt_VOP_SDWAB.SRC1_ABS); + sdst.setBit(lane, cmpFunc(a, b)); + } + } + + sdst.write(); + } + private: bool hasSecondDword(InFmt_VOPC *); }; // Inst_VOPC diff --git a/src/arch/amdgpu/vega/insts/vop1.cc b/src/arch/amdgpu/vega/insts/vop1.cc index f970923951..f3744b52f2 100644 --- a/src/arch/amdgpu/vega/insts/vop1.cc +++ b/src/arch/amdgpu/vega/insts/vop1.cc @@ -80,6 +80,8 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if 
(isDPPInst()) { VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src_dpp.read(); @@ -148,6 +150,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + if (exec_mask) { src_lane = findLsbSet(exec_mask); } @@ -182,6 +187,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -226,6 +234,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF64)src[lane]; @@ -258,6 +269,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)src[lane]; @@ -290,6 +304,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)src[lane]; @@ -324,6 +341,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -372,6 +392,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane 
< NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -439,6 +462,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { float tmp = src[lane]; @@ -475,6 +501,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { AMDGPU::mxfloat16 tmp(src[lane]); @@ -509,6 +538,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5); @@ -542,6 +574,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemI32)std::floor(src[lane]); @@ -595,6 +630,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)src[lane]; @@ -627,6 +665,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF64)src[lane]; @@ -659,6 +700,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not 
implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0)); @@ -691,6 +735,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8)); @@ -723,6 +770,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16)); @@ -755,6 +805,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24)); @@ -789,6 +842,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -835,6 +891,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF64)src[lane]; @@ -867,6 +926,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; 
++lane) { if (wf->execMask(lane)) { vdst[lane] = std::trunc(src[lane]); @@ -900,6 +962,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::ceil(src[lane]); @@ -932,6 +997,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = roundNearestEven(src[lane]); @@ -965,6 +1033,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::floor(src[lane]); @@ -997,6 +1068,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemF32 int_part(0.0); @@ -1030,6 +1104,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::trunc(src[lane]); @@ -1063,6 +1140,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::ceil(src[lane]); @@ -1095,6 +1175,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for 
%s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = roundNearestEven(src[lane]); @@ -1128,6 +1211,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::floor(src[lane]); @@ -1160,6 +1246,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::pow(2.0, src[lane]); @@ -1192,6 +1281,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::log2(src[lane]); @@ -1224,6 +1316,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 1.0 / src[lane]; @@ -1258,6 +1353,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 1.0 / src[lane]; @@ -1290,6 +1388,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { 
vdst[lane] = 1.0 / std::sqrt(src[lane]); @@ -1322,6 +1423,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::fpclassify(src[lane]) == FP_ZERO) { @@ -1366,6 +1470,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::fpclassify(src[lane]) == FP_ZERO) { @@ -1409,6 +1516,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::sqrt(src[lane]); @@ -1441,6 +1551,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::sqrt(src[lane]); @@ -1477,6 +1590,9 @@ namespace VegaISA src.readSrc(); pi.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (src[lane] < -256.0 || src[lane] > 256.0) { @@ -1517,6 +1633,9 @@ namespace VegaISA src.readSrc(); pi.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (src[lane] < -256.0 || src[lane] > 256.0) { @@ -1553,6 +1672,9 @@ namespace VegaISA src.readSrc(); + 
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = ~src[lane]; @@ -1585,6 +1707,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = reverseBits(src[lane]); @@ -1617,6 +1742,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = findFirstOneMsb(src[lane]); @@ -1649,6 +1777,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = findFirstOne(src[lane]); @@ -1681,6 +1812,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = firstOppositeSignBit(src[lane]); @@ -1714,6 +1848,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1752,6 +1889,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int 
lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1789,6 +1929,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemF64 int_part(0.0); @@ -1827,6 +1970,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1870,6 +2016,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1926,8 +2075,8 @@ namespace VegaISA src.readSrc(); - panic_if(isDPPInst(), "DPP unimplemented for v_mov_b64"); - panic_if(isSDWAInst(), "SDWA unimplemented for v_mov_b64"); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { @@ -2359,6 +2508,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::pow(2.0, src[lane]); @@ -2391,6 +2543,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < 
NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::log2(src[lane]); @@ -2423,6 +2578,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src[lane]; diff --git a/src/arch/amdgpu/vega/insts/vop2.cc b/src/arch/amdgpu/vega/insts/vop2.cc index 55146711b6..f6eec253a3 100644 --- a/src/arch/amdgpu/vega/insts/vop2.cc +++ b/src/arch/amdgpu/vega/insts/vop2.cc @@ -67,6 +67,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] @@ -102,6 +105,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src0_dpp.read(); @@ -163,6 +168,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -198,6 +206,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -232,6 +243,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; 
++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] * src1[lane]; @@ -266,6 +280,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isnan(src0[lane]) || @@ -344,6 +361,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = sext<24>(bits(src0[lane], 23, 0)) @@ -378,6 +398,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemI64 tmp_src0 @@ -445,6 +468,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); @@ -481,6 +507,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fmin(src0[lane], src1[lane]); @@ -515,6 +544,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 
std::fmax(src0[lane], src1[lane]); @@ -548,6 +580,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -581,6 +616,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -614,6 +652,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -647,6 +688,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -682,6 +726,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); @@ -717,6 +764,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> 
bits(src0[lane], 4, 0); @@ -751,6 +801,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and vdst during selecting @@ -826,6 +878,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src0_dpp.read(); @@ -886,6 +940,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and dest during selecting @@ -961,6 +1017,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] ^ src1[lane]; @@ -998,6 +1057,8 @@ namespace VegaISA src1.read(); vdst.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src0_dpp.read(); @@ -1064,6 +1125,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fma(src0[lane], k, src1[lane]); @@ -1103,6 +1167,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { 
vdst[lane] = std::fma(src0[lane], src1[lane], k); @@ -1141,6 +1208,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and dest during selecting @@ -1225,6 +1294,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -1265,6 +1337,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -1308,6 +1383,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] + src1[lane] @@ -1355,6 +1433,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] @@ -1401,6 +1482,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] @@ -1598,6 +1682,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for 
%s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] + src1[lane]; @@ -1632,6 +1719,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -1667,6 +1757,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -1701,6 +1794,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] * src1[lane]; @@ -1735,6 +1831,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] << bits(src0[lane], 3, 0); @@ -1770,6 +1869,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> src0[lane]; @@ -1805,6 +1907,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not 
implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> src0[lane]; @@ -1882,6 +1987,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -1915,6 +2023,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -1948,6 +2059,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -1981,6 +2095,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -2034,6 +2151,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and dest during selecting @@ -2108,6 +2227,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < 
NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -2141,6 +2263,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -2175,6 +2300,9 @@ namespace VegaISA src1.read(); vdst.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]); @@ -2209,6 +2337,9 @@ namespace VegaISA src1.read(); vdst.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = ~(src0[lane] ^ src1[lane]); diff --git a/src/arch/amdgpu/vega/insts/vopc.cc b/src/arch/amdgpu/vega/insts/vopc.cc index 2c386fec74..9361e68b67 100644 --- a/src/arch/amdgpu/vega/insts/vopc.cc +++ b/src/arch/amdgpu/vega/insts/vopc.cc @@ -74,6 +74,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (bits(src1[lane], 0) || bits(src1[lane], 1)) { @@ -189,6 +192,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (bits(src1[lane], 0) || bits(src1[lane], 1)) { @@ -304,6 +310,9 @@ namespace VegaISA src0.readSrc(); 
src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (bits(src1[lane], 0) || bits(src1[lane], 1)) { @@ -420,6 +429,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (bits(src1[lane], 0) || bits(src1[lane], 1)) { @@ -1277,6 +1289,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -1311,6 +1326,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -1345,6 +1363,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -1379,6 +1400,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -1413,6 +1437,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (src0[lane] < src1[lane] @@ -1448,6 +1475,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -1482,6 +1512,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (!std::isnan(src0[lane]) @@ -1517,6 +1550,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (std::isnan(src0[lane]) @@ -1552,6 +1588,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); @@ -1586,6 +1625,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane] @@ -1621,6 +1663,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); @@ -1655,6 +1700,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); @@ -1689,6 +1737,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -1723,6 +1774,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); @@ -1818,6 +1872,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); @@ -1854,6 +1911,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -1890,6 +1950,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -1926,6 +1989,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -1962,6 +2028,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (src0[lane] < src1[lane] @@ -1999,6 +2068,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); @@ -2036,6 +2108,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (!std::isnan(src0[lane]) @@ -2074,6 +2149,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (std::isnan(src0[lane]) @@ -2111,6 +2189,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); @@ -2147,6 +2228,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane] @@ -2184,6 +2268,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); @@ -2220,6 +2307,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 
1 : 0); @@ -2256,6 +2346,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0); @@ -2292,6 +2385,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); @@ -2387,6 +2483,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -2421,6 +2520,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -2455,6 +2557,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -2489,6 +2594,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -2523,6 +2631,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (src0[lane] < src1[lane] @@ -2558,6 +2669,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -2592,6 +2706,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (!std::isnan(src0[lane]) @@ -2627,6 +2744,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (std::isnan(src0[lane]) @@ -2662,6 +2782,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); @@ -2696,6 +2819,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane] @@ -2731,6 +2857,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); @@ -2765,6 +2894,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); @@ -2799,6 +2931,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -2833,6 +2968,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); @@ -2928,6 +3066,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); @@ -2964,6 +3105,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -3000,6 +3144,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -3036,6 +3183,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -3072,6 +3222,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (src0[lane] < src1[lane] @@ -3109,6 +3262,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); @@ -3146,6 +3302,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (!std::isnan(src0[lane]) @@ -3184,6 +3343,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, (std::isnan(src0[lane]) @@ -3221,6 +3383,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); @@ -3257,6 +3422,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane] @@ -3294,6 +3462,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); @@ -3330,6 +3501,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 
1 : 0); @@ -3366,6 +3540,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -3402,6 +3579,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); @@ -3495,6 +3675,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -3528,6 +3711,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -3561,6 +3747,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -3594,6 +3783,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -3627,6 +3819,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -3660,6 +3855,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -3749,6 +3947,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -3782,13 +3983,21 @@ namespace VegaISA src0.readSrc(); src1.read(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } + auto cmpImpl = [](uint16_t a, uint16_t b) { return a == b ? 
1 : 0; }; - vcc.write(); + if (isSDWAInst()) { + sdwabHelper<uint16_t>(gpuDynInst, cmpImpl); + } else if (isDPPInst()) { + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, cmpImpl(src0[lane], src1[lane])); + } + } + + vcc.write(); + } } // execute // --- Inst_VOPC__V_CMP_LE_U16 class methods --- @@ -3815,6 +4024,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -3848,6 +4060,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); @@ -3881,10 +4096,18 @@ namespace VegaISA src0.readSrc(); src1.read(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + auto cmpImpl = [](uint16_t a, uint16_t b) { return a != b ?
1 : 0; }; + + if (isSDWAInst()) { + sdwabHelper<uint16_t>(gpuDynInst, cmpImpl); + } else if (isDPPInst()) { + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, cmpImpl(src0[lane], src1[lane])); + } } + + vcc.write(); } - - vcc.write(); @@ -3914,6 +4139,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -4006,6 +4234,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -4041,6 +4272,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -4076,6 +4310,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -4111,6 +4348,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ?
1 : 0); @@ -4146,6 +4386,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -4181,6 +4424,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -4276,6 +4522,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -4311,6 +4560,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -4346,6 +4598,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -4381,6 +4636,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -4416,6 +4674,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -4451,6 +4712,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -4543,6 +4807,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -4576,6 +4843,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -4609,6 +4879,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -4642,6 +4915,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -4675,6 +4951,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -4708,6 +4987,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -4797,6 +5079,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -4830,6 +5115,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -4863,6 +5151,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -4896,6 +5187,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -4929,6 +5223,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -4962,6 +5259,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -5054,6 +5354,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -5089,6 +5392,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -5124,6 +5430,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -5159,6 +5468,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -5194,6 +5506,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -5229,6 +5544,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -5324,6 +5642,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -5359,6 +5680,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -5394,6 +5718,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -5429,6 +5756,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -5464,6 +5794,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -5499,6 +5832,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -5591,6 +5927,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -5624,6 +5963,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -5657,6 +5999,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -5690,6 +6035,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -5723,6 +6071,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -5756,6 +6107,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -5845,6 +6199,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -5878,6 +6235,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -5911,6 +6271,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -5944,6 +6307,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -5977,6 +6343,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -6010,6 +6379,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -6102,6 +6474,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -6137,6 +6512,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -6172,6 +6550,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -6207,6 +6588,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -6242,6 +6626,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -6277,6 +6664,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); @@ -6372,6 +6762,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); @@ -6407,6 +6800,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); @@ -6442,6 +6838,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); @@ -6477,6 +6876,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); @@ -6512,6 +6914,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); @@ -6547,6 +6952,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);