diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh
index 504946534f..c8ad4ef3d3 100644
--- a/src/arch/amdgpu/vega/insts/op_encodings.hh
+++ b/src/arch/amdgpu/vega/insts/op_encodings.hh
@@ -35,6 +35,7 @@
 #include "arch/amdgpu/vega/gpu_decoder.hh"
 #include "arch/amdgpu/vega/gpu_mem_helpers.hh"
 #include "arch/amdgpu/vega/insts/gpu_static_inst.hh"
+#include "arch/amdgpu/vega/insts/inst_util.hh"
 #include "arch/amdgpu/vega/operand.hh"
 #include "debug/GPUExec.hh"
 #include "debug/VEGA.hh"
@@ -421,6 +422,159 @@ namespace VegaISA
         InstFormat extData;
         uint32_t varSize;
 
+        /**
+         * Extract the byte, word or dword of a source chosen by an SDWA
+         * select and apply the ABS, NEG and SEXT modifiers, in that order.
+         * T is the type the caller compares the result as; it decides
+         * between integer and FP handling of the modifiers.
+         *
+         * @return the selected, modified value as a 32-bit unsigned.
+         */
+        template<typename T>
+        uint32_t
+        sdwabSelect(uint32_t dword, const SDWASelVals sel,
+                    bool sign_ext, bool neg, bool abs)
+        {
+            int low_bit = 0, high_bit = 0;
+
+            if (sel < SDWA_WORD_0) {
+                // Byte selects are enum values 0-3, so scaling by
+                // BITS_PER_BYTE gives the first bit of the byte.
+                low_bit = sel * VegaISA::BITS_PER_BYTE;
+                high_bit = low_bit + VegaISA::BITS_PER_BYTE - 1;
+            } else if (sel < SDWA_DWORD) {
+                // Word selects are enum values 4 or 5; the LSb picks the
+                // half. The upper bound is size based like the byte case.
+                low_bit = (sel & 1) * VegaISA::BITS_PER_WORD;
+                high_bit = low_bit + VegaISA::BITS_PER_WORD - 1;
+            } else {
+                // Only the whole dword is left; low_bit stays at zero.
+                assert(sel == SDWA_DWORD);
+                high_bit = sizeof(uint32_t) * VegaISA::BITS_PER_BYTE - 1;
+            }
+
+            // bits() right justifies the field, so its sign bit sits at
+            // width - 1 of rv and not at high_bit of the original dword.
+            const int width = high_bit - low_bit + 1;
+            const uint32_t sign_bit = uint32_t(1) << (width - 1);
+            uint32_t rv = bits(dword, high_bit, low_bit);
+
+            // Panic on combinations which do not make sense.
+            if (std::is_integral_v<T> && std::is_unsigned_v<T>) {
+                panic_if(neg, "SDWAB negation operation on unsigned type!\n");
+                panic_if(sign_ext, "SDWAB sign extend on unsigned type!\n");
+            }
+
+            // Apply ABS, then NEG, then SEXT.
+            if (abs) {
+                if (std::is_integral_v<T>) {
+                    // Negative signed field: sign extend, then negate.
+                    if ((rv & sign_bit) && std::is_signed_v<T>) {
+                        rv = sext(rv, width) & 0xFFFFFFFF;
+                        rv = -rv;
+                    }
+                } else {
+                    // Clear sign bit for FP types.
+                    rv &= ~sign_bit;
+                }
+            }
+
+            if (neg) {
+                if (std::is_integral_v<T>) {
+                    // Sign extend a negative field first, then negate no
+                    // matter what the sign is.
+                    if (rv & sign_bit) {
+                        rv = sext(rv, width) & 0xFFFFFFFF;
+                    }
+                    rv = -rv;
+                } else {
+                    // Flip sign bit for FP types.
+                    rv ^= sign_bit;
+                }
+            }
+
+            if (sign_ext) {
+                if (std::is_integral_v<T>) {
+                    if (rv & sign_bit) {
+                        rv = sext(rv, width) & 0xFFFFFFFF;
+                    }
+                } else {
+                    // The specification does not state how FP operands
+                    // are extended for SDWAB, so refuse to guess.
+                    panic("SDWAB sign extend set for non-integral type!\n");
+                }
+            }
+
+            return rv;
+        }
+
+        /**
+         * Execute an SDWAB compare. For every active lane the selected
+         * sub-dword of each source is extracted with its modifiers applied
+         * and cmpFunc's result is stored in that lane's bit of the scalar
+         * destination (an SGPR when SD is set, VCC otherwise).
+         */
+        template<typename T>
+        void
+        sdwabHelper(GPUDynInstPtr gpuDynInst, int (*cmpFunc)(T, T))
+        {
+            const auto &sdwab = extData.iFmt_VOP_SDWAB;
+
+            DPRINTF(VEGA, "Handling %s SRC SDWA. SRC0: register %s[%d], "
+                    "sDst s[%d], sDst type %s, SRC0_SEL: %d, SRC0_SEXT: %d "
+                    "SRC0_NEG: %d, SRC0_ABS: %d, SRC1: register %s[%d], "
+                    "SRC1_SEL: %d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: "
+                    "%d\n", _opcode.c_str(), (sdwab.S0 ? "s" : "v"),
+                    sdwab.SRC0, sdwab.SDST, (sdwab.SD ? "SGPR" : "VCC"),
+                    sdwab.SRC0_SEL, sdwab.SRC0_SEXT, sdwab.SRC0_NEG,
+                    sdwab.SRC0_ABS, (sdwab.S1 ? "s" : "v"), instData.VSRC1,
+                    sdwab.SRC1_SEL, sdwab.SRC1_SEXT, sdwab.SRC1_NEG,
+                    sdwab.SRC1_ABS);
+
+            // A clear S0/S1 bit marks a VGPR source, which is encoded by
+            // adding the 9th bit (0x100) to the register index.
+            int src0_idx = sdwab.SRC0;
+            if (sdwab.S0 == 0) {
+                src0_idx += 0x100;
+            }
+
+            int src1_idx = instData.VSRC1;
+            if (sdwab.S1 == 0) {
+                src1_idx += 0x100;
+            }
+
+            // VCC is the destination unless SD selects the SDST register.
+            int sdst_idx = REG_VCC_LO;
+            if (sdwab.SD == 1) {
+                sdst_idx = int(sdwab.SDST);
+            }
+
+            ConstVecOperandU32 src0(gpuDynInst, src0_idx);
+            ConstVecOperandU32 src1(gpuDynInst, src1_idx);
+            ScalarOperandU64 sdst(gpuDynInst, sdst_idx);
+
+            // readSrc is used in case a source is a scalar const register.
+            src0.readSrc();
+            src1.readSrc();
+
+            const auto src0_sel = static_cast<SDWASelVals>(sdwab.SRC0_SEL);
+            const auto src1_sel = static_cast<SDWASelVals>(sdwab.SRC1_SEL);
+
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (!gpuDynInst->wavefront()->execMask(lane)) {
+                    continue;
+                }
+                // The uint32_t results are converted to T on assignment.
+                T a = sdwabSelect<T>(src0[lane], src0_sel, sdwab.SRC0_SEXT,
+                                     sdwab.SRC0_NEG, sdwab.SRC0_ABS);
+                T b = sdwabSelect<T>(src1[lane], src1_sel, sdwab.SRC1_SEXT,
+                                     sdwab.SRC1_NEG, sdwab.SRC1_ABS);
+                sdst.setBit(lane, cmpFunc(a, b));
+            }
+
+            sdst.write();
+        }
+
       private:
         bool hasSecondDword(InFmt_VOPC *);
     }; // Inst_VOPC
diff --git a/src/arch/amdgpu/vega/insts/vop1.cc b/src/arch/amdgpu/vega/insts/vop1.cc
index f970923951..f3744b52f2 100644
--- a/src/arch/amdgpu/vega/insts/vop1.cc
+++ b/src/arch/amdgpu/vega/insts/vop1.cc
@@ -80,6 +80,8 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+
         if (isDPPInst()) {
             VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
             src_dpp.read();
@@ -148,6 +150,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         if (exec_mask) {
             src_lane = findLsbSet(exec_mask);
         }
@@ -182,6 +187,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 int exp;
@@ -226,6 +234,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF64)src[lane];
@@ -258,6 +269,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)src[lane];
@@ -290,6 +304,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)src[lane];
@@ -324,6 +341,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 int exp;
@@ -372,6 +392,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 int exp;
@@ -439,6 +462,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 float tmp = src[lane];
@@ -475,6 +501,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 AMDGPU::mxfloat16 tmp(src[lane]);
@@ -509,6 +538,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
@@ -542,6 +574,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemI32)std::floor(src[lane]);
@@ -595,6 +630,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)src[lane];
@@ -627,6 +665,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF64)src[lane];
@@ -659,6 +700,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0));
@@ -691,6 +735,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8));
@@ -723,6 +770,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16));
@@ -755,6 +805,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24));
@@ -789,6 +842,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 int exp;
@@ -835,6 +891,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF64)src[lane];
@@ -867,6 +926,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::trunc(src[lane]);
@@ -900,6 +962,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::ceil(src[lane]);
@@ -932,6 +997,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = roundNearestEven(src[lane]);
@@ -965,6 +1033,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::floor(src[lane]);
@@ -997,6 +1068,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 VecElemF32 int_part(0.0);
@@ -1030,6 +1104,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::trunc(src[lane]);
@@ -1063,6 +1140,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::ceil(src[lane]);
@@ -1095,6 +1175,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = roundNearestEven(src[lane]);
@@ -1128,6 +1211,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::floor(src[lane]);
@@ -1160,6 +1246,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::pow(2.0, src[lane]);
@@ -1192,6 +1281,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::log2(src[lane]);
@@ -1224,6 +1316,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = 1.0 / src[lane];
@@ -1258,6 +1353,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = 1.0 / src[lane];
@@ -1290,6 +1388,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = 1.0 / std::sqrt(src[lane]);
@@ -1322,6 +1423,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::fpclassify(src[lane]) == FP_ZERO) {
@@ -1366,6 +1470,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::fpclassify(src[lane]) == FP_ZERO) {
@@ -1409,6 +1516,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::sqrt(src[lane]);
@@ -1441,6 +1551,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::sqrt(src[lane]);
@@ -1477,6 +1590,9 @@ namespace VegaISA
         src.readSrc();
         pi.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (src[lane] < -256.0 || src[lane] > 256.0) {
@@ -1517,6 +1633,9 @@ namespace VegaISA
         src.readSrc();
         pi.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (src[lane] < -256.0 || src[lane] > 256.0) {
@@ -1553,6 +1672,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = ~src[lane];
@@ -1585,6 +1707,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = reverseBits(src[lane]);
@@ -1617,6 +1742,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = findFirstOneMsb(src[lane]);
@@ -1649,6 +1777,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = findFirstOne(src[lane]);
@@ -1681,6 +1812,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = firstOppositeSignBit(src[lane]);
@@ -1714,6 +1848,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
@@ -1752,6 +1889,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
@@ -1789,6 +1929,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 VecElemF64 int_part(0.0);
@@ -1827,6 +1970,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
@@ -1870,6 +2016,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
@@ -1926,8 +2075,8 @@ namespace VegaISA
 
         src.readSrc();
 
-        panic_if(isDPPInst(), "DPP unimplemented for v_mov_b64");
-        panic_if(isSDWAInst(), "SDWA unimplemented for v_mov_b64");
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
@@ -2359,6 +2508,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::pow(2.0, src[lane]);
@@ -2391,6 +2543,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::log2(src[lane]);
@@ -2423,6 +2578,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src[lane];
diff --git a/src/arch/amdgpu/vega/insts/vop2.cc b/src/arch/amdgpu/vega/insts/vop2.cc
index 55146711b6..f6eec253a3 100644
--- a/src/arch/amdgpu/vega/insts/vop2.cc
+++ b/src/arch/amdgpu/vega/insts/vop2.cc
@@ -67,6 +67,9 @@ namespace VegaISA
         src1.read();
         vcc.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane]
@@ -102,6 +105,8 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+
         if (isDPPInst()) {
             VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
             src0_dpp.read();
@@ -163,6 +168,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] - src1[lane];
@@ -198,6 +206,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] - src0[lane];
@@ -232,6 +243,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] * src1[lane];
@@ -266,6 +280,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::isnan(src0[lane]) ||
@@ -344,6 +361,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
@@ -378,6 +398,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 VecElemI64 tmp_src0
@@ -445,6 +468,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
@@ -481,6 +507,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::fmin(src0[lane], src1[lane]);
@@ -515,6 +544,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::fmax(src0[lane], src1[lane]);
@@ -548,6 +580,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::min(src0[lane], src1[lane]);
@@ -581,6 +616,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::max(src0[lane], src1[lane]);
@@ -614,6 +652,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::min(src0[lane], src1[lane]);
@@ -647,6 +688,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::max(src0[lane], src1[lane]);
@@ -682,6 +726,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
@@ -717,6 +764,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
@@ -751,6 +801,8 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         if (isSDWAInst()) {
             VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
             // use copies of original src0, src1, and vdst during selecting
@@ -826,6 +878,8 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+
         if (isDPPInst()) {
             VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
             src0_dpp.read();
@@ -886,6 +940,8 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         if (isSDWAInst()) {
             VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
             // use copies of original src0, src1, and dest during selecting
@@ -961,6 +1017,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] ^ src1[lane];
@@ -998,6 +1057,8 @@ namespace VegaISA
         src1.read();
         vdst.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+
         if (isDPPInst()) {
             VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
             src0_dpp.read();
@@ -1064,6 +1125,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::fma(src0[lane], k, src1[lane]);
@@ -1103,6 +1167,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::fma(src0[lane], src1[lane], k);
@@ -1141,6 +1208,8 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         if (isSDWAInst()) {
             VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
             // use copies of original src0, src1, and dest during selecting
@@ -1225,6 +1294,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] - src1[lane];
@@ -1265,6 +1337,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] - src0[lane];
@@ -1308,6 +1383,9 @@ namespace VegaISA
         src1.read();
         vcc.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] + src1[lane]
@@ -1355,6 +1433,9 @@ namespace VegaISA
         src1.read();
         vcc.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane]
@@ -1401,6 +1482,9 @@ namespace VegaISA
         src1.read();
         vcc.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane]
@@ -1598,6 +1682,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] + src1[lane];
@@ -1632,6 +1719,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] - src1[lane];
@@ -1667,6 +1757,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] - src0[lane];
@@ -1701,6 +1794,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] * src1[lane];
@@ -1735,6 +1831,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
@@ -1770,6 +1869,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] >> src0[lane];
@@ -1805,6 +1907,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] >> src0[lane];
@@ -1882,6 +1987,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::max(src0[lane], src1[lane]);
@@ -1915,6 +2023,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::max(src0[lane], src1[lane]);
@@ -1948,6 +2059,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::min(src0[lane], src1[lane]);
@@ -1981,6 +2095,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::min(src0[lane], src1[lane]);
@@ -2034,6 +2151,8 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         if (isSDWAInst()) {
             VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
             // use copies of original src0, src1, and dest during selecting
@@ -2108,6 +2227,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] - src1[lane];
@@ -2141,6 +2263,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] - src0[lane];
@@ -2175,6 +2300,9 @@ namespace VegaISA
         src1.read();
         vdst.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
@@ -2209,6 +2337,9 @@ namespace VegaISA
         src1.read();
         vdst.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = ~(src0[lane] ^ src1[lane]);
diff --git a/src/arch/amdgpu/vega/insts/vopc.cc b/src/arch/amdgpu/vega/insts/vopc.cc
index 2c386fec74..9361e68b67 100644
--- a/src/arch/amdgpu/vega/insts/vopc.cc
+++ b/src/arch/amdgpu/vega/insts/vopc.cc
@@ -74,6 +74,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
@@ -189,6 +192,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
@@ -304,6 +310,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
@@ -420,6 +429,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
@@ -1277,6 +1289,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -1311,6 +1326,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -1345,6 +1363,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -1379,6 +1400,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -1413,6 +1437,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, (src0[lane] < src1[lane]
@@ -1448,6 +1475,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -1482,6 +1512,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, (!std::isnan(src0[lane])
@@ -1517,6 +1550,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, (std::isnan(src0[lane])
@@ -1552,6 +1588,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
@@ -1586,6 +1625,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] < src1[lane]
@@ -1621,6 +1663,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
@@ -1655,6 +1700,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
@@ -1689,6 +1737,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -1723,6 +1774,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
@@ -1818,6 +1872,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -1854,6 +1911,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -1890,6 +1950,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -1926,6 +1989,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -1962,6 +2028,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, (src0[lane] < src1[lane]
@@ -1999,6 +2068,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -2036,6 +2108,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, (!std::isnan(src0[lane])
@@ -2074,6 +2149,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, (std::isnan(src0[lane])
@@ -2111,6 +2189,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
@@ -2147,6 +2228,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] < src1[lane]
@@ -2184,6 +2268,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
@@ -2220,6 +2307,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
@@ -2256,6 +2346,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0);
@@ -2292,6 +2385,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
@@ -2387,6 +2483,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -2421,6 +2520,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -2455,6 +2557,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -2489,6 +2594,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -2523,6 +2631,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, (src0[lane] < src1[lane]
@@ -2558,6 +2669,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -2592,6 +2706,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, (!std::isnan(src0[lane])
@@ -2627,6 +2744,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, (std::isnan(src0[lane])
@@ -2662,6 +2782,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
@@ -2696,6 +2819,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] < src1[lane]
@@ -2731,6 +2857,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
@@ -2765,6 +2894,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
@@ -2799,6 +2931,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -2833,6 +2968,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
@@ -2928,6 +3066,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -2964,6 +3105,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -3000,6 +3144,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -3036,6 +3183,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -3072,6 +3222,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, (src0[lane] < src1[lane]
@@ -3109,6 +3262,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -3146,6 +3302,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, (!std::isnan(src0[lane])
@@ -3184,6 +3343,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, (std::isnan(src0[lane])
@@ -3221,6 +3383,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
@@ -3257,6 +3422,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] < src1[lane]
@@ -3294,6 +3462,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
@@ -3330,6 +3501,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
@@ -3366,6 +3540,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -3402,6 +3579,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
@@ -3495,6 +3675,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -3528,6 +3711,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -3561,6 +3747,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -3594,6 +3783,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -3627,6 +3819,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -3660,6 +3855,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -3749,6 +3947,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -3782,13 +3983,22 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (wf->execMask(lane)) {
-                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
-            }
-        }
+        // Equality compare shared by the SDWA helper and the per-lane loop.
+        auto cmpImpl = [](uint16_t a, uint16_t b) { return a == b ? 1 : 0; };
 
-        vcc.write();
+        if (isSDWAInst()) {
+            sdwabHelper<uint16_t>(gpuDynInst, cmpImpl);
+        } else if (isDPPInst()) {
+            panic("DPP not supported for %s", _opcode);
+        } else {
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (wf->execMask(lane)) {
+                    vcc.setBit(lane, cmpImpl(src0[lane], src1[lane]));
+                }
+            }
+
+            vcc.write();
+        }
     } // execute
     // --- Inst_VOPC__V_CMP_LE_U16 class methods ---
 
@@ -3815,6 +4024,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -3848,6 +4060,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -3881,10 +4096,22 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        auto cmpImpl = [](uint16_t a, uint16_t b) { return a != b ? 1 : 0; };
+
+        // Return right after the SDWA helper so that the local vcc operand,
+        // which is only filled in by the lane loop below, is not written
+        // back on the SDWA path and is written exactly once otherwise.
+        if (isSDWAInst()) {
+            sdwabHelper<uint16_t>(gpuDynInst, cmpImpl);
+            return;
+        }
+
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
-                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
+                vcc.setBit(lane, cmpImpl(src0[lane], src1[lane]));
             }
         }
 
         vcc.write();
@@ -3914,6 +4139,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -4006,6 +4234,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -4041,6 +4272,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -4076,6 +4310,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -4111,6 +4348,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -4146,6 +4386,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -4181,6 +4424,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -4276,6 +4522,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -4311,6 +4560,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -4346,6 +4598,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -4381,6 +4636,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -4416,6 +4674,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -4451,6 +4712,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -4543,6 +4807,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -4576,6 +4843,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -4609,6 +4879,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -4642,6 +4915,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -4675,6 +4951,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -4708,6 +4987,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -4797,6 +5079,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -4830,6 +5115,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -4863,6 +5151,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -4896,6 +5187,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -4929,6 +5223,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -4962,6 +5259,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -5054,6 +5354,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -5089,6 +5392,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -5124,6 +5430,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -5159,6 +5468,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -5194,6 +5506,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -5229,6 +5544,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -5324,6 +5642,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -5359,6 +5680,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -5394,6 +5718,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -5429,6 +5756,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -5464,6 +5794,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -5499,6 +5832,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -5591,6 +5927,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -5624,6 +5963,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -5657,6 +5999,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -5690,6 +6035,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -5723,6 +6071,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -5756,6 +6107,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -5845,6 +6199,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -5878,6 +6235,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -5911,6 +6271,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -5944,6 +6307,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -5977,6 +6343,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -6010,6 +6379,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -6102,6 +6474,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -6137,6 +6512,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -6172,6 +6550,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -6207,6 +6588,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -6242,6 +6626,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -6277,6 +6664,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
@@ -6372,6 +6762,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
@@ -6407,6 +6800,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
@@ -6442,6 +6838,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
@@ -6477,6 +6876,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
@@ -6512,6 +6914,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
@@ -6547,6 +6952,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);