From b75fe56da5a92d703b5f3c841435ff87fe1f2b61 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Wed, 24 Jul 2024 17:32:37 -0700
Subject: [PATCH] arch-vega: Panic unimplemented SDWA/DPP for VOP1/VOP2

Add a panic if SDWA or DPP is used for an instruction which does not
implement support for it. If an application uses SDWA or DPP it likely
does not operate in the same way as the base instruction and therefore
gem5 should panic rather than continue. It is likely data is incorrect
which will make it more difficult to debug an application.

Change-Id: I68ac448b0d62941761ef4efa0169f95796270f48
---
 src/arch/amdgpu/vega/insts/vop1.cc | 162 ++++++++++++++++++++++++++++-
 src/arch/amdgpu/vega/insts/vop2.cc | 131 +++++++++++++++++++++++
 2 files changed, 291 insertions(+), 2 deletions(-)

diff --git a/src/arch/amdgpu/vega/insts/vop1.cc b/src/arch/amdgpu/vega/insts/vop1.cc
index f970923951..f3744b52f2 100644
--- a/src/arch/amdgpu/vega/insts/vop1.cc
+++ b/src/arch/amdgpu/vega/insts/vop1.cc
@@ -80,6 +80,8 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+
         if (isDPPInst()) {
             VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
             src_dpp.read();
@@ -148,6 +150,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         if (exec_mask) {
             src_lane = findLsbSet(exec_mask);
         }
@@ -182,6 +187,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 int exp;
@@ -226,6 +234,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF64)src[lane];
@@ -258,6 +269,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)src[lane];
@@ -290,6 +304,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)src[lane];
@@ -324,6 +341,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 int exp;
@@ -372,6 +392,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 int exp;
@@ -439,6 +462,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 float tmp = src[lane];
@@ -475,6 +501,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 AMDGPU::mxfloat16 tmp(src[lane]);
@@ -509,6 +538,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
@@ -542,6 +574,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemI32)std::floor(src[lane]);
@@ -595,6 +630,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)src[lane];
@@ -627,6 +665,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF64)src[lane];
@@ -659,6 +700,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0));
@@ -691,6 +735,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8));
@@ -723,6 +770,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16));
@@ -755,6 +805,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24));
@@ -789,6 +842,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 int exp;
@@ -835,6 +891,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = (VecElemF64)src[lane];
@@ -867,6 +926,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::trunc(src[lane]);
@@ -900,6 +962,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::ceil(src[lane]);
@@ -932,6 +997,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = roundNearestEven(src[lane]);
@@ -965,6 +1033,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::floor(src[lane]);
@@ -997,6 +1068,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 VecElemF32 int_part(0.0);
@@ -1030,6 +1104,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::trunc(src[lane]);
@@ -1063,6 +1140,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::ceil(src[lane]);
@@ -1095,6 +1175,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = roundNearestEven(src[lane]);
@@ -1128,6 +1211,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::floor(src[lane]);
@@ -1160,6 +1246,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::pow(2.0, src[lane]);
@@ -1192,6 +1281,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::log2(src[lane]);
@@ -1224,6 +1316,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = 1.0 / src[lane];
@@ -1258,6 +1353,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = 1.0 / src[lane];
@@ -1290,6 +1388,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = 1.0 / std::sqrt(src[lane]);
@@ -1322,6 +1423,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::fpclassify(src[lane]) == FP_ZERO) {
@@ -1366,6 +1470,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::fpclassify(src[lane]) == FP_ZERO) {
@@ -1409,6 +1516,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::sqrt(src[lane]);
@@ -1441,6 +1551,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::sqrt(src[lane]);
@@ -1477,6 +1590,9 @@ namespace VegaISA
         src.readSrc();
         pi.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (src[lane] < -256.0 || src[lane] > 256.0) {
@@ -1517,6 +1633,9 @@ namespace VegaISA
         src.readSrc();
         pi.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (src[lane] < -256.0 || src[lane] > 256.0) {
@@ -1553,6 +1672,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = ~src[lane];
@@ -1585,6 +1707,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = reverseBits(src[lane]);
@@ -1617,6 +1742,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = findFirstOneMsb(src[lane]);
@@ -1649,6 +1777,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = findFirstOne(src[lane]);
@@ -1681,6 +1812,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = firstOppositeSignBit(src[lane]);
@@ -1714,6 +1848,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
@@ -1752,6 +1889,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
@@ -1789,6 +1929,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 VecElemF64 int_part(0.0);
@@ -1827,6 +1970,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
@@ -1870,6 +2016,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
@@ -1926,8 +2075,8 @@ namespace VegaISA
 
         src.readSrc();
 
-        panic_if(isDPPInst(), "DPP unimplemented for v_mov_b64");
-        panic_if(isSDWAInst(), "SDWA unimplemented for v_mov_b64");
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
@@ -2359,6 +2508,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::pow(2.0, src[lane]);
@@ -2391,6 +2543,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::log2(src[lane]);
@@ -2423,6 +2578,9 @@ namespace VegaISA
 
         src.readSrc();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src[lane];
diff --git a/src/arch/amdgpu/vega/insts/vop2.cc b/src/arch/amdgpu/vega/insts/vop2.cc
index 55146711b6..f6eec253a3 100644
--- a/src/arch/amdgpu/vega/insts/vop2.cc
+++ b/src/arch/amdgpu/vega/insts/vop2.cc
@@ -67,6 +67,9 @@ namespace VegaISA
         src1.read();
         vcc.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane]
@@ -102,6 +105,8 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+
         if (isDPPInst()) {
             VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
             src0_dpp.read();
@@ -163,6 +168,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] - src1[lane];
@@ -198,6 +206,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] - src0[lane];
@@ -232,6 +243,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] * src1[lane];
@@ -266,6 +280,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 if (std::isnan(src0[lane]) ||
@@ -344,6 +361,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
@@ -378,6 +398,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 VecElemI64 tmp_src0
@@ -445,6 +468,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
@@ -481,6 +507,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::fmin(src0[lane], src1[lane]);
@@ -515,6 +544,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::fmax(src0[lane], src1[lane]);
@@ -548,6 +580,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::min(src0[lane], src1[lane]);
@@ -581,6 +616,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::max(src0[lane], src1[lane]);
@@ -614,6 +652,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::min(src0[lane], src1[lane]);
@@ -647,6 +688,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::max(src0[lane], src1[lane]);
@@ -682,6 +726,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
@@ -717,6 +764,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
@@ -751,6 +801,8 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         if (isSDWAInst()) {
             VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
             // use copies of original src0, src1, and vdst during selecting
@@ -826,6 +878,8 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+
         if (isDPPInst()) {
             VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
             src0_dpp.read();
@@ -886,6 +940,8 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         if (isSDWAInst()) {
             VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
             // use copies of original src0, src1, and dest during selecting
@@ -961,6 +1017,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] ^ src1[lane];
@@ -998,6 +1057,8 @@ namespace VegaISA
         src1.read();
         vdst.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+
         if (isDPPInst()) {
             VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
             src0_dpp.read();
@@ -1064,6 +1125,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::fma(src0[lane], k, src1[lane]);
@@ -1103,6 +1167,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::fma(src0[lane], src1[lane], k);
@@ -1141,6 +1208,8 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         if (isSDWAInst()) {
             VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
             // use copies of original src0, src1, and dest during selecting
@@ -1225,6 +1294,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] - src1[lane];
@@ -1265,6 +1337,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] - src0[lane];
@@ -1308,6 +1383,9 @@ namespace VegaISA
         src1.read();
         vcc.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] + src1[lane]
@@ -1355,6 +1433,9 @@ namespace VegaISA
         src1.read();
         vcc.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane]
@@ -1401,6 +1482,9 @@ namespace VegaISA
         src1.read();
         vcc.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane]
@@ -1598,6 +1682,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] + src1[lane];
@@ -1632,6 +1719,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] - src1[lane];
@@ -1667,6 +1757,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] - src0[lane];
@@ -1701,6 +1794,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] * src1[lane];
@@ -1735,6 +1831,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
@@ -1770,6 +1869,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] >> src0[lane];
@@ -1805,6 +1907,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] >> src0[lane];
@@ -1882,6 +1987,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::max(src0[lane], src1[lane]);
@@ -1915,6 +2023,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::max(src0[lane], src1[lane]);
@@ -1948,6 +2059,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::min(src0[lane], src1[lane]);
@@ -1981,6 +2095,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::min(src0[lane], src1[lane]);
@@ -2034,6 +2151,8 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         if (isSDWAInst()) {
             VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
             // use copies of original src0, src1, and dest during selecting
@@ -2108,6 +2227,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src0[lane] - src1[lane];
@@ -2141,6 +2263,9 @@ namespace VegaISA
         src0.readSrc();
         src1.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = src1[lane] - src0[lane];
@@ -2175,6 +2300,9 @@ namespace VegaISA
         src1.read();
         vdst.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
@@ -2209,6 +2337,9 @@ namespace VegaISA
         src1.read();
         vdst.read();
 
+        panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
+
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (wf->execMask(lane)) {
                 vdst[lane] = ~(src0[lane] ^ src1[lane]);