From b75fe56da5a92d703b5f3c841435ff87fe1f2b61 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 24 Jul 2024 17:32:37 -0700 Subject: [PATCH] arch-vega: Panic unimplemented SDWA/DPP for VOP1/VOP2 Add a panic if SDWA or DPP is used for an instruction which does not implement support for it. If an application uses SDWA or DPP, the instruction likely does not operate in the same way as the base instruction, and therefore gem5 should panic rather than continue. Otherwise it is likely that the data is incorrect, which will make it more difficult to debug an application. Change-Id: I68ac448b0d62941761ef4efa0169f95796270f48 --- src/arch/amdgpu/vega/insts/vop1.cc | 162 ++++++++++++++++++++++++++++- src/arch/amdgpu/vega/insts/vop2.cc | 131 +++++++++++++++++++++++ 2 files changed, 291 insertions(+), 2 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/vop1.cc b/src/arch/amdgpu/vega/insts/vop1.cc index f970923951..f3744b52f2 100644 --- a/src/arch/amdgpu/vega/insts/vop1.cc +++ b/src/arch/amdgpu/vega/insts/vop1.cc @@ -80,6 +80,8 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src_dpp.read(); @@ -148,6 +150,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + if (exec_mask) { src_lane = findLsbSet(exec_mask); } @@ -182,6 +187,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -226,6 +234,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 
(VecElemF64)src[lane]; @@ -258,6 +269,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)src[lane]; @@ -290,6 +304,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)src[lane]; @@ -324,6 +341,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -372,6 +392,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -439,6 +462,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { float tmp = src[lane]; @@ -475,6 +501,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { AMDGPU::mxfloat16 tmp(src[lane]); @@ -509,6 +538,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < 
NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5); @@ -542,6 +574,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemI32)std::floor(src[lane]); @@ -595,6 +630,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)src[lane]; @@ -627,6 +665,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF64)src[lane]; @@ -659,6 +700,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0)); @@ -691,6 +735,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8)); @@ -723,6 +770,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16)); @@ -755,6 
+805,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24)); @@ -789,6 +842,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -835,6 +891,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF64)src[lane]; @@ -867,6 +926,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::trunc(src[lane]); @@ -900,6 +962,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::ceil(src[lane]); @@ -932,6 +997,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = roundNearestEven(src[lane]); @@ -965,6 +1033,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 
0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::floor(src[lane]); @@ -997,6 +1068,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemF32 int_part(0.0); @@ -1030,6 +1104,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::trunc(src[lane]); @@ -1063,6 +1140,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::ceil(src[lane]); @@ -1095,6 +1175,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = roundNearestEven(src[lane]); @@ -1128,6 +1211,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::floor(src[lane]); @@ -1160,6 +1246,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::pow(2.0, src[lane]); @@ -1192,6 +1281,9 @@ namespace VegaISA src.readSrc(); + 
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::log2(src[lane]); @@ -1224,6 +1316,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 1.0 / src[lane]; @@ -1258,6 +1353,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 1.0 / src[lane]; @@ -1290,6 +1388,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 1.0 / std::sqrt(src[lane]); @@ -1322,6 +1423,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::fpclassify(src[lane]) == FP_ZERO) { @@ -1366,6 +1470,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::fpclassify(src[lane]) == FP_ZERO) { @@ -1409,6 +1516,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < 
NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::sqrt(src[lane]); @@ -1441,6 +1551,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::sqrt(src[lane]); @@ -1477,6 +1590,9 @@ namespace VegaISA src.readSrc(); pi.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (src[lane] < -256.0 || src[lane] > 256.0) { @@ -1517,6 +1633,9 @@ namespace VegaISA src.readSrc(); pi.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (src[lane] < -256.0 || src[lane] > 256.0) { @@ -1553,6 +1672,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = ~src[lane]; @@ -1585,6 +1707,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = reverseBits(src[lane]); @@ -1617,6 +1742,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = findFirstOneMsb(src[lane]); @@ -1649,6 +1777,9 @@ namespace 
VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = findFirstOne(src[lane]); @@ -1681,6 +1812,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = firstOppositeSignBit(src[lane]); @@ -1714,6 +1848,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1752,6 +1889,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1789,6 +1929,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemF64 int_part(0.0); @@ -1827,6 +1970,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1870,6 +2016,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), 
"DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1926,8 +2075,8 @@ namespace VegaISA src.readSrc(); - panic_if(isDPPInst(), "DPP unimplemented for v_mov_b64"); - panic_if(isSDWAInst(), "SDWA unimplemented for v_mov_b64"); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { @@ -2359,6 +2508,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::pow(2.0, src[lane]); @@ -2391,6 +2543,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::log2(src[lane]); @@ -2423,6 +2578,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src[lane]; diff --git a/src/arch/amdgpu/vega/insts/vop2.cc b/src/arch/amdgpu/vega/insts/vop2.cc index 55146711b6..f6eec253a3 100644 --- a/src/arch/amdgpu/vega/insts/vop2.cc +++ b/src/arch/amdgpu/vega/insts/vop2.cc @@ -67,6 +67,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] @@ -102,6 +105,8 @@ namespace 
VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src0_dpp.read(); @@ -163,6 +168,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -198,6 +206,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -232,6 +243,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] * src1[lane]; @@ -266,6 +280,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isnan(src0[lane]) || @@ -344,6 +361,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = sext<24>(bits(src0[lane], 23, 0)) @@ -378,6 +398,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", 
_opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemI64 tmp_src0 @@ -445,6 +468,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); @@ -481,6 +507,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fmin(src0[lane], src1[lane]); @@ -515,6 +544,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fmax(src0[lane], src1[lane]); @@ -548,6 +580,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -581,6 +616,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -614,6 +652,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < 
NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -647,6 +688,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -682,6 +726,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); @@ -717,6 +764,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); @@ -751,6 +801,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and vdst during selecting @@ -826,6 +878,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src0_dpp.read(); @@ -886,6 +940,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and dest during selecting @@ -961,6 +1017,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not 
implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] ^ src1[lane]; @@ -998,6 +1057,8 @@ namespace VegaISA src1.read(); vdst.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src0_dpp.read(); @@ -1064,6 +1125,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fma(src0[lane], k, src1[lane]); @@ -1103,6 +1167,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fma(src0[lane], src1[lane], k); @@ -1141,6 +1208,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and dest during selecting @@ -1225,6 +1294,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -1265,6 +1337,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if 
(wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -1308,6 +1383,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] + src1[lane] @@ -1355,6 +1433,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] @@ -1401,6 +1482,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] @@ -1598,6 +1682,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] + src1[lane]; @@ -1632,6 +1719,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -1667,6 +1757,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -1701,6 +1794,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + 
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] * src1[lane]; @@ -1735,6 +1831,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] << bits(src0[lane], 3, 0); @@ -1770,6 +1869,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> src0[lane]; @@ -1805,6 +1907,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> src0[lane]; @@ -1882,6 +1987,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -1915,6 +2023,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -1948,6 +2059,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not 
implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -1981,6 +2095,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -2034,6 +2151,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and dest during selecting @@ -2108,6 +2227,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -2141,6 +2263,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -2175,6 +2300,9 @@ namespace VegaISA src1.read(); vdst.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]); @@ -2209,6 +2337,9 @@ namespace VegaISA src1.read(); vdst.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not 
implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = ~(src0[lane] ^ src1[lane]);