diff --git a/src/arch/amdgpu/vega/insts/vop1.cc b/src/arch/amdgpu/vega/insts/vop1.cc index f970923951..f3744b52f2 100644 --- a/src/arch/amdgpu/vega/insts/vop1.cc +++ b/src/arch/amdgpu/vega/insts/vop1.cc @@ -80,6 +80,8 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src_dpp.read(); @@ -148,6 +150,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + if (exec_mask) { src_lane = findLsbSet(exec_mask); } @@ -182,6 +187,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -226,6 +234,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF64)src[lane]; @@ -258,6 +269,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)src[lane]; @@ -290,6 +304,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)src[lane]; @@ -324,6 +341,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -372,6 +392,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -439,6 +462,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { float tmp = src[lane]; @@ -475,6 +501,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { AMDGPU::mxfloat16 tmp(src[lane]); @@ -509,6 +538,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5); @@ -542,6 +574,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemI32)std::floor(src[lane]); @@ -595,6 +630,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)src[lane]; @@ -627,6 +665,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF64)src[lane]; @@ -659,6 +700,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0)); @@ -691,6 +735,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8)); @@ -723,6 +770,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16)); @@ -755,6 +805,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24)); @@ -789,6 +842,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { int exp; @@ -835,6 +891,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = (VecElemF64)src[lane]; @@ -867,6 +926,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::trunc(src[lane]); @@ -900,6 +962,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::ceil(src[lane]); @@ -932,6 +997,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = roundNearestEven(src[lane]); @@ -965,6 +1033,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::floor(src[lane]); @@ -997,6 +1068,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemF32 int_part(0.0); @@ -1030,6 +1104,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::trunc(src[lane]); @@ -1063,6 +1140,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::ceil(src[lane]); @@ -1095,6 +1175,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = roundNearestEven(src[lane]); @@ -1128,6 +1211,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::floor(src[lane]); @@ -1160,6 +1246,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::pow(2.0, src[lane]); @@ -1192,6 +1281,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::log2(src[lane]); @@ -1224,6 +1316,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 1.0 / src[lane]; @@ -1258,6 +1353,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 1.0 / src[lane]; @@ -1290,6 +1388,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = 1.0 / std::sqrt(src[lane]); @@ -1322,6 +1423,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::fpclassify(src[lane]) == FP_ZERO) { @@ -1366,6 +1470,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::fpclassify(src[lane]) == FP_ZERO) { @@ -1409,6 +1516,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::sqrt(src[lane]); @@ -1441,6 +1551,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::sqrt(src[lane]); @@ -1477,6 +1590,9 @@ namespace VegaISA src.readSrc(); pi.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (src[lane] < -256.0 || src[lane] > 256.0) { @@ -1517,6 +1633,9 @@ namespace VegaISA src.readSrc(); pi.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (src[lane] < -256.0 || src[lane] > 256.0) { @@ -1553,6 +1672,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = ~src[lane]; @@ -1585,6 +1707,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = reverseBits(src[lane]); @@ -1617,6 +1742,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = findFirstOneMsb(src[lane]); @@ -1649,6 +1777,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = findFirstOne(src[lane]); @@ -1681,6 +1812,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = firstOppositeSignBit(src[lane]); @@ -1714,6 +1848,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1752,6 +1889,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1789,6 +1929,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemF64 int_part(0.0); @@ -1827,6 +1970,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1870,6 +2016,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isinf(src[lane]) || std::isnan(src[lane])) { @@ -1926,8 +2075,8 @@ namespace VegaISA src.readSrc(); - panic_if(isDPPInst(), "DPP unimplemented for v_mov_b64"); - panic_if(isSDWAInst(), "SDWA unimplemented for v_mov_b64"); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { @@ -2359,6 +2508,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::pow(2.0, src[lane]); @@ -2391,6 +2543,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::log2(src[lane]); @@ -2423,6 +2578,9 @@ namespace VegaISA src.readSrc(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src[lane]; diff --git a/src/arch/amdgpu/vega/insts/vop2.cc b/src/arch/amdgpu/vega/insts/vop2.cc index 55146711b6..f6eec253a3 100644 --- a/src/arch/amdgpu/vega/insts/vop2.cc +++ b/src/arch/amdgpu/vega/insts/vop2.cc @@ -67,6 +67,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] @@ -102,6 +105,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src0_dpp.read(); @@ -163,6 +168,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -198,6 +206,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -232,6 +243,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] * src1[lane]; @@ -266,6 +280,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { if (std::isnan(src0[lane]) || @@ -344,6 +361,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = sext<24>(bits(src0[lane], 23, 0)) @@ -378,6 +398,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemI64 tmp_src0 @@ -445,6 +468,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); @@ -481,6 +507,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fmin(src0[lane], src1[lane]); @@ -515,6 +544,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fmax(src0[lane], src1[lane]); @@ -548,6 +580,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -581,6 +616,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -614,6 +652,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -647,6 +688,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -682,6 +726,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); @@ -717,6 +764,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); @@ -751,6 +801,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and vdst during selecting @@ -826,6 +878,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src0_dpp.read(); @@ -886,6 +940,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and dest during selecting @@ -961,6 +1017,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] ^ src1[lane]; @@ -998,6 +1057,8 @@ namespace VegaISA src1.read(); vdst.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + if (isDPPInst()) { VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); src0_dpp.read(); @@ -1064,6 +1125,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fma(src0[lane], k, src1[lane]); @@ -1103,6 +1167,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fma(src0[lane], src1[lane], k); @@ -1141,6 +1208,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and dest during selecting @@ -1225,6 +1294,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -1265,6 +1337,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -1308,6 +1383,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] + src1[lane] @@ -1355,6 +1433,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] @@ -1401,6 +1482,9 @@ namespace VegaISA src1.read(); vcc.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] @@ -1598,6 +1682,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] + src1[lane]; @@ -1632,6 +1719,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -1667,6 +1757,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -1701,6 +1794,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] * src1[lane]; @@ -1735,6 +1831,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] << bits(src0[lane], 3, 0); @@ -1770,6 +1869,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> src0[lane]; @@ -1805,6 +1907,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] >> src0[lane]; @@ -1882,6 +1987,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -1915,6 +2023,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::max(src0[lane], src1[lane]); @@ -1948,6 +2059,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -1981,6 +2095,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::min(src0[lane], src1[lane]); @@ -2034,6 +2151,8 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + if (isSDWAInst()) { VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); // use copies of original src0, src1, and dest during selecting @@ -2108,6 +2227,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src0[lane] - src1[lane]; @@ -2141,6 +2263,9 @@ namespace VegaISA src0.readSrc(); src1.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = src1[lane] - src0[lane]; @@ -2175,6 +2300,9 @@ namespace VegaISA src1.read(); vdst.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]); @@ -2209,6 +2337,9 @@ namespace VegaISA src1.read(); vdst.read(); + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { if (wf->execMask(lane)) { vdst[lane] = ~(src0[lane] ^ src1[lane]);