arch-vega: Panic on unimplemented SDWA/DPP for VOP1/VOP2

Add a panic if SDWA or DPP is used with an instruction that does not
implement support for it. An application that uses SDWA or DPP most likely
expects behavior that differs from the base instruction, and therefore
gem5 should panic rather than continue. Continuing would likely produce
incorrect data, which would make the application more difficult to debug.

Change-Id: I68ac448b0d62941761ef4efa0169f95796270f48
This commit is contained in:
Matthew Poremba
2024-07-24 17:32:37 -07:00
parent 6558821e2d
commit b75fe56da5
2 changed files with 291 additions and 2 deletions

View File

@@ -80,6 +80,8 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
if (isDPPInst()) {
VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
src_dpp.read();
@@ -148,6 +150,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
if (exec_mask) {
src_lane = findLsbSet(exec_mask);
}
@@ -182,6 +187,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
int exp;
@@ -226,6 +234,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF64)src[lane];
@@ -258,6 +269,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)src[lane];
@@ -290,6 +304,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)src[lane];
@@ -324,6 +341,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
int exp;
@@ -372,6 +392,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
int exp;
@@ -439,6 +462,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
float tmp = src[lane];
@@ -475,6 +501,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
AMDGPU::mxfloat16 tmp(src[lane]);
@@ -509,6 +538,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
@@ -542,6 +574,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemI32)std::floor(src[lane]);
@@ -595,6 +630,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)src[lane];
@@ -627,6 +665,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF64)src[lane];
@@ -659,6 +700,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0));
@@ -691,6 +735,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8));
@@ -723,6 +770,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16));
@@ -755,6 +805,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24));
@@ -789,6 +842,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
int exp;
@@ -835,6 +891,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = (VecElemF64)src[lane];
@@ -867,6 +926,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::trunc(src[lane]);
@@ -900,6 +962,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::ceil(src[lane]);
@@ -932,6 +997,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = roundNearestEven(src[lane]);
@@ -965,6 +1033,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::floor(src[lane]);
@@ -997,6 +1068,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemF32 int_part(0.0);
@@ -1030,6 +1104,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::trunc(src[lane]);
@@ -1063,6 +1140,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::ceil(src[lane]);
@@ -1095,6 +1175,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = roundNearestEven(src[lane]);
@@ -1128,6 +1211,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::floor(src[lane]);
@@ -1160,6 +1246,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::pow(2.0, src[lane]);
@@ -1192,6 +1281,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::log2(src[lane]);
@@ -1224,6 +1316,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = 1.0 / src[lane];
@@ -1258,6 +1353,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = 1.0 / src[lane];
@@ -1290,6 +1388,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = 1.0 / std::sqrt(src[lane]);
@@ -1322,6 +1423,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::fpclassify(src[lane]) == FP_ZERO) {
@@ -1366,6 +1470,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::fpclassify(src[lane]) == FP_ZERO) {
@@ -1409,6 +1516,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::sqrt(src[lane]);
@@ -1441,6 +1551,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::sqrt(src[lane]);
@@ -1477,6 +1590,9 @@ namespace VegaISA
src.readSrc();
pi.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (src[lane] < -256.0 || src[lane] > 256.0) {
@@ -1517,6 +1633,9 @@ namespace VegaISA
src.readSrc();
pi.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (src[lane] < -256.0 || src[lane] > 256.0) {
@@ -1553,6 +1672,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = ~src[lane];
@@ -1585,6 +1707,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = reverseBits(src[lane]);
@@ -1617,6 +1742,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = findFirstOneMsb(src[lane]);
@@ -1649,6 +1777,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = findFirstOne(src[lane]);
@@ -1681,6 +1812,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = firstOppositeSignBit(src[lane]);
@@ -1714,6 +1848,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
@@ -1752,6 +1889,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
@@ -1789,6 +1929,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemF64 int_part(0.0);
@@ -1827,6 +1970,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
@@ -1870,6 +2016,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
@@ -1926,8 +2075,8 @@ namespace VegaISA
src.readSrc();
panic_if(isDPPInst(), "DPP unimplemented for v_mov_b64");
panic_if(isSDWAInst(), "SDWA unimplemented for v_mov_b64");
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
@@ -2359,6 +2508,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::pow(2.0, src[lane]);
@@ -2391,6 +2543,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::log2(src[lane]);
@@ -2423,6 +2578,9 @@ namespace VegaISA
src.readSrc();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src[lane];

View File

@@ -67,6 +67,9 @@ namespace VegaISA
src1.read();
vcc.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane]
@@ -102,6 +105,8 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
if (isDPPInst()) {
VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
src0_dpp.read();
@@ -163,6 +168,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] - src1[lane];
@@ -198,6 +206,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] - src0[lane];
@@ -232,6 +243,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] * src1[lane];
@@ -266,6 +280,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
if (std::isnan(src0[lane]) ||
@@ -344,6 +361,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
@@ -378,6 +398,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemI64 tmp_src0
@@ -445,6 +468,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
@@ -481,6 +507,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fmin(src0[lane], src1[lane]);
@@ -515,6 +544,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fmax(src0[lane], src1[lane]);
@@ -548,6 +580,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::min(src0[lane], src1[lane]);
@@ -581,6 +616,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::max(src0[lane], src1[lane]);
@@ -614,6 +652,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::min(src0[lane], src1[lane]);
@@ -647,6 +688,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::max(src0[lane], src1[lane]);
@@ -682,6 +726,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
@@ -717,6 +764,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
@@ -751,6 +801,8 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
if (isSDWAInst()) {
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
// use copies of original src0, src1, and vdst during selecting
@@ -826,6 +878,8 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
if (isDPPInst()) {
VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
src0_dpp.read();
@@ -886,6 +940,8 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
if (isSDWAInst()) {
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
// use copies of original src0, src1, and dest during selecting
@@ -961,6 +1017,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] ^ src1[lane];
@@ -998,6 +1057,8 @@ namespace VegaISA
src1.read();
vdst.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
if (isDPPInst()) {
VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
src0_dpp.read();
@@ -1064,6 +1125,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fma(src0[lane], k, src1[lane]);
@@ -1103,6 +1167,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fma(src0[lane], src1[lane], k);
@@ -1141,6 +1208,8 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
if (isSDWAInst()) {
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
// use copies of original src0, src1, and dest during selecting
@@ -1225,6 +1294,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] - src1[lane];
@@ -1265,6 +1337,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] - src0[lane];
@@ -1308,6 +1383,9 @@ namespace VegaISA
src1.read();
vcc.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] + src1[lane]
@@ -1355,6 +1433,9 @@ namespace VegaISA
src1.read();
vcc.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane]
@@ -1401,6 +1482,9 @@ namespace VegaISA
src1.read();
vcc.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane]
@@ -1598,6 +1682,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] + src1[lane];
@@ -1632,6 +1719,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] - src1[lane];
@@ -1667,6 +1757,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] - src0[lane];
@@ -1701,6 +1794,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] * src1[lane];
@@ -1735,6 +1831,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
@@ -1770,6 +1869,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] >> src0[lane];
@@ -1805,6 +1907,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] >> src0[lane];
@@ -1882,6 +1987,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::max(src0[lane], src1[lane]);
@@ -1915,6 +2023,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::max(src0[lane], src1[lane]);
@@ -1948,6 +2059,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::min(src0[lane], src1[lane]);
@@ -1981,6 +2095,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::min(src0[lane], src1[lane]);
@@ -2034,6 +2151,8 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
if (isSDWAInst()) {
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
// use copies of original src0, src1, and dest during selecting
@@ -2108,6 +2227,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src0[lane] - src1[lane];
@@ -2141,6 +2263,9 @@ namespace VegaISA
src0.readSrc();
src1.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = src1[lane] - src0[lane];
@@ -2175,6 +2300,9 @@ namespace VegaISA
src1.read();
vdst.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
@@ -2209,6 +2337,9 @@ namespace VegaISA
src1.read();
vdst.read();
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vdst[lane] = ~(src0[lane] ^ src1[lane]);