arch-vega: Panic on unimplemented SDWA/DPP for VOP1/VOP2
Add a panic if SDWA or DPP is used with an instruction that does not implement support for it. An instruction encoded with SDWA or DPP likely does not operate in the same way as its base instruction, so gem5 should panic rather than continue: continuing would likely produce incorrect data, which makes the application more difficult to debug. Change-Id: I68ac448b0d62941761ef4efa0169f95796270f48
This commit is contained in:
@@ -80,6 +80,8 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
|
||||
if (isDPPInst()) {
|
||||
VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
|
||||
src_dpp.read();
|
||||
@@ -148,6 +150,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
if (exec_mask) {
|
||||
src_lane = findLsbSet(exec_mask);
|
||||
}
|
||||
@@ -182,6 +187,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
int exp;
|
||||
@@ -226,6 +234,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = (VecElemF64)src[lane];
|
||||
@@ -258,6 +269,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = (VecElemF32)src[lane];
|
||||
@@ -290,6 +304,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = (VecElemF32)src[lane];
|
||||
@@ -324,6 +341,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
int exp;
|
||||
@@ -372,6 +392,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
int exp;
|
||||
@@ -439,6 +462,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
float tmp = src[lane];
|
||||
@@ -475,6 +501,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
AMDGPU::mxfloat16 tmp(src[lane]);
|
||||
@@ -509,6 +538,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
|
||||
@@ -542,6 +574,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = (VecElemI32)std::floor(src[lane]);
|
||||
@@ -595,6 +630,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = (VecElemF32)src[lane];
|
||||
@@ -627,6 +665,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = (VecElemF64)src[lane];
|
||||
@@ -659,6 +700,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0));
|
||||
@@ -691,6 +735,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8));
|
||||
@@ -723,6 +770,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16));
|
||||
@@ -755,6 +805,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24));
|
||||
@@ -789,6 +842,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
int exp;
|
||||
@@ -835,6 +891,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = (VecElemF64)src[lane];
|
||||
@@ -867,6 +926,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::trunc(src[lane]);
|
||||
@@ -900,6 +962,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::ceil(src[lane]);
|
||||
@@ -932,6 +997,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = roundNearestEven(src[lane]);
|
||||
@@ -965,6 +1033,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::floor(src[lane]);
|
||||
@@ -997,6 +1068,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
VecElemF32 int_part(0.0);
|
||||
@@ -1030,6 +1104,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::trunc(src[lane]);
|
||||
@@ -1063,6 +1140,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::ceil(src[lane]);
|
||||
@@ -1095,6 +1175,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = roundNearestEven(src[lane]);
|
||||
@@ -1128,6 +1211,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::floor(src[lane]);
|
||||
@@ -1160,6 +1246,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::pow(2.0, src[lane]);
|
||||
@@ -1192,6 +1281,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::log2(src[lane]);
|
||||
@@ -1224,6 +1316,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = 1.0 / src[lane];
|
||||
@@ -1258,6 +1353,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = 1.0 / src[lane];
|
||||
@@ -1290,6 +1388,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = 1.0 / std::sqrt(src[lane]);
|
||||
@@ -1322,6 +1423,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
if (std::fpclassify(src[lane]) == FP_ZERO) {
|
||||
@@ -1366,6 +1470,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
if (std::fpclassify(src[lane]) == FP_ZERO) {
|
||||
@@ -1409,6 +1516,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::sqrt(src[lane]);
|
||||
@@ -1441,6 +1551,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::sqrt(src[lane]);
|
||||
@@ -1477,6 +1590,9 @@ namespace VegaISA
|
||||
src.readSrc();
|
||||
pi.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
if (src[lane] < -256.0 || src[lane] > 256.0) {
|
||||
@@ -1517,6 +1633,9 @@ namespace VegaISA
|
||||
src.readSrc();
|
||||
pi.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
if (src[lane] < -256.0 || src[lane] > 256.0) {
|
||||
@@ -1553,6 +1672,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = ~src[lane];
|
||||
@@ -1585,6 +1707,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = reverseBits(src[lane]);
|
||||
@@ -1617,6 +1742,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = findFirstOneMsb(src[lane]);
|
||||
@@ -1649,6 +1777,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = findFirstOne(src[lane]);
|
||||
@@ -1681,6 +1812,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = firstOppositeSignBit(src[lane]);
|
||||
@@ -1714,6 +1848,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
|
||||
@@ -1752,6 +1889,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
|
||||
@@ -1789,6 +1929,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
VecElemF64 int_part(0.0);
|
||||
@@ -1827,6 +1970,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
|
||||
@@ -1870,6 +2016,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
if (std::isinf(src[lane]) || std::isnan(src[lane])) {
|
||||
@@ -1926,8 +2075,8 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isDPPInst(), "DPP unimplemented for v_mov_b64");
|
||||
panic_if(isSDWAInst(), "SDWA unimplemented for v_mov_b64");
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
@@ -2359,6 +2508,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::pow(2.0, src[lane]);
|
||||
@@ -2391,6 +2543,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::log2(src[lane]);
|
||||
@@ -2423,6 +2578,9 @@ namespace VegaISA
|
||||
|
||||
src.readSrc();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src[lane];
|
||||
|
||||
@@ -67,6 +67,9 @@ namespace VegaISA
|
||||
src1.read();
|
||||
vcc.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane]
|
||||
@@ -102,6 +105,8 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
|
||||
if (isDPPInst()) {
|
||||
VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
|
||||
src0_dpp.read();
|
||||
@@ -163,6 +168,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0[lane] - src1[lane];
|
||||
@@ -198,6 +206,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src1[lane] - src0[lane];
|
||||
@@ -232,6 +243,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0[lane] * src1[lane];
|
||||
@@ -266,6 +280,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
if (std::isnan(src0[lane]) ||
|
||||
@@ -344,6 +361,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
|
||||
@@ -378,6 +398,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
VecElemI64 tmp_src0
|
||||
@@ -445,6 +468,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
|
||||
@@ -481,6 +507,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::fmin(src0[lane], src1[lane]);
|
||||
@@ -515,6 +544,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::fmax(src0[lane], src1[lane]);
|
||||
@@ -548,6 +580,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::min(src0[lane], src1[lane]);
|
||||
@@ -581,6 +616,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::max(src0[lane], src1[lane]);
|
||||
@@ -614,6 +652,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::min(src0[lane], src1[lane]);
|
||||
@@ -647,6 +688,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::max(src0[lane], src1[lane]);
|
||||
@@ -682,6 +726,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
|
||||
@@ -717,6 +764,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
|
||||
@@ -751,6 +801,8 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
if (isSDWAInst()) {
|
||||
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
|
||||
// use copies of original src0, src1, and vdst during selecting
|
||||
@@ -826,6 +878,8 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
|
||||
if (isDPPInst()) {
|
||||
VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
|
||||
src0_dpp.read();
|
||||
@@ -886,6 +940,8 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
if (isSDWAInst()) {
|
||||
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
|
||||
// use copies of original src0, src1, and dest during selecting
|
||||
@@ -961,6 +1017,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0[lane] ^ src1[lane];
|
||||
@@ -998,6 +1057,8 @@ namespace VegaISA
|
||||
src1.read();
|
||||
vdst.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
|
||||
if (isDPPInst()) {
|
||||
VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
|
||||
src0_dpp.read();
|
||||
@@ -1064,6 +1125,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::fma(src0[lane], k, src1[lane]);
|
||||
@@ -1103,6 +1167,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::fma(src0[lane], src1[lane], k);
|
||||
@@ -1141,6 +1208,8 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
if (isSDWAInst()) {
|
||||
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
|
||||
// use copies of original src0, src1, and dest during selecting
|
||||
@@ -1225,6 +1294,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0[lane] - src1[lane];
|
||||
@@ -1265,6 +1337,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src1[lane] - src0[lane];
|
||||
@@ -1308,6 +1383,9 @@ namespace VegaISA
|
||||
src1.read();
|
||||
vcc.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0[lane] + src1[lane]
|
||||
@@ -1355,6 +1433,9 @@ namespace VegaISA
|
||||
src1.read();
|
||||
vcc.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane]
|
||||
@@ -1401,6 +1482,9 @@ namespace VegaISA
|
||||
src1.read();
|
||||
vcc.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane]
|
||||
@@ -1598,6 +1682,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0[lane] + src1[lane];
|
||||
@@ -1632,6 +1719,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0[lane] - src1[lane];
|
||||
@@ -1667,6 +1757,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src1[lane] - src0[lane];
|
||||
@@ -1701,6 +1794,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0[lane] * src1[lane];
|
||||
@@ -1735,6 +1831,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
|
||||
@@ -1770,6 +1869,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src1[lane] >> src0[lane];
|
||||
@@ -1805,6 +1907,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src1[lane] >> src0[lane];
|
||||
@@ -1882,6 +1987,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::max(src0[lane], src1[lane]);
|
||||
@@ -1915,6 +2023,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::max(src0[lane], src1[lane]);
|
||||
@@ -1948,6 +2059,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::min(src0[lane], src1[lane]);
|
||||
@@ -1981,6 +2095,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::min(src0[lane], src1[lane]);
|
||||
@@ -2034,6 +2151,8 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
if (isSDWAInst()) {
|
||||
VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
|
||||
// use copies of original src0, src1, and dest during selecting
|
||||
@@ -2108,6 +2227,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0[lane] - src1[lane];
|
||||
@@ -2141,6 +2263,9 @@ namespace VegaISA
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src1[lane] - src0[lane];
|
||||
@@ -2175,6 +2300,9 @@ namespace VegaISA
|
||||
src1.read();
|
||||
vdst.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
|
||||
@@ -2209,6 +2337,9 @@ namespace VegaISA
|
||||
src1.read();
|
||||
vdst.read();
|
||||
|
||||
panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode);
|
||||
panic_if(isDPPInst(), "DPP not implemented for %s", _opcode);
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = ~(src0[lane] ^ src1[lane]);
|
||||
|
||||
Reference in New Issue
Block a user