arch-vega: Add SDWAB for v_cmp_{eq,ne}_u16

This shows an example of how to use the SDWAB helper added in the previous
commit. The execute() methods of the two instructions are identical except
for the lambda function passed to the helper method.

Change-Id: I5ffe361440b4020b9f7669c0ed946aa6b3bbec25
This commit is contained in:
Matthew Poremba
2024-07-17 15:23:54 -07:00
parent 69338703e7
commit 6558821e2d

View File

@@ -3782,13 +3782,21 @@ namespace VegaISA
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
}
}
auto cmpImpl = [](uint16_t a, uint16_t b) { return a == b ? 1 : 0; };
vcc.write();
if (isSDWAInst()) {
sdwabHelper<uint16_t>(gpuDynInst, cmpImpl);
} else if (isDPPInst()) {
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
} else {
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, cmpImpl(src0[lane], src1[lane]));
}
}
vcc.write();
}
} // execute
// --- Inst_VOPC__V_CMP_LE_U16 class methods ---
@@ -3881,10 +3889,20 @@ namespace VegaISA
src0.readSrc();
src1.read();
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
auto cmpImpl = [](uint16_t a, uint16_t b) { return a != b ? 1 : 0; };
if (isSDWAInst()) {
sdwabHelper<uint16_t>(gpuDynInst, cmpImpl);
} else if (isDPPInst()) {
panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
} else {
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
vcc.setBit(lane, cmpImpl(src0[lane], src1[lane]));
}
}
vcc.write();
}
vcc.write();