arch-gcn3: add support for v_mbcnt_hi and v_mbcnt_lo

Change-Id: I1c70fe693c904f1abd7d5a2b99220c74a075eae5
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29948
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
Matt Sinclair
2018-07-10 02:58:28 -04:00
committed by Anthony Gutierrez
parent c7b6e7c613
commit 1836d58b36

View File

@@ -30309,8 +30309,36 @@ namespace Gcn3ISA
/**
 * Execute V_MBCNT_LO_U32_B32.
 *
 * For every lane enabled in the wavefront's exec mask, build a mask with
 * one bit set for each lane below the current one, AND its low 32 bits
 * with src0, count the set bits, add src1, and store the sum in vdst.
 * Lanes that are disabled in the exec mask are not assigned.
 *
 * @param gpuDynInst dynamic instruction supplying the wavefront and the
 *                   operand state that is read and written.
 */
void
Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // One bit set for each lane below this one. The literal is
            // unsigned on purpose: with a signed 1LL, a shift by 63
            // followed by the subtraction overflows a signed 64-bit
            // value, which is undefined behavior.
            // NOTE(review): assumes NumVecElemPerVecReg <= 64 so the
            // shift count stays below the operand width -- confirm.
            const uint64_t threadMask = (1ULL << lane) - 1ULL;

            // Only mask bits 31:0 take part in this instruction.
            vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) +
                         src1[lane];
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods ---
Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32(
InFmt_VOP3 *iFmt)
@@ -30330,8 +30358,36 @@ namespace Gcn3ISA
/**
 * Execute V_MBCNT_HI_U32_B32.
 *
 * For every lane enabled in the wavefront's exec mask, build a mask with
 * one bit set for each lane below the current one, AND bits 63:32 of that
 * mask with src0, count the set bits, add src1, and store the sum in
 * vdst. For lanes below 32 the upper half of the mask is zero, so the
 * result is src1 unchanged. Lanes that are disabled in the exec mask are
 * not assigned.
 *
 * @param gpuDynInst dynamic instruction supplying the wavefront and the
 *                   operand state that is read and written.
 */
void
Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // One bit set for each lane below this one. The literal is
            // unsigned on purpose: with a signed 1LL, a shift by 63
            // followed by the subtraction overflows a signed 64-bit
            // value, which is undefined behavior.
            // NOTE(review): assumes NumVecElemPerVecReg <= 64 so the
            // shift count stays below the operand width -- confirm.
            const uint64_t threadMask = (1ULL << lane) - 1ULL;

            // Only mask bits 63:32 take part in this instruction.
            vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) +
                         src1[lane];
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP3__V_LSHLREV_B64 class methods ---
Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3 *iFmt)
: Inst_VOP3(iFmt, "v_lshlrev_b64", false)