From 64496338278f337e38190af97b9208bb9882495a Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Fri, 16 Dec 2022 13:39:24 -0800 Subject: [PATCH] arch-vega: Add DPP support for V_AND_B32 A DPP variant of V_AND_B32 was found in rocPRIM. With this changeset the unit tests for rocPRIM scan_inclusive are passing. Change-Id: I5a65f2cf6b56ac13609b191e3b3dfeb55e630942 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66753 Tested-by: kokoro Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair --- src/arch/amdgpu/vega/insts/instructions.cc | 34 +++++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index c9e57bc2f7..1f37ff14cc 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -6844,15 +6844,41 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 src1(gpuDynInst, instData.VSRC1); VecOperandU32 vdst(gpuDynInst, instData.VDST); src0.readSrc(); src1.read(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] & src1[lane]; + if (isDPPInst()) { + VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); + src0_dpp.read(); + + DPRINTF(VEGA, "Handling V_AND_B32 SRC DPP. SRC0: register v[%d], " + "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " + "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, " + "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, + extData.iFmt_VOP_DPP.DPP_CTRL, + extData.iFmt_VOP_DPP.SRC0_ABS, + extData.iFmt_VOP_DPP.SRC0_NEG, + extData.iFmt_VOP_DPP.SRC1_ABS, + extData.iFmt_VOP_DPP.SRC1_NEG, + extData.iFmt_VOP_DPP.BC, + extData.iFmt_VOP_DPP.BANK_MASK, + extData.iFmt_VOP_DPP.ROW_MASK); + + processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0_dpp[lane] & src1[lane]; + } + } + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] & src1[lane]; + } } }