arch-gcn3: fix bug with DPP support
Instructions that use the DPP field need to use the extra SRC0 register associated with the DPP instruction instead of the "default" SRC0 register, since the default SRC0 register contains the DPP information when DPP is being used. This commit fixes 2735c3bb88 to take this into account.

Additionally, this commit removes the write of the src register from the DPP helper functions, to avoid overwriting any changes made to the destination register.

Finally, this change modifies the instructions that use DPP to simplify the flow through the execute() functions.

Change-Id: I80fd0af1f131f287f18ff73b3c1c9122d8c60823
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29947
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
committed by
Anthony Gutierrez
parent
ed3135ea6a
commit
c7b6e7c613
@@ -505,7 +505,6 @@ namespace Gcn3ISA
|
||||
src0[lane] = 0;
|
||||
}
|
||||
|
||||
src0.write();
|
||||
// reset for next iteration
|
||||
laneDisabled = false;
|
||||
}
|
||||
|
||||
@@ -5296,8 +5296,12 @@ namespace Gcn3ISA
|
||||
VecOperandF32 src1(gpuDynInst, instData.VSRC1);
|
||||
VecOperandF32 vdst(gpuDynInst, instData.VDST);
|
||||
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
if (isDPPInst()) {
|
||||
VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
|
||||
src0_dpp.read();
|
||||
|
||||
DPRINTF(GCN3, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
|
||||
"DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
|
||||
@@ -5313,14 +5317,17 @@ namespace Gcn3ISA
|
||||
extData.iFmt_VOP_DPP.ROW_MASK);
|
||||
|
||||
processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
|
||||
}
|
||||
|
||||
src0.readSrc();
|
||||
src1.read();
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0[lane] + src1[lane];
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0_dpp[lane] + src1[lane];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src0[lane] + src1[lane];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6164,6 +6171,7 @@ namespace Gcn3ISA
|
||||
|
||||
if (isDPPInst()) {
|
||||
VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
|
||||
src0_dpp.read();
|
||||
|
||||
DPRINTF(GCN3, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
|
||||
"DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
|
||||
@@ -6179,11 +6187,18 @@ namespace Gcn3ISA
|
||||
extData.iFmt_VOP_DPP.ROW_MASK);
|
||||
|
||||
processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
|
||||
}
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
|
||||
vdst[lane]);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7117,8 +7132,11 @@ namespace Gcn3ISA
|
||||
ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
|
||||
VecOperandU32 vdst(gpuDynInst, instData.VDST);
|
||||
|
||||
src.readSrc();
|
||||
|
||||
if (isDPPInst()) {
|
||||
VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
|
||||
VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
|
||||
src_dpp.read();
|
||||
|
||||
DPRINTF(GCN3, "Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], "
|
||||
"DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
|
||||
@@ -7137,14 +7155,18 @@ namespace Gcn3ISA
|
||||
// to negate it or take the absolute value of it
|
||||
assert(!extData.iFmt_VOP_DPP.SRC1_ABS);
|
||||
assert(!extData.iFmt_VOP_DPP.SRC1_NEG);
|
||||
processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp);
|
||||
}
|
||||
processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp);
|
||||
|
||||
src.readSrc();
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src[lane];
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src_dpp[lane];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
vdst[lane] = src[lane];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user