arch-vega: Fix vop2Helper scalar support (#142)
A previous change added a vop2Helper to remove 100s of lines of common code from VOP2 instructions related to processing SDWA and DPP support. That change inadvertently changed the type of operand source 0 from const to non-const. The vector container operator[] does not allow reading a scalar value such as a constant, a dword literal, etc. The error shows up in the form of: assert(!scalar) in operand.hh. Since the SDWA and DPP cases need to modify the source vector and non-SDWA/DPP cases might require const, we make a non-const copy of the const source 0 vector and place it in a temporary non-const vector. This non-const vector is passed to the lambda function implementation of the instruction. This prevents needing a const and non-const version of the lambda and avoids needing to propagate the template parameters through the various SDWA/DPP helper methods which seems like it will not work anyways as they need to modify the vector. As a result of this, as more VOP2 instructions are implemented using this helper, they will need to specify the const and non-const template parameters of the vector container needed for the instruction. Change-Id: Ia0b3c550d7de32b830040007a110f4821e3385aa
This commit is contained in:
@@ -6394,7 +6394,7 @@ namespace VegaISA
|
||||
}
|
||||
};
|
||||
|
||||
vop2Helper<VecOperandU32>(gpuDynInst, opImpl);
|
||||
vop2Helper<ConstVecOperandU32, VecOperandU32>(gpuDynInst, opImpl);
|
||||
} // execute
|
||||
// --- Inst_VOP2__V_MUL_HI_U32_U24 class methods ---
|
||||
|
||||
|
||||
@@ -339,7 +339,7 @@ namespace VegaISA
|
||||
return src0_dpp;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
template<typename ConstT, typename T>
|
||||
void vop2Helper(GPUDynInstPtr gpuDynInst,
|
||||
void (*fOpImpl)(T&, T&, T&, Wavefront*))
|
||||
{
|
||||
@@ -359,7 +359,19 @@ namespace VegaISA
|
||||
T src0_dpp = dppHelper(gpuDynInst, src1);
|
||||
fOpImpl(src0_dpp, src1, vdst, wf);
|
||||
} else {
|
||||
fOpImpl(src0, src1, vdst, wf);
|
||||
// src0 is unmodified. We need to use the const container
|
||||
// type to allow reading scalar operands from src0. Only
|
||||
// src0 can index scalar operands. We copy this to vdst
|
||||
// temporarily to pass to the lambda so the instruction
|
||||
// does not need to write two lambda functions (one for
|
||||
// a const src0 and one of a mutable src0).
|
||||
ConstT const_src0(gpuDynInst, instData.SRC0);
|
||||
const_src0.readSrc();
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
vdst[lane] = const_src0[lane];
|
||||
}
|
||||
fOpImpl(vdst, src1, vdst, wf);
|
||||
}
|
||||
|
||||
vdst.write();
|
||||
|
||||
Reference in New Issue
Block a user