arch-vega,gpu-compute: Fix misc ubsan runtime errors
Three main fixes: - Remove the lambda in initDynOperandInfo. UBSAN reports errors and exits because things are not being captured properly. After a few failed attempts at adjusting the capture list, just move the lambda to a new method. - Fix the invalid data type size used by some thread mask instructions. This might actually have caused silent bugs when the thread id was > 31. - Fix alignment issues with the operands. Change-Id: I0297e10df0f0ab9730b6f1bd132602cd36b5e7ac
This commit is contained in:
@@ -1224,7 +1224,8 @@ namespace VegaISA
|
||||
src0.read();
|
||||
src1.read();
|
||||
|
||||
sdst = src0.rawData() * src1.rawData();
|
||||
ScalarRegI64 tmp = src0.rawData() * src1.rawData();
|
||||
sdst = tmp & mask(32);
|
||||
|
||||
sdst.write();
|
||||
} // execute
|
||||
|
||||
@@ -8583,7 +8583,7 @@ namespace VegaISA
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
threadMask = ((1LL << lane) - 1LL);
|
||||
threadMask = ((1ULL << lane) - 1ULL);
|
||||
vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) +
|
||||
src1[lane];
|
||||
}
|
||||
@@ -8633,7 +8633,7 @@ namespace VegaISA
|
||||
|
||||
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
|
||||
if (wf->execMask(lane)) {
|
||||
threadMask = ((1LL << lane) - 1LL);
|
||||
threadMask = ((1ULL << lane) - 1ULL);
|
||||
vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) +
|
||||
src1[lane];
|
||||
}
|
||||
|
||||
@@ -490,7 +490,7 @@ namespace VegaISA
|
||||
typename std::enable_if<Condition, void>::type
|
||||
setBit(int bit, int bit_val)
|
||||
{
|
||||
DataType &sgpr = *((DataType*)srfData.data());
|
||||
GEM5_ALIGNED(8) DataType &sgpr = *((DataType*)srfData.data());
|
||||
replaceBits(sgpr, bit, bit_val);
|
||||
}
|
||||
|
||||
@@ -739,7 +739,7 @@ namespace VegaISA
|
||||
* of a register is 1 dword. this class will take care to do the
|
||||
* proper packing/unpacking of sub-dword operands.
|
||||
*/
|
||||
std::array<ScalarRegU32, NumDwords> srfData;
|
||||
GEM5_ALIGNED(8) std::array<ScalarRegU32, NumDwords> srfData;
|
||||
};
|
||||
|
||||
// typedefs for the various sizes/types of scalar operands
|
||||
|
||||
@@ -54,55 +54,63 @@ GPUStaticInst::disassemble()
|
||||
return disassembly;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Build the virtual-to-physical register index mapping for one operand
 * and append that operand to opVec.
 *
 * For each dword of op, the virtual index is the operand's base register
 * index plus the dword offset, and the physical index is obtained from the
 * compute unit's register manager (mapVgpr for vector op types, mapSgpr
 * for scalar op types).
 *
 * @param wf wavefront passed to the register manager; its
 *           reservedScalarRegs is used to compute the base register index.
 * @param cu compute unit whose registerManager performs the mapping.
 * @param op operand to map; its mapping is set via setVirtToPhysMapping.
 * @param opVec vector that receives op once its mapping has been set.
 * @param opType selects vector vs. scalar mapping and the src/dst wording
 *               of the debug message.
 */
void
|
||||
GPUStaticInst::generateVirtToPhysMap(Wavefront *wf, ComputeUnit *cu,
|
||||
OperandInfo& op,
|
||||
std::vector<OperandInfo>& opVec,
|
||||
OpType opType)
|
||||
{
|
||||
// Parallel lists: entry i holds the virtual/physical index of dword i.
std::vector<int> virt_idxs;
|
||||
std::vector<int> phys_idxs;
|
||||
|
||||
int num_dwords = op.sizeInDWords();
|
||||
// Base virtual register index; dword i of the operand uses virt_idx + i.
int virt_idx = op.registerIndex(wf->reservedScalarRegs);
|
||||
|
||||
int phys_idx = -1;
|
||||
for (int i = 0; i < num_dwords; i++) {
|
||||
// Vector op types go through mapVgpr; everything else must be scalar.
if (opType == OpType::SRC_VEC || opType == OpType::DST_VEC) {
|
||||
phys_idx = cu->registerManager->mapVgpr(wf, virt_idx + i);
|
||||
} else {
|
||||
// Only the two scalar op types are expected on this path.
assert(opType == OpType::SRC_SCALAR ||
|
||||
opType == OpType::DST_SCALAR);
|
||||
phys_idx = cu->registerManager->mapSgpr(wf, virt_idx + i);
|
||||
}
|
||||
virt_idxs.push_back(virt_idx + i);
|
||||
phys_idxs.push_back(phys_idx);
|
||||
}
|
||||
// NOTE(review): the message reads virt_idxs[0] and phys_idxs[0]
// unconditionally, which assumes num_dwords >= 1 -- confirm that
// sizeInDWords() can never return 0 here.
DPRINTF(GPUInst, "%s adding %s %s (%d->%d) operand that uses "
|
||||
"%d registers.\n", disassemble(),
|
||||
(opType == OpType::SRC_VEC || opType == OpType::DST_VEC) ?
|
||||
"vector" : "scalar",
|
||||
(opType == OpType::SRC_VEC || opType == OpType::SRC_SCALAR) ?
|
||||
"src" : "dst", virt_idxs[0], phys_idxs[0], num_dwords);
|
||||
|
||||
op.setVirtToPhysMapping(virt_idxs, phys_idxs);
|
||||
|
||||
// Append op to the caller-supplied list after its mapping has been set.
opVec.emplace_back(op);
|
||||
}
|
||||
|
||||
void
|
||||
GPUStaticInst::initDynOperandInfo(Wavefront *wf, ComputeUnit *cu)
|
||||
{
|
||||
// Lambda function, as this is only ever used here
|
||||
auto generateVirtToPhysMap = [&](OperandInfo& op,
|
||||
std::vector<OperandInfo>& opVec,
|
||||
MapRegFn mapFn, OpType opType)
|
||||
{
|
||||
std::vector<int> virt_idxs;
|
||||
std::vector<int> phys_idxs;
|
||||
|
||||
int num_dwords = op.sizeInDWords();
|
||||
int virt_idx = op.registerIndex(wf->reservedScalarRegs);
|
||||
|
||||
int phys_idx = -1;
|
||||
for (int i = 0; i < num_dwords; i++){
|
||||
phys_idx = (cu->registerManager->*mapFn)(wf, virt_idx + i);
|
||||
virt_idxs.push_back(virt_idx + i);
|
||||
phys_idxs.push_back(phys_idx);
|
||||
}
|
||||
DPRINTF(GPUInst, "%s adding %s %s (%d->%d) operand that uses "
|
||||
"%d registers.\n", disassemble(),
|
||||
(opType == OpType::SRC_VEC || opType == OpType::DST_VEC) ?
|
||||
"vector" : "scalar",
|
||||
(opType == OpType::SRC_VEC || opType == OpType::SRC_SCALAR) ?
|
||||
"src" : "dst", virt_idxs[0], phys_idxs[0], num_dwords);
|
||||
|
||||
op.setVirtToPhysMapping(virt_idxs, phys_idxs);
|
||||
|
||||
opVec.emplace_back(op);
|
||||
};
|
||||
|
||||
for (auto& srcOp : srcOps) {
|
||||
if (srcOp.isVectorReg()) {
|
||||
generateVirtToPhysMap(srcOp, srcVecRegOps,
|
||||
&RegisterManager::mapVgpr, OpType::SRC_VEC);
|
||||
generateVirtToPhysMap(wf, cu, srcOp, srcVecRegOps,
|
||||
OpType::SRC_VEC);
|
||||
} else if (srcOp.isScalarReg()) {
|
||||
generateVirtToPhysMap(srcOp, srcScalarRegOps,
|
||||
&RegisterManager::mapSgpr, OpType::SRC_SCALAR);
|
||||
generateVirtToPhysMap(wf, cu, srcOp, srcScalarRegOps,
|
||||
OpType::SRC_SCALAR);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& dstOp : dstOps) {
|
||||
if (dstOp.isVectorReg()) {
|
||||
generateVirtToPhysMap(dstOp, dstVecRegOps,
|
||||
&RegisterManager::mapVgpr, OpType::DST_VEC);
|
||||
generateVirtToPhysMap(wf, cu, dstOp, dstVecRegOps,
|
||||
OpType::DST_VEC);
|
||||
} else if (dstOp.isScalarReg()) {
|
||||
generateVirtToPhysMap(dstOp, dstScalarRegOps,
|
||||
&RegisterManager::mapSgpr, OpType::DST_SCALAR);
|
||||
generateVirtToPhysMap(wf, cu, dstOp, dstScalarRegOps,
|
||||
OpType::DST_SCALAR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -321,6 +321,9 @@ class GPUStaticInst : public GPUStaticInstFlags
|
||||
int _ipdInstNum;
|
||||
|
||||
std::bitset<Num_Flags> _flags;
|
||||
|
||||
void generateVirtToPhysMap(Wavefront *wf, ComputeUnit *cu, OperandInfo& op,
|
||||
std::vector<OperandInfo>& opVec, OpType opType);
|
||||
};
|
||||
|
||||
class KernelLaunchStaticInst : public GPUStaticInst
|
||||
|
||||
Reference in New Issue
Block a user