arch-vega,gpu-compute: Fix misc ubsan runtime errors

Three main fixes:
 - Remove the lambda from initDynOperandInfo. UBSAN reported errors and
   exited because variables were not being captured properly. After a few
   failed attempts at adjusting the capture list, the lambda was moved to
   a new method.
 - Invalid data type size for some thread mask instructions. This might
   actually have caused silent bugs when the thread id was > 31.
 - Alignment issues with the operands.

Change-Id: I0297e10df0f0ab9730b6f1bd132602cd36b5e7ac
This commit is contained in:
Matthew Poremba
2024-04-24 10:42:22 -07:00
parent 3a2a917a53
commit 0faa9510f9
5 changed files with 54 additions and 42 deletions

View File

@@ -1224,7 +1224,8 @@ namespace VegaISA
src0.read();
src1.read();
sdst = src0.rawData() * src1.rawData();
ScalarRegI64 tmp = src0.rawData() * src1.rawData();
sdst = tmp & mask(32);
sdst.write();
} // execute

View File

@@ -8583,7 +8583,7 @@ namespace VegaISA
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
threadMask = ((1LL << lane) - 1LL);
threadMask = ((1ULL << lane) - 1ULL);
vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) +
src1[lane];
}
@@ -8633,7 +8633,7 @@ namespace VegaISA
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
threadMask = ((1LL << lane) - 1LL);
threadMask = ((1ULL << lane) - 1ULL);
vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) +
src1[lane];
}

View File

@@ -490,7 +490,7 @@ namespace VegaISA
typename std::enable_if<Condition, void>::type
setBit(int bit, int bit_val)
{
DataType &sgpr = *((DataType*)srfData.data());
GEM5_ALIGNED(8) DataType &sgpr = *((DataType*)srfData.data());
replaceBits(sgpr, bit, bit_val);
}
@@ -739,7 +739,7 @@ namespace VegaISA
* of a register is 1 dword. this class will take care to do the
* proper packing/unpacking of sub-dword operands.
*/
std::array<ScalarRegU32, NumDwords> srfData;
GEM5_ALIGNED(8) std::array<ScalarRegU32, NumDwords> srfData;
};
// typedefs for the various sizes/types of scalar operands

View File

@@ -54,55 +54,63 @@ GPUStaticInst::disassemble()
return disassembly;
}
// Map one operand's virtual register indices to physical registers and
// record the result. Extracted to a named method from a lambda that
// previously lived in initDynOperandInfo (see commit message: the lambda's
// capture list triggered UBSAN runtime errors).
//
// @param wf     wavefront owning the registers (supplies reservedScalarRegs)
// @param cu     compute unit whose registerManager performs the mapping
// @param op     operand to map; its virt->phys mapping is stored in place
// @param opVec  per-kind operand list that receives a copy of the mapped op
// @param opType selects vector (mapVgpr) vs. scalar (mapSgpr) mapping
void
GPUStaticInst::generateVirtToPhysMap(Wavefront *wf, ComputeUnit *cu,
OperandInfo& op,
std::vector<OperandInfo>& opVec,
OpType opType)
{
std::vector<int> virt_idxs;
std::vector<int> phys_idxs;
// A multi-dword operand occupies consecutive registers; map each dword.
int num_dwords = op.sizeInDWords();
int virt_idx = op.registerIndex(wf->reservedScalarRegs);
int phys_idx = -1;
for (int i = 0; i < num_dwords; i++) {
if (opType == OpType::SRC_VEC || opType == OpType::DST_VEC) {
phys_idx = cu->registerManager->mapVgpr(wf, virt_idx + i);
} else {
// Only four OpType values exist; anything non-vector must be scalar.
assert(opType == OpType::SRC_SCALAR ||
opType == OpType::DST_SCALAR);
phys_idx = cu->registerManager->mapSgpr(wf, virt_idx + i);
}
virt_idxs.push_back(virt_idx + i);
phys_idxs.push_back(phys_idx);
}
// Log only the first virt->phys pair; the rest are consecutive.
DPRINTF(GPUInst, "%s adding %s %s (%d->%d) operand that uses "
"%d registers.\n", disassemble(),
(opType == OpType::SRC_VEC || opType == OpType::DST_VEC) ?
"vector" : "scalar",
(opType == OpType::SRC_VEC || opType == OpType::SRC_SCALAR) ?
"src" : "dst", virt_idxs[0], phys_idxs[0], num_dwords);
op.setVirtToPhysMapping(virt_idxs, phys_idxs);
// Note: emplace_back copies op (it is an lvalue), so opVec holds its own
// copy with the mapping applied.
opVec.emplace_back(op);
}
void
GPUStaticInst::initDynOperandInfo(Wavefront *wf, ComputeUnit *cu)
{
// Lambda function, as this is only ever used here
auto generateVirtToPhysMap = [&](OperandInfo& op,
std::vector<OperandInfo>& opVec,
MapRegFn mapFn, OpType opType)
{
std::vector<int> virt_idxs;
std::vector<int> phys_idxs;
int num_dwords = op.sizeInDWords();
int virt_idx = op.registerIndex(wf->reservedScalarRegs);
int phys_idx = -1;
for (int i = 0; i < num_dwords; i++){
phys_idx = (cu->registerManager->*mapFn)(wf, virt_idx + i);
virt_idxs.push_back(virt_idx + i);
phys_idxs.push_back(phys_idx);
}
DPRINTF(GPUInst, "%s adding %s %s (%d->%d) operand that uses "
"%d registers.\n", disassemble(),
(opType == OpType::SRC_VEC || opType == OpType::DST_VEC) ?
"vector" : "scalar",
(opType == OpType::SRC_VEC || opType == OpType::SRC_SCALAR) ?
"src" : "dst", virt_idxs[0], phys_idxs[0], num_dwords);
op.setVirtToPhysMapping(virt_idxs, phys_idxs);
opVec.emplace_back(op);
};
for (auto& srcOp : srcOps) {
if (srcOp.isVectorReg()) {
generateVirtToPhysMap(srcOp, srcVecRegOps,
&RegisterManager::mapVgpr, OpType::SRC_VEC);
generateVirtToPhysMap(wf, cu, srcOp, srcVecRegOps,
OpType::SRC_VEC);
} else if (srcOp.isScalarReg()) {
generateVirtToPhysMap(srcOp, srcScalarRegOps,
&RegisterManager::mapSgpr, OpType::SRC_SCALAR);
generateVirtToPhysMap(wf, cu, srcOp, srcScalarRegOps,
OpType::SRC_SCALAR);
}
}
for (auto& dstOp : dstOps) {
if (dstOp.isVectorReg()) {
generateVirtToPhysMap(dstOp, dstVecRegOps,
&RegisterManager::mapVgpr, OpType::DST_VEC);
generateVirtToPhysMap(wf, cu, dstOp, dstVecRegOps,
OpType::DST_VEC);
} else if (dstOp.isScalarReg()) {
generateVirtToPhysMap(dstOp, dstScalarRegOps,
&RegisterManager::mapSgpr, OpType::DST_SCALAR);
generateVirtToPhysMap(wf, cu, dstOp, dstScalarRegOps,
OpType::DST_SCALAR);
}
}
}

View File

@@ -321,6 +321,9 @@ class GPUStaticInst : public GPUStaticInstFlags
int _ipdInstNum;
std::bitset<Num_Flags> _flags;
void generateVirtToPhysMap(Wavefront *wf, ComputeUnit *cu, OperandInfo& op,
std::vector<OperandInfo>& opVec, OpType opType);
};
class KernelLaunchStaticInst : public GPUStaticInst