arch-vega,gpu-compute: Fix misc ubsan runtime errors

Three main fixes:
 - Remove the lambda from initDynOperandInfo. UBSAN reported errors and
   exited because variables were not being captured properly. After a few
   failed attempts at adjusting the capture list, the lambda was moved to
   a new method.
 - Invalid data type size for some thread mask instructions. This might
   actually have caused silent bugs when the thread id was > 31.
 - Alignment issues with the operands.

Change-Id: I0297e10df0f0ab9730b6f1bd132602cd36b5e7ac
This commit is contained in:
Matthew Poremba
2024-04-24 10:42:22 -07:00
parent 3a2a917a53
commit 0faa9510f9
5 changed files with 54 additions and 42 deletions

View File

@@ -1224,7 +1224,8 @@ namespace VegaISA
src0.read();
src1.read();
sdst = src0.rawData() * src1.rawData();
ScalarRegI64 tmp = src0.rawData() * src1.rawData();
sdst = tmp & mask(32);
sdst.write();
} // execute

View File

@@ -8583,7 +8583,7 @@ namespace VegaISA
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
threadMask = ((1LL << lane) - 1LL);
threadMask = ((1ULL << lane) - 1ULL);
vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) +
src1[lane];
}
@@ -8633,7 +8633,7 @@ namespace VegaISA
for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
if (wf->execMask(lane)) {
threadMask = ((1LL << lane) - 1LL);
threadMask = ((1ULL << lane) - 1ULL);
vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) +
src1[lane];
}

View File

@@ -490,7 +490,7 @@ namespace VegaISA
typename std::enable_if<Condition, void>::type
setBit(int bit, int bit_val)
{
DataType &sgpr = *((DataType*)srfData.data());
GEM5_ALIGNED(8) DataType &sgpr = *((DataType*)srfData.data());
replaceBits(sgpr, bit, bit_val);
}
@@ -739,7 +739,7 @@ namespace VegaISA
* of a register is 1 dword. this class will take care to do the
* proper packing/unpacking of sub-dword operands.
*/
std::array<ScalarRegU32, NumDwords> srfData;
GEM5_ALIGNED(8) std::array<ScalarRegU32, NumDwords> srfData;
};
// typedefs for the various sizes/types of scalar operands

View File

@@ -54,55 +54,63 @@ GPUStaticInst::disassemble()
return disassembly;
}
// Map one operand's virtual register indices to physical registers and
// record the result. Extracted to a named method from a lambda that
// previously lived in initDynOperandInfo (see commit message: the lambda's
// capture list triggered UBSAN runtime errors).
//
// @param wf     wavefront owning the registers (supplies reservedScalarRegs)
// @param cu     compute unit whose registerManager performs the mapping
// @param op     operand to map; its virt->phys mapping is stored in place
// @param opVec  per-kind operand list that receives a copy of the mapped op
// @param opType selects vector (mapVgpr) vs. scalar (mapSgpr) mapping
void
GPUStaticInst::generateVirtToPhysMap(Wavefront *wf, ComputeUnit *cu,
OperandInfo& op,
std::vector<OperandInfo>& opVec,
OpType opType)
{
std::vector<int> virt_idxs;
std::vector<int> phys_idxs;
// A multi-dword operand occupies consecutive registers; map each dword.
int num_dwords = op.sizeInDWords();
int virt_idx = op.registerIndex(wf->reservedScalarRegs);
int phys_idx = -1;
for (int i = 0; i < num_dwords; i++) {
if (opType == OpType::SRC_VEC || opType == OpType::DST_VEC) {
phys_idx = cu->registerManager->mapVgpr(wf, virt_idx + i);
} else {
// Only four OpType values exist; anything non-vector must be scalar.
assert(opType == OpType::SRC_SCALAR ||
opType == OpType::DST_SCALAR);
phys_idx = cu->registerManager->mapSgpr(wf, virt_idx + i);
}
virt_idxs.push_back(virt_idx + i);
phys_idxs.push_back(phys_idx);
}
// Log only the first virt->phys pair; the rest are consecutive.
DPRINTF(GPUInst, "%s adding %s %s (%d->%d) operand that uses "
"%d registers.\n", disassemble(),
(opType == OpType::SRC_VEC || opType == OpType::DST_VEC) ?
"vector" : "scalar",
(opType == OpType::SRC_VEC || opType == OpType::SRC_SCALAR) ?
"src" : "dst", virt_idxs[0], phys_idxs[0], num_dwords);
op.setVirtToPhysMapping(virt_idxs, phys_idxs);
// Note: emplace_back copies op (it is an lvalue), so opVec holds its own
// copy with the mapping applied.
opVec.emplace_back(op);
}
void
GPUStaticInst::initDynOperandInfo(Wavefront *wf, ComputeUnit *cu)
{
// Lambda function, as this is only ever used here
auto generateVirtToPhysMap = [&](OperandInfo& op,
std::vector<OperandInfo>& opVec,
MapRegFn mapFn, OpType opType)
{
std::vector<int> virt_idxs;
std::vector<int> phys_idxs;
int num_dwords = op.sizeInDWords();
int virt_idx = op.registerIndex(wf->reservedScalarRegs);
int phys_idx = -1;
for (int i = 0; i < num_dwords; i++){
phys_idx = (cu->registerManager->*mapFn)(wf, virt_idx + i);
virt_idxs.push_back(virt_idx + i);
phys_idxs.push_back(phys_idx);
}
DPRINTF(GPUInst, "%s adding %s %s (%d->%d) operand that uses "
"%d registers.\n", disassemble(),
(opType == OpType::SRC_VEC || opType == OpType::DST_VEC) ?
"vector" : "scalar",
(opType == OpType::SRC_VEC || opType == OpType::SRC_SCALAR) ?
"src" : "dst", virt_idxs[0], phys_idxs[0], num_dwords);
op.setVirtToPhysMapping(virt_idxs, phys_idxs);
opVec.emplace_back(op);
};
for (auto& srcOp : srcOps) {
if (srcOp.isVectorReg()) {
generateVirtToPhysMap(srcOp, srcVecRegOps,
&RegisterManager::mapVgpr, OpType::SRC_VEC);
generateVirtToPhysMap(wf, cu, srcOp, srcVecRegOps,
OpType::SRC_VEC);
} else if (srcOp.isScalarReg()) {
generateVirtToPhysMap(srcOp, srcScalarRegOps,
&RegisterManager::mapSgpr, OpType::SRC_SCALAR);
generateVirtToPhysMap(wf, cu, srcOp, srcScalarRegOps,
OpType::SRC_SCALAR);
}
}
for (auto& dstOp : dstOps) {
if (dstOp.isVectorReg()) {
generateVirtToPhysMap(dstOp, dstVecRegOps,
&RegisterManager::mapVgpr, OpType::DST_VEC);
generateVirtToPhysMap(wf, cu, dstOp, dstVecRegOps,
OpType::DST_VEC);
} else if (dstOp.isScalarReg()) {
generateVirtToPhysMap(dstOp, dstScalarRegOps,
&RegisterManager::mapSgpr, OpType::DST_SCALAR);
generateVirtToPhysMap(wf, cu, dstOp, dstScalarRegOps,
OpType::DST_SCALAR);
}
}
}

View File

@@ -321,6 +321,9 @@ class GPUStaticInst : public GPUStaticInstFlags
int _ipdInstNum;
std::bitset<Num_Flags> _flags;
void generateVirtToPhysMap(Wavefront *wf, ComputeUnit *cu, OperandInfo& op,
std::vector<OperandInfo>& opVec, OpType opType);
};
class KernelLaunchStaticInst : public GPUStaticInst