cpu-o3, arch: Fix SMT bug arising from v23.0 and make gem5 more robust with SMT (#828)

This PR fixes https://github.com/gem5/gem5/issues/668. The first commit
fixes it for all ISAs other than Arm by setting the number of
architectural matrix registers to 0 for those ISAs which do not use
them.

The 2nd commit then partly fixes it for Arm as well: by removing
RenameMap::numFreeEntries we don't stall renaming unless a matrix
instruction is encountered. This means most binaries will run with SMT
as long as they don't use FEAT_SME instructions. Please note: this is
not simply an SMT fix; it will generally address a shortcoming in the
way we were renaming instructions.

If an Arm binary wants to use SMT with FEAT_SME, the 4th commit will
make sure the lack of physical registers is notified explicitly at the
beginning of simulation, rather than silently blocking renaming.
This commit is contained in:
Giacomo Travaglini
2024-02-19 08:52:31 +00:00
committed by GitHub
7 changed files with 37 additions and 22 deletions

View File

@@ -105,7 +105,7 @@ constexpr RegClass vecElemClass(VecElemClass, VecElemClassName, 2,
debug::IntRegs);
constexpr RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1,
debug::IntRegs);
constexpr RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
constexpr RegClass matRegClass(MatRegClass, MatRegClassName, 0, debug::MatRegs);
constexpr RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs);
} // anonymous namespace

View File

@@ -57,7 +57,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs);
RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs);
RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1,
debug::IntRegs);
RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
RegClass matRegClass(MatRegClass, MatRegClassName, 0, debug::MatRegs);
RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs);
} // anonymous namespace

View File

@@ -74,7 +74,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs);
RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs);
RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1,
debug::IntRegs);
RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
RegClass matRegClass(MatRegClass, MatRegClassName, 0, debug::MatRegs);
RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs);
} // anonymous namespace

View File

@@ -147,7 +147,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs);
RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs);
RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1,
debug::IntRegs);
RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
RegClass matRegClass(MatRegClass, MatRegClassName, 0, debug::MatRegs);
} // anonymous namespace

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2012, 2014, 2016, 2017, 2019-2020 ARM Limited
* Copyright (c) 2011-2012, 2014, 2016, 2017, 2019-2020, 2024 Arm Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -193,18 +193,36 @@ CPU::CPU(const BaseO3CPUParams &params)
assert(numThreads);
const auto &regClasses = params.isa[0]->regClasses();
assert(params.numPhysIntRegs >=
numThreads * regClasses.at(IntRegClass)->numRegs());
assert(params.numPhysFloatRegs >=
numThreads * regClasses.at(FloatRegClass)->numRegs());
assert(params.numPhysVecRegs >=
numThreads * regClasses.at(VecRegClass)->numRegs());
assert(params.numPhysVecPredRegs >=
numThreads * regClasses.at(VecPredRegClass)->numRegs());
assert(params.numPhysMatRegs >=
numThreads * regClasses.at(MatRegClass)->numRegs());
assert(params.numPhysCCRegs >=
numThreads * regClasses.at(CCRegClass)->numRegs());
panic_if(params.numPhysIntRegs <=
numThreads * regClasses.at(IntRegClass)->numRegs() &&
regClasses.at(IntRegClass)->numRegs() != 0,
"Not enough physical registers, consider increasing "
"numPhysIntRegs\n");
panic_if(params.numPhysFloatRegs <=
numThreads * regClasses.at(FloatRegClass)->numRegs() &&
regClasses.at(FloatRegClass)->numRegs() != 0,
"Not enough physical registers, consider increasing "
"numPhysFloatRegs\n");
panic_if(params.numPhysVecRegs <=
numThreads * regClasses.at(VecRegClass)->numRegs() &&
regClasses.at(VecRegClass)->numRegs() != 0,
"Not enough physical registers, consider increasing "
"numPhysVecRegs\n");
panic_if(params.numPhysVecPredRegs <=
numThreads * regClasses.at(VecPredRegClass)->numRegs() &&
regClasses.at(VecPredRegClass)->numRegs() != 0,
"Not enough physical registers, consider increasing "
"numPhysVecPredRegs\n");
panic_if(params.numPhysMatRegs <=
numThreads * regClasses.at(MatRegClass)->numRegs() &&
regClasses.at(MatRegClass)->numRegs() != 0,
"Not enough physical registers, consider increasing "
"numPhysMatRegs\n");
panic_if(params.numPhysCCRegs <=
numThreads * regClasses.at(CCRegClass)->numRegs() &&
regClasses.at(CCRegClass)->numRegs() != 0,
"Not enough physical registers, consider increasing "
"numPhysCCRegs\n");
// Just make this a warning and go ahead anyway, to keep from having to
// add checks everywhere.

View File

@@ -1225,9 +1225,6 @@ Rename::checkStall(ThreadID tid)
} else if (calcFreeLQEntries(tid) <= 0 && calcFreeSQEntries(tid) <= 0) {
DPRINTF(Rename,"[tid:%i] Stall: LSQ has 0 free entries.\n", tid);
ret_val = true;
} else if (renameMap[tid]->numFreeEntries() <= 0) {
DPRINTF(Rename,"[tid:%i] Stall: RenameMap has 0 free entries.\n", tid);
ret_val = true;
} else if (renameStatus[tid] == SerializeStall &&
(!emptyROB[tid] || instsInProgress[tid])) {
DPRINTF(Rename,"[tid:%i] Stall: Serialize stall and ROB is not "
@@ -1263,7 +1260,7 @@ Rename::readFreeEntries(ThreadID tid)
freeEntries[tid].robEntries,
freeEntries[tid].lqEntries,
freeEntries[tid].sqEntries,
renameMap[tid]->numFreeEntries(),
renameMap[tid]->minFreeEntries(),
renameMap[tid]->numFreeEntries(IntRegClass),
renameMap[tid]->numFreeEntries(FloatRegClass),
renameMap[tid]->numFreeEntries(VecRegClass),

View File

@@ -268,7 +268,7 @@ class UnifiedRenameMap
* of registers is requested.
*/
unsigned
numFreeEntries() const
minFreeEntries() const
{
auto min_free = std::numeric_limits<unsigned>::max();
for (auto &map: renameMaps) {