From d031244ca70d86e1fa44d8d92c0dd4701a9a3db3 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Mon, 15 Jan 2024 11:48:51 +0000 Subject: [PATCH 1/4] misc: When unused, set #MatRegClass registers to 0 This is working around an existing SMT issue [1]. The BaseO3CPU uses two physical matrix registers [2]. This is enough for a single threaded CPU which as of now uses 1 architectural matrix only. The problem arises when SMT is enabled. As 2 architectural matrices need to be supported by a single CPU, the O3CPU won't have any available register in the freeList for renaming. This causes the SMT O3CPU to indefinitely stall renaming [3] If the archtectural number of registers is seto to 0, the regclass won't be taken into consideration when evaluating if we can rename instructions. This issue has been implicitly fixed for RISCV by a preceding PR [4] [1]: https://github.com/gem5/gem5/issues/668 [2]: https://github.com/gem5/gem5/blob/stable/src/cpu/o3/BaseO3CPU.py#L170 [3]: https://github.com/gem5/gem5/blob/stable/src/cpu/o3/rename.cc#L1228 [4]: https://github.com/gem5/gem5/pull/83 Change-Id: I99bfdefff11a246b1f191251dc67689e95b3f0db Signed-off-by: Giacomo Travaglini --- src/arch/mips/isa.cc | 2 +- src/arch/power/isa.cc | 2 +- src/arch/sparc/isa.cc | 2 +- src/arch/x86/isa.cc | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/arch/mips/isa.cc b/src/arch/mips/isa.cc index 92799ab291..7795456f15 100644 --- a/src/arch/mips/isa.cc +++ b/src/arch/mips/isa.cc @@ -105,7 +105,7 @@ constexpr RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); constexpr RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); -constexpr RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); +constexpr RegClass matRegClass(MatRegClass, MatRegClassName, 0, debug::MatRegs); constexpr RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace diff --git a/src/arch/power/isa.cc b/src/arch/power/isa.cc index ecaebade9a..50fbfd346d 100644 --- a/src/arch/power/isa.cc +++ b/src/arch/power/isa.cc @@ -57,7 +57,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs); RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); -RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 0, debug::MatRegs); RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace diff --git a/src/arch/sparc/isa.cc b/src/arch/sparc/isa.cc index e7807c2b0a..9bb169aefe 100644 --- a/src/arch/sparc/isa.cc +++ b/src/arch/sparc/isa.cc @@ -74,7 +74,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs); RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); -RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 0, debug::MatRegs); RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace diff --git a/src/arch/x86/isa.cc b/src/arch/x86/isa.cc index 7d401a6c59..dd49326a4f 100644 --- a/src/arch/x86/isa.cc +++ b/src/arch/x86/isa.cc @@ -147,7 +147,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs); RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); -RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 0, debug::MatRegs); } // anonymous namespace From 86158de22090160547ef6de0c54357d4a003a340 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Wed, 17 Jan 2024 11:41:48 +0000 Subject: [PATCH 2/4] cpu-o3: Stop using RenameMap::numFreeEntries The method is extracting the minimum number of [1] non-zero free registers/entries across all register classes. This means that if we have saturated all register storage for a particular class, renaming will stop as a whole. I believe it does make sense to keep renaming and only block renaming in case an instruction requiring the particular register type is encountered. This would happen with the Rename::renameInsts method [1]: https://github.com/gem5/gem5/blob/stable/src/cpu/o3/rename_map.hh#L269 [2]: https://github.com/gem5/gem5/blob/stable/src/cpu/o3/rename.cc#L662 Change-Id: I932826a77a5c0b2e05d8fdcab0e6ca13cf0e3d23 Signed-off-by: Giacomo Travaglini --- src/cpu/o3/rename.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc index c20edc2e46..9a5be35f38 100644 --- a/src/cpu/o3/rename.cc +++ b/src/cpu/o3/rename.cc @@ -1225,9 +1225,6 @@ Rename::checkStall(ThreadID tid) } else if (calcFreeLQEntries(tid) <= 0 && calcFreeSQEntries(tid) <= 0) { DPRINTF(Rename,"[tid:%i] Stall: LSQ has 0 free entries.\n", tid); ret_val = true; - } else if (renameMap[tid]->numFreeEntries() <= 0) { - DPRINTF(Rename,"[tid:%i] Stall: RenameMap has 0 free entries.\n", tid); - ret_val = true; } else if (renameStatus[tid] == SerializeStall && (!emptyROB[tid] || instsInProgress[tid])) { DPRINTF(Rename,"[tid:%i] Stall: Serialize stall and ROB is not " From 1fb7c1ad7e024881959a7ae8a8d87617eaf308c1 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Wed, 17 Jan 2024 13:03:11 +0000 Subject: [PATCH 3/4] cpu-o3: Rename numFreeEntries into minFreeEntries Change-Id: I89faeb001ebdcbc90ea88508f8d231ec6e7fe197 Signed-off-by: Giacomo Travaglini --- src/cpu/o3/rename.cc | 2 +- src/cpu/o3/rename_map.hh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc index 9a5be35f38..980252e19c 100644 --- a/src/cpu/o3/rename.cc +++ b/src/cpu/o3/rename.cc @@ -1260,7 +1260,7 @@ Rename::readFreeEntries(ThreadID tid) freeEntries[tid].robEntries, freeEntries[tid].lqEntries, freeEntries[tid].sqEntries, - renameMap[tid]->numFreeEntries(), + renameMap[tid]->minFreeEntries(), renameMap[tid]->numFreeEntries(IntRegClass), renameMap[tid]->numFreeEntries(FloatRegClass), renameMap[tid]->numFreeEntries(VecRegClass), diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh index 640c2acbba..b208f53a54 100644 --- a/src/cpu/o3/rename_map.hh +++ b/src/cpu/o3/rename_map.hh @@ -266,7 +266,7 @@ class UnifiedRenameMap * of registers is requested. */ unsigned - numFreeEntries() const + minFreeEntries() const { auto min_free = std::numeric_limits::max(); for (auto &map: renameMaps) { From 4eb0cd44fcc57329e83b0c738637913924356b06 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Wed, 17 Jan 2024 13:04:16 +0000 Subject: [PATCH 4/4] cpu-o3: Restrict constraint on number of physical registers Having the number of physical registers matching exactly the number of architectural ones does not guarantee a proper execution as it means the freeList would have 0 registers available for renaming. In this case the worst would happen: renaming would silently stall execution indefinitely. With this change we report the issue to the user and fail execution Change-Id: I1eb968802f1a1a5115012f44b541542a682f887d Signed-off-by: Giacomo Travaglini --- src/cpu/o3/cpu.cc | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index a93d5bc74d..e161d88daa 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012, 2014, 2016, 2017, 2019-2020 ARM Limited + * Copyright (c) 2011-2012, 2014, 2016, 2017, 2019-2020, 2024 Arm Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -193,18 +193,36 @@ CPU::CPU(const BaseO3CPUParams ¶ms) assert(numThreads); const auto ®Classes = params.isa[0]->regClasses(); - assert(params.numPhysIntRegs >= - numThreads * regClasses.at(IntRegClass)->numRegs()); - assert(params.numPhysFloatRegs >= - numThreads * regClasses.at(FloatRegClass)->numRegs()); - assert(params.numPhysVecRegs >= - numThreads * regClasses.at(VecRegClass)->numRegs()); - assert(params.numPhysVecPredRegs >= - numThreads * regClasses.at(VecPredRegClass)->numRegs()); - assert(params.numPhysMatRegs >= - numThreads * regClasses.at(MatRegClass)->numRegs()); - assert(params.numPhysCCRegs >= - numThreads * regClasses.at(CCRegClass)->numRegs()); + panic_if(params.numPhysIntRegs <= + numThreads * regClasses.at(IntRegClass)->numRegs() && + regClasses.at(IntRegClass)->numRegs() != 0, + "Not enough physical registers, consider increasing " + "numPhysIntRegs\n"); + panic_if(params.numPhysFloatRegs <= + numThreads * regClasses.at(FloatRegClass)->numRegs() && + regClasses.at(FloatRegClass)->numRegs() != 0, + "Not enough physical registers, consider increasing " + "numPhysFloatRegs\n"); + panic_if(params.numPhysVecRegs <= + numThreads * regClasses.at(VecRegClass)->numRegs() && + regClasses.at(VecRegClass)->numRegs() != 0, + "Not enough physical registers, consider increasing " + "numPhysVecRegs\n"); + panic_if(params.numPhysVecPredRegs <= + numThreads * regClasses.at(VecPredRegClass)->numRegs() && + regClasses.at(VecPredRegClass)->numRegs() != 0, + "Not enough physical registers, consider increasing " + "numPhysVecPredRegs\n"); + panic_if(params.numPhysMatRegs <= + numThreads * regClasses.at(MatRegClass)->numRegs() && + regClasses.at(MatRegClass)->numRegs() != 0, + "Not enough physical registers, consider increasing " + "numPhysMatRegs\n"); + panic_if(params.numPhysCCRegs <= + numThreads * regClasses.at(CCRegClass)->numRegs() && + regClasses.at(CCRegClass)->numRegs() != 0, + "Not enough physical registers, consider increasing " + "numPhysCCRegs\n"); // Just make this a warning and go ahead anyway, to keep from having to // add checks everywhere.