From fed81f34084ad46fc663ec236ce0e700881cc3c2 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Tue, 9 Aug 2022 09:37:47 +0100 Subject: [PATCH] arch,cpu: Add boilerplate support for matrix registers We add initial support for matrix registers to the CPU models and add stubs in each architecture. There are no implementations of matrix registers added, but this provides the basic support for using them in the future. Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289 Change-Id: I2ca6a21da932a58a801a0d08f0ad0cdca4968d02 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64333 Maintainer: Giacomo Travaglini Reviewed-by: Giacomo Travaglini Tested-by: kokoro --- src/arch/SConscript | 3 ++- src/arch/arm/isa.cc | 3 +++ src/arch/mips/isa.cc | 3 +++ src/arch/power/isa.cc | 3 +++ src/arch/riscv/isa.cc | 3 +++ src/arch/sparc/isa.cc | 3 +++ src/arch/x86/isa.cc | 3 +++ src/cpu/StaticInstFlags.py | 1 + src/cpu/minor/scoreboard.cc | 4 ++++ src/cpu/minor/scoreboard.hh | 5 ++++- src/cpu/o3/BaseO3CPU.py | 1 + src/cpu/o3/cpu.cc | 3 +++ src/cpu/o3/inst_queue.cc | 1 + src/cpu/o3/regfile.cc | 18 ++++++++++++++++++ src/cpu/o3/regfile.hh | 22 ++++++++++++++++++++++ src/cpu/o3/rename.cc | 9 ++++++++- src/cpu/o3/rename.hh | 1 + src/cpu/reg_class.hh | 2 ++ src/cpu/simple/base.cc | 6 ++++++ src/cpu/simple/exec_context.hh | 16 ++++++++++++++++ src/cpu/simple_thread.cc | 1 + src/cpu/simple_thread.hh | 1 + src/cpu/static_inst.hh | 1 + src/cpu/thread_context.cc | 14 ++++++++++++++ 24 files changed, 124 insertions(+), 3 deletions(-) diff --git a/src/arch/SConscript b/src/arch/SConscript index 90d7ad7700..7285c0ec59 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -231,10 +231,11 @@ DebugFlag('IntRegs') DebugFlag('FloatRegs') DebugFlag('VecRegs') DebugFlag('VecPredRegs') +DebugFlag('MatRegs') DebugFlag('CCRegs') DebugFlag('MiscRegs') CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'VecRegs', 'VecPredRegs', - 'CCRegs', 'MiscRegs' ]) + 'MatRegs', 'CCRegs', 
'MiscRegs' ]) DebugFlag('Decoder', "Decoder debug output") DebugFlag('Faults', "Information about faults, exceptions, interrupts, etc") diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index 543e0eba7b..617f144bae 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -54,6 +54,7 @@ #include "cpu/reg_class.hh" #include "debug/Arm.hh" #include "debug/LLSC.hh" +#include "debug/MatRegs.hh" #include "debug/VecPredRegs.hh" #include "debug/VecRegs.hh" #include "dev/arm/generic_timer.hh" @@ -75,6 +76,7 @@ namespace /* Not applicable to ARM */ RegClass floatRegClass(FloatRegClass, FloatRegClassName, 0, debug::FloatRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); } // anonymous namespace @@ -86,6 +88,7 @@ ISA::ISA(const Params &p) : BaseISA(p), system(NULL), _regClasses.push_back(&vecRegClass); _regClasses.push_back(&vecElemClass); _regClasses.push_back(&vecPredRegClass); + _regClasses.push_back(&matRegClass); _regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); diff --git a/src/arch/mips/isa.cc b/src/arch/mips/isa.cc index 6f39a81244..92799ab291 100644 --- a/src/arch/mips/isa.cc +++ b/src/arch/mips/isa.cc @@ -38,6 +38,7 @@ #include "cpu/base.hh" #include "cpu/reg_class.hh" #include "cpu/thread_context.hh" +#include "debug/MatRegs.hh" #include "debug/MipsPRA.hh" #include "params/MipsISA.hh" @@ -104,6 +105,7 @@ constexpr RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); constexpr RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); +constexpr RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); constexpr RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace @@ -116,6 +118,7 @@ ISA::ISA(const Params &p) : BaseISA(p), numThreads(p.num_threads), _regClasses.push_back(&vecRegClass); _regClasses.push_back(&vecElemClass); _regClasses.push_back(&vecPredRegClass); + _regClasses.push_back(&matRegClass); 
_regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); diff --git a/src/arch/power/isa.cc b/src/arch/power/isa.cc index 80c984cfc4..ecaebade9a 100644 --- a/src/arch/power/isa.cc +++ b/src/arch/power/isa.cc @@ -41,6 +41,7 @@ #include "arch/power/regs/int.hh" #include "arch/power/regs/misc.hh" #include "cpu/thread_context.hh" +#include "debug/MatRegs.hh" #include "params/PowerISA.hh" namespace gem5 @@ -56,6 +57,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs); RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace @@ -67,6 +69,7 @@ ISA::ISA(const Params &p) : BaseISA(p) _regClasses.push_back(&vecRegClass); _regClasses.push_back(&vecElemClass); _regClasses.push_back(&vecPredRegClass); + _regClasses.push_back(&matRegClass); _regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); clear(); diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index 3b4f378afa..6e4c380d98 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -48,6 +48,7 @@ #include "cpu/base.hh" #include "debug/Checkpoint.hh" #include "debug/LLSC.hh" +#include "debug/MatRegs.hh" #include "debug/RiscvMisc.hh" #include "mem/packet.hh" #include "mem/request.hh" @@ -235,6 +236,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs); RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace @@ -247,6 +249,7 @@ ISA::ISA(const Params &p) : _regClasses.push_back(&vecRegClass); 
_regClasses.push_back(&vecElemClass); _regClasses.push_back(&vecPredRegClass); + _regClasses.push_back(&matRegClass); _regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); diff --git a/src/arch/sparc/isa.cc b/src/arch/sparc/isa.cc index 255dbb0b09..38b3d1c3e2 100644 --- a/src/arch/sparc/isa.cc +++ b/src/arch/sparc/isa.cc @@ -39,6 +39,7 @@ #include "base/trace.hh" #include "cpu/base.hh" #include "cpu/thread_context.hh" +#include "debug/MatRegs.hh" #include "debug/Timer.hh" #include "params/SparcISA.hh" @@ -73,6 +74,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs); RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace @@ -84,6 +86,7 @@ ISA::ISA(const Params &p) : BaseISA(p) _regClasses.push_back(&vecRegClass); _regClasses.push_back(&vecElemClass); _regClasses.push_back(&vecPredRegClass); + _regClasses.push_back(&matRegClass); _regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); diff --git a/src/arch/x86/isa.cc b/src/arch/x86/isa.cc index 6578b1c716..31efae3a43 100644 --- a/src/arch/x86/isa.cc +++ b/src/arch/x86/isa.cc @@ -37,6 +37,7 @@ #include "base/compiler.hh" #include "cpu/base.hh" #include "cpu/thread_context.hh" +#include "debug/MatRegs.hh" #include "params/X86ISA.hh" #include "sim/serialize.hh" @@ -146,6 +147,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs); RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); } // anonymous namespace @@ -159,6 +161,7 @@ ISA::ISA(const X86ISAParams &p) : BaseISA(p), vendorString(p.vendor_string) 
_regClasses.push_back(&vecRegClass); _regClasses.push_back(&vecElemClass); _regClasses.push_back(&vecPredRegClass); + _regClasses.push_back(&matRegClass); _regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); diff --git a/src/cpu/StaticInstFlags.py b/src/cpu/StaticInstFlags.py index b7e03a6fb9..d562dd5645 100644 --- a/src/cpu/StaticInstFlags.py +++ b/src/cpu/StaticInstFlags.py @@ -52,6 +52,7 @@ class StaticInstFlags(Enum): "IsFloating", # References FP regs. "IsVector", # References Vector regs. "IsVectorElem", # References Vector reg elems. + "IsMatrix", # References Matrix regs. "IsLoad", # Reads from memory (load or prefetch). "IsStore", # Writes to memory. "IsAtomic", # Does atomic RMW to memory. diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc index 356fdc7e01..475d650d3a 100644 --- a/src/cpu/minor/scoreboard.cc +++ b/src/cpu/minor/scoreboard.cc @@ -70,6 +70,10 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index) scoreboard_index = vecPredRegOffset + reg.index(); ret = true; break; + case MatRegClass: + scoreboard_index = matRegOffset + reg.index(); + ret = true; + break; case CCRegClass: scoreboard_index = ccRegOffset + reg.index(); ret = true; diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh index bccb9c5b80..d3df324b99 100644 --- a/src/cpu/minor/scoreboard.hh +++ b/src/cpu/minor/scoreboard.hh @@ -72,6 +72,7 @@ class Scoreboard : public Named const unsigned ccRegOffset; const unsigned vecRegOffset; const unsigned vecPredRegOffset; + const unsigned matRegOffset; /** The number of registers in the Scoreboard. 
These * are just the integer, CC and float registers packed @@ -116,7 +117,9 @@ class Scoreboard : public Named vecRegOffset(ccRegOffset + reg_classes.at(CCRegClass)->numRegs()), vecPredRegOffset(vecRegOffset + reg_classes.at(VecElemClass)->numRegs()), - numRegs(vecPredRegOffset + reg_classes.at(VecPredRegClass)->numRegs()), + matRegOffset(vecPredRegOffset + + reg_classes.at(VecPredRegClass)->numRegs()), + numRegs(matRegOffset + reg_classes.at(MatRegClass)->numRegs()), numResults(numRegs, 0), numUnpredictableResults(numRegs, 0), fuIndices(numRegs, invalidFUIndex), diff --git a/src/cpu/o3/BaseO3CPU.py b/src/cpu/o3/BaseO3CPU.py index 07d9df6b7f..2e1a602e4c 100644 --- a/src/cpu/o3/BaseO3CPU.py +++ b/src/cpu/o3/BaseO3CPU.py @@ -168,6 +168,7 @@ class BaseO3CPU(BaseCPU): numPhysVecPredRegs = Param.Unsigned( 32, "Number of physical predicate registers" ) + numPhysMatRegs = Param.Unsigned(2, "Number of physical matrix registers") # most ISAs don't use condition-code regs, so default is 0 numPhysCCRegs = Param.Unsigned(0, "Number of physical cc registers") numIQEntries = Param.Unsigned(64, "Number of instruction queue entries") diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 48ccd94b54..d2bacaa523 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -90,6 +90,7 @@ CPU::CPU(const BaseO3CPUParams &params) params.numPhysFloatRegs, params.numPhysVecRegs, params.numPhysVecPredRegs, + params.numPhysMatRegs, params.numPhysCCRegs, params.isa[0]->regClasses()), @@ -200,6 +201,8 @@ CPU::CPU(const BaseO3CPUParams &params) numThreads * regClasses.at(VecRegClass)->numRegs()); assert(params.numPhysVecPredRegs >= numThreads * regClasses.at(VecPredRegClass)->numRegs()); + assert(params.numPhysMatRegs >= + numThreads * regClasses.at(MatRegClass)->numRegs()); assert(params.numPhysCCRegs >= numThreads * regClasses.at(CCRegClass)->numRegs()); diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc index 72cb7356ef..ee286fc585 100644 --- a/src/cpu/o3/inst_queue.cc +++
b/src/cpu/o3/inst_queue.cc @@ -108,6 +108,7 @@ InstructionQueue::InstructionQueue(CPU *cpu_ptr, IEW *iew_ptr, reg_classes.at(VecElemClass)->numRegs() / reg_classes.at(VecRegClass)->numRegs()) + params.numPhysVecPredRegs + + params.numPhysMatRegs + params.numPhysCCRegs; //Create an entry for each physical register within the diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc index dcb8f704f0..1bc7032ebd 100644 --- a/src/cpu/o3/regfile.cc +++ b/src/cpu/o3/regfile.cc @@ -53,6 +53,7 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, unsigned _numPhysicalVecRegs, unsigned _numPhysicalVecPredRegs, + unsigned _numPhysicalMatRegs, unsigned _numPhysicalCCRegs, const BaseISA::RegClasses &reg_classes) : intRegFile(*reg_classes.at(IntRegClass), _numPhysicalIntRegs), @@ -63,6 +64,7 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, reg_classes.at(VecRegClass)->numRegs())), vecPredRegFile(*reg_classes.at(VecPredRegClass), _numPhysicalVecPredRegs), + matRegFile(*reg_classes.at(MatRegClass), _numPhysicalMatRegs), ccRegFile(*reg_classes.at(CCRegClass), _numPhysicalCCRegs), numPhysicalIntRegs(_numPhysicalIntRegs), numPhysicalFloatRegs(_numPhysicalFloatRegs), @@ -71,12 +73,14 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, reg_classes.at(VecElemClass)->numRegs() / reg_classes.at(VecRegClass)->numRegs())), numPhysicalVecPredRegs(_numPhysicalVecPredRegs), + numPhysicalMatRegs(_numPhysicalMatRegs), numPhysicalCCRegs(_numPhysicalCCRegs), totalNumRegs(_numPhysicalIntRegs + _numPhysicalFloatRegs + _numPhysicalVecRegs + numPhysicalVecElemRegs + _numPhysicalVecPredRegs + + _numPhysicalMatRegs + _numPhysicalCCRegs) { RegIndex phys_reg; @@ -115,6 +119,13 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, flat_reg_idx++); } + // The next batch of the registers are the matrix physical + // registers; put them onto the matrix free list.
+ for (phys_reg = 0; phys_reg < numPhysicalMatRegs; phys_reg++) { + matRegIds.emplace_back(*reg_classes.at(MatRegClass), phys_reg, + flat_reg_idx++); + } + // The rest of the registers are the condition-code physical // registers; put them onto the condition-code free list. for (phys_reg = 0; phys_reg < numPhysicalCCRegs; phys_reg++) { @@ -167,6 +178,13 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList) } freeList->addRegs(vecPredRegIds.begin(), vecPredRegIds.end()); + /* The next batch of the registers are the matrix physical + * registers; put them onto the matrix free list. */ + for (reg_idx = 0; reg_idx < numPhysicalMatRegs; reg_idx++) { + assert(matRegIds[reg_idx].index() == reg_idx); + } + freeList->addRegs(matRegIds.begin(), matRegIds.end()); + // The rest of the registers are the condition-code physical // registers; put them onto the condition-code free list. for (reg_idx = 0; reg_idx < numPhysicalCCRegs; reg_idx++) { diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 4fea589ad7..13c9899f13 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -91,6 +91,10 @@ class PhysRegFile RegFile vecPredRegFile; std::vector vecPredRegIds; + /** Matrix register file. */ + RegFile matRegFile; + std::vector matRegIds; + /** Condition-code register file. 
*/ RegFile ccRegFile; std::vector ccRegIds; @@ -123,6 +127,11 @@ class PhysRegFile */ unsigned numPhysicalVecPredRegs; + /** + * Number of physical matrix registers + */ + unsigned numPhysicalMatRegs; + /** * Number of physical CC registers */ @@ -140,6 +149,7 @@ class PhysRegFile unsigned _numPhysicalFloatRegs, unsigned _numPhysicalVecRegs, unsigned _numPhysicalVecPredRegs, + unsigned _numPhysicalMatRegs, unsigned _numPhysicalCCRegs, const BaseISA::RegClasses &classes); @@ -218,6 +228,11 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Access to predicate register %i, has " "data %s\n", idx, vecPredRegFile.regClass.valString(val)); break; + case MatRegClass: + matRegFile.get(idx, val); + DPRINTF(IEW, "RegFile: Access to matrix register %i, has " + "data %s\n", idx, matRegFile.regClass.valString(val)); + break; case CCRegClass: *(RegVal *)val = getReg(phys_reg); break; @@ -237,6 +252,8 @@ class PhysRegFile return vectorRegFile.ptr(idx); case VecPredRegClass: return vecPredRegFile.ptr(idx); + case MatRegClass: + return matRegFile.ptr(idx); default: panic("Unrecognized register class type %d.", type); } @@ -302,6 +319,11 @@ class PhysRegFile idx, vecPredRegFile.regClass.valString(val)); vecPredRegFile.set(idx, val); break; + case MatRegClass: + DPRINTF(IEW, "RegFile: Setting matrix register %i to %s\n", + idx, matRegFile.regClass.valString(val)); + matRegFile.set(idx, val); + break; case CCRegClass: setReg(phys_reg, *(RegVal *)val); break; diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc index f3783d402a..f8c305eb1c 100644 --- a/src/cpu/o3/rename.cc +++ b/src/cpu/o3/rename.cc @@ -134,6 +134,8 @@ Rename::RenameStats::RenameStats(statistics::Group *parent) "Number of vector rename lookups"), ADD_STAT(vecPredLookups, statistics::units::Count::get(), "Number of vector predicate rename lookups"), + ADD_STAT(matLookups, statistics::units::Count::get(), + "Number of matrix rename lookups"), ADD_STAT(committedMaps, statistics::units::Count::get(), "Number of HB maps that 
are committed"), ADD_STAT(undoneMaps, statistics::units::Count::get(), @@ -167,6 +169,7 @@ Rename::RenameStats::RenameStats(statistics::Group *parent) fpLookups.prereq(fpLookups); vecLookups.prereq(vecLookups); vecPredLookups.prereq(vecPredLookups); + matLookups.prereq(matLookups); committedMaps.prereq(committedMaps); undoneMaps.prereq(undoneMaps); @@ -1034,6 +1037,9 @@ Rename::renameSrcRegs(const DynInstPtr &inst, ThreadID tid) case VecPredRegClass: stats.vecPredLookups++; break; + case MatRegClass: + stats.matLookups++; + break; case CCRegClass: case MiscRegClass: break; @@ -1248,7 +1254,7 @@ Rename::readFreeEntries(ThreadID tid) } DPRINTF(Rename, "[tid:%i] Free IQ: %i, Free ROB: %i, " - "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i %i %i)\n", + "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i %i %i %i)\n", tid, freeEntries[tid].iqEntries, freeEntries[tid].robEntries, @@ -1260,6 +1266,7 @@ Rename::readFreeEntries(ThreadID tid) renameMap[tid]->numFreeEntries(VecRegClass), renameMap[tid]->numFreeEntries(VecElemClass), renameMap[tid]->numFreeEntries(VecPredRegClass), + renameMap[tid]->numFreeEntries(MatRegClass), renameMap[tid]->numFreeEntries(CCRegClass)); DPRINTF(Rename, "[tid:%i] %i instructions not yet in ROB\n", diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index 0b42b6eaa0..61ef476501 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -521,6 +521,7 @@ class Rename statistics::Scalar fpLookups; statistics::Scalar vecLookups; statistics::Scalar vecPredLookups; + statistics::Scalar matLookups; /** Stat for total number of committed renaming mappings. */ statistics::Scalar committedMaps; /** Stat for total number of mappings that were undone due to a diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh index 080c758413..37618e530a 100644 --- a/src/cpu/reg_class.hh +++ b/src/cpu/reg_class.hh @@ -64,6 +64,7 @@ enum RegClassType /** Vector Register Native Elem lane. 
*/ VecElemClass, VecPredRegClass, + MatRegClass, ///< Matrix Register CCRegClass, ///< Condition-code register MiscRegClass, ///< Control (misc) register InvalidRegClass = -1 @@ -75,6 +76,7 @@ inline constexpr char FloatRegClassName[] = "floating_point"; inline constexpr char VecRegClassName[] = "vector"; inline constexpr char VecElemClassName[] = "vector_element"; inline constexpr char VecPredRegClassName[] = "vector_predicate"; +inline constexpr char MatRegClassName[] = "matrix"; inline constexpr char CCRegClassName[] = "condition_code"; inline constexpr char MiscRegClassName[] = "miscellaneous"; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index ab67f39496..768f63ede5 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -418,6 +418,12 @@ BaseSimpleCPU::postExecute() t_info.execContextStats.numVecInsts++; } + //Matrix alu accesses + if (curStaticInst->isMatrix()){ + t_info.execContextStats.numMatAluAccesses++; + t_info.execContextStats.numMatInsts++; + } + //number of function calls/returns to get window accesses if (curStaticInst->isCall() || curStaticInst->isReturn()){ t_info.execContextStats.numCallsReturns++; diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index e51ec88dce..0f20763f28 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -96,6 +96,8 @@ class SimpleExecContext : public ExecContext "Number of float alu accesses"), ADD_STAT(numVecAluAccesses, statistics::units::Count::get(), "Number of vector alu accesses"), + ADD_STAT(numMatAluAccesses, statistics::units::Count::get(), + "Number of matrix alu accesses"), ADD_STAT(numCallsReturns, statistics::units::Count::get(), "Number of times a function call or return occured"), ADD_STAT(numCondCtrlInsts, statistics::units::Count::get(), @@ -106,6 +108,8 @@ class SimpleExecContext : public ExecContext "Number of float instructions"), ADD_STAT(numVecInsts, statistics::units::Count::get(), "Number of vector 
instructions"), + ADD_STAT(numMatInsts, statistics::units::Count::get(), + "Number of matrix instructions"), ADD_STAT(numIntRegReads, statistics::units::Count::get(), "Number of times the integer registers were read"), ADD_STAT(numIntRegWrites, statistics::units::Count::get(), @@ -162,6 +166,7 @@ class SimpleExecContext : public ExecContext &numVecRegReads, &numVecRegReads, &numVecPredRegReads, + &numMatRegReads, &numCCRegReads }, numRegWrites{ @@ -170,6 +175,7 @@ class SimpleExecContext : public ExecContext &numVecRegWrites, &numVecRegWrites, &numVecPredRegWrites, + &numMatRegWrites, &numCCRegWrites } { @@ -220,6 +226,9 @@ class SimpleExecContext : public ExecContext // Number of vector alu accesses statistics::Scalar numVecAluAccesses; + // Number of matrix alu accesses + statistics::Scalar numMatAluAccesses; + // Number of function calls/returns statistics::Scalar numCallsReturns; @@ -235,6 +244,9 @@ class SimpleExecContext : public ExecContext // Number of vector instructions statistics::Scalar numVecInsts; + // Number of matrix instructions + statistics::Scalar numMatInsts; + // Number of integer register file accesses statistics::Scalar numIntRegReads; statistics::Scalar numIntRegWrites; @@ -251,6 +263,10 @@ class SimpleExecContext : public ExecContext mutable statistics::Scalar numVecPredRegReads; statistics::Scalar numVecPredRegWrites; + // Number of matrix register file accesses + mutable statistics::Scalar numMatRegReads; + statistics::Scalar numMatRegWrites; + // Number of condition code register file accesses statistics::Scalar numCCRegReads; statistics::Scalar numCCRegWrites; diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc index 4c4e7dcdb6..c28359a4ed 100644 --- a/src/cpu/simple_thread.cc +++ b/src/cpu/simple_thread.cc @@ -75,6 +75,7 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys, {*_isa->regClasses().at(VecRegClass)}, {*_isa->regClasses().at(VecElemClass)}, {*_isa->regClasses().at(VecPredRegClass)}, + 
{*_isa->regClasses().at(MatRegClass)}, {*_isa->regClasses().at(CCRegClass)} }}, isa(_isa), diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index 5a60d2ac16..b9129734f1 100644 --- a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -57,6 +57,7 @@ #include "debug/CCRegs.hh" #include "debug/FloatRegs.hh" #include "debug/IntRegs.hh" +#include "debug/MatRegs.hh" #include "debug/VecPredRegs.hh" #include "debug/VecRegs.hh" #include "mem/htm.hh" diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index 3ab78345bc..7ecc57d2f0 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -155,6 +155,7 @@ class StaticInst : public RefCounted, public StaticInstFlags bool isInteger() const { return flags[IsInteger]; } bool isFloating() const { return flags[IsFloating]; } bool isVector() const { return flags[IsVector]; } + bool isMatrix() const { return flags[IsMatrix]; } bool isControl() const { return flags[IsControl]; } bool isCall() const { return flags[IsCall]; } diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc index 69094f87af..146f9e74d0 100644 --- a/src/cpu/thread_context.cc +++ b/src/cpu/thread_context.cc @@ -109,6 +109,20 @@ ThreadContext::compare(ThreadContext *one, ThreadContext *two) } } + // Then loop through the matrix registers. + const auto *mat_class = regClasses.at(MatRegClass); + std::vector mat1(mat_class->regBytes()); + std::vector mat2(mat_class->regBytes()); + for (auto &id: *regClasses.at(MatRegClass)) { + one->getReg(id, mat1.data()); + two->getReg(id, mat2.data()); + if (mat1 != mat2) { + panic("Mat reg idx %d doesn't match, one: %#x, two: %#x", + id.index(), mat_class->valString(mat1.data()), + mat_class->valString(mat2.data())); + } + } + for (int i = 0; i < regClasses.at(MiscRegClass)->numRegs(); ++i) { RegVal t1 = one->readMiscRegNoEffect(i); RegVal t2 = two->readMiscRegNoEffect(i);