From 32b18dcc608065a9dca77a36b48f858b98e7cce3 Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Mon, 13 Mar 2023 02:34:14 -0700 Subject: [PATCH] cpu: Move execute stats from simple and minor to base Created stat group ExecuteCPUStats in BaseCPU and moved stats from the simple and minor cpu models. The stats moved from SimpleCPU are dcacheStallCycles, icacheStallCycles, numCCRegReads, numCCRegWrites, numFpAluAccesses, numFpRegReads, numFpRegWrits, numIntAluAccesses, numIntRegReads, numIntRegWrites, numMemRegs, numMiscRegReads, numMiscRegWrites, numVecAluAccesses, numVecPredRegReads, numVecPredRegWrites, numVecRegReads, numVecRegWrites. The stat moved from MinorCPU is numDiscardedOps. These stats should both be outputting under executeStats in BaseCPU, as well as in the simple and minor cpu models at this point. Change-Id: I95fe43b14f5c2ad4939463d8086b6b858ba1a2a1 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69098 Maintainer: Bobby Bruce Tested-by: kokoro Reviewed-by: Bobby Bruce --- src/cpu/base.cc | 76 ++++++++++++++++++++ src/cpu/base.hh | 48 +++++++++++++ src/cpu/minor/execute.cc | 5 +- src/cpu/o3/cpu.cc | 125 +++++++++++++++++++++++++++++++++ src/cpu/o3/cpu.hh | 12 ++++ src/cpu/o3/dyn_inst.hh | 15 ++++ src/cpu/simple/base.cc | 8 +++ src/cpu/simple/exec_context.hh | 37 ++++++---- 8 files changed, 311 insertions(+), 15 deletions(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 1d293397e5..641152ede2 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -193,8 +193,10 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) }); // create a stat group object for each thread on this core fetchStats.reserve(numThreads); + executeStats.reserve(numThreads); for (int i = 0; i < numThreads; i++) { fetchStats.emplace_back(new FetchCPUStats(this, i)); + executeStats.emplace_back(new ExecuteCPUStats(this, i)); } } @@ -846,4 +848,78 @@ FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id) } +// means it is incremented in a vector indexing and not directly +BaseCPU:: +ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id) + : statistics::Group(parent, csprintf("executeStats%i", thread_id).c_str()), + ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(), + "DCache total stall cycles"), + ADD_STAT(numCCRegReads, statistics::units::Count::get(), + "Number of times the CC registers were read"), + ADD_STAT(numCCRegWrites, statistics::units::Count::get(), + "Number of times the CC registers were written"), + ADD_STAT(numFpAluAccesses, statistics::units::Count::get(), + "Number of float alu accesses"), + ADD_STAT(numFpRegReads, statistics::units::Count::get(), + "Number of times the floating registers were read"), + ADD_STAT(numFpRegWrites, statistics::units::Count::get(), + "Number of times the floating registers were written"), + ADD_STAT(numIntAluAccesses, statistics::units::Count::get(), + "Number of integer alu accesses"), + ADD_STAT(numIntRegReads, statistics::units::Count::get(), + "Number of times the integer registers were read"), + ADD_STAT(numIntRegWrites, statistics::units::Count::get(), + "Number of times the integer registers were written"), + ADD_STAT(numMemRefs, statistics::units::Count::get(), + "Number of memory refs"), + ADD_STAT(numMiscRegReads, statistics::units::Count::get(), + "Number of times the Misc registers were read"), + ADD_STAT(numMiscRegWrites, statistics::units::Count::get(), + "Number of times the Misc registers were written"), + ADD_STAT(numVecAluAccesses, statistics::units::Count::get(), + "Number of vector alu accesses"), + ADD_STAT(numVecPredRegReads, statistics::units::Count::get(), + "Number of times the predicate registers were read"), + ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(), + "Number of times the predicate registers were written"), + ADD_STAT(numVecRegReads, statistics::units::Count::get(), + "Number of times the vector registers were read"), + ADD_STAT(numVecRegWrites, statistics::units::Count::get(), + "Number of times the vector registers were written"), + ADD_STAT(numDiscardedOps, statistics::units::Count::get(), + "Number of ops (including micro ops) which were discarded before " + "commit") +{ + dcacheStallCycles + .prereq(dcacheStallCycles); + numCCRegReads + .prereq(numCCRegReads) + .flags(statistics::nozero); + numCCRegWrites + .prereq(numCCRegWrites) + .flags(statistics::nozero); + numFpAluAccesses + .prereq(numFpAluAccesses); + numFpRegReads + .prereq(numFpRegReads); + numIntAluAccesses + .prereq(numIntAluAccesses); + numIntRegReads + .prereq(numIntRegReads); + numIntRegWrites + .prereq(numIntRegWrites); + numMiscRegReads + .prereq(numMiscRegReads); + numMiscRegWrites + .prereq(numMiscRegWrites); + numVecPredRegReads + .prereq(numVecPredRegReads); + numVecPredRegWrites + .prereq(numVecPredRegWrites); + numVecRegReads + .prereq(numVecRegReads); + numVecRegWrites + .prereq(numVecRegWrites); +} + } // namespace gem5 diff --git a/src/cpu/base.hh b/src/cpu/base.hh index e8fb777a76..acf78bbd81 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -692,7 +692,55 @@ class BaseCPU : public ClockedObject }; + struct ExecuteCPUStats: public statistics::Group + { + ExecuteCPUStats(statistics::Group *parent, int thread_id); + + /* Number of cycles stalled for D-cache responses */ + statistics::Scalar dcacheStallCycles; + + /* Number of condition code register file accesses */ + statistics::Scalar numCCRegReads; + statistics::Scalar numCCRegWrites; + + /* number of float alu accesses */ + statistics::Scalar numFpAluAccesses; + + /* Number of float register file accesses */ + statistics::Scalar numFpRegReads; + statistics::Scalar numFpRegWrites; + + /* Number of integer alu accesses */ + statistics::Scalar numIntAluAccesses; + + /* Number of integer register file accesses */ + statistics::Scalar numIntRegReads; + statistics::Scalar numIntRegWrites; + + /* number of simulated memory references */ + statistics::Scalar numMemRefs; + + /* Number of misc register file accesses */ + statistics::Scalar numMiscRegReads; + statistics::Scalar numMiscRegWrites; + + /* Number of vector alu accesses */ + statistics::Scalar numVecAluAccesses; + + /* Number of predicate register file accesses */ + mutable statistics::Scalar numVecPredRegReads; + statistics::Scalar numVecPredRegWrites; + + /* Number of vector register file accesses */ + mutable statistics::Scalar numVecRegReads; + statistics::Scalar numVecRegWrites; + + /* Number of ops discarded before committing */ + statistics::Scalar numDiscardedOps; + }; + std::vector> fetchStats; + std::vector> executeStats; }; } // namespace gem5 diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index c37c6c6696..42c7b1af0c 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -1369,8 +1369,11 @@ Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard, " state was unexpected, expected: %d\n", *inst, ex_info.streamSeqNum); - if (fault == NoFault) + if (fault == NoFault) { + // output both old and new stats cpu.stats.numDiscardedOps++; + cpu.executeStats[thread_id]->numDiscardedOps++; + } } /* Mark the mem inst as being in the LSQ */ diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index d2bacaa523..6732c4310e 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -1019,7 +1019,10 @@ CPU::readMiscRegNoEffect(int misc_reg, ThreadID tid) const RegVal CPU::readMiscReg(int misc_reg, ThreadID tid) { + // output both old and new stats, keep + // return value the same cpuStats.miscRegfileReads++; + executeStats[tid]->numMiscRegReads++; return isa[tid]->readMiscReg(misc_reg); } @@ -1032,7 +1035,9 @@ CPU::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid) void CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid) { + // output both old and new stats cpuStats.miscRegfileWrites++; + executeStats[tid]->numMiscRegWrites++; isa[tid]->setMiscReg(misc_reg, val); } @@ -1156,6 +1161,126 @@ CPU::setReg(PhysRegIdPtr phys_reg, const void *val) regFile.setReg(phys_reg, val); } +RegVal +CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid) +{ + switch (phys_reg->classValue()) { + case IntRegClass: + executeStats[tid]->numIntRegReads++; + break; + case FloatRegClass: + executeStats[tid]->numFpRegReads++; + break; + case CCRegClass: + executeStats[tid]->numCCRegReads++; + break; + case VecRegClass: + case VecElemClass: + executeStats[tid]->numVecRegReads++; + break; + case VecPredRegClass: + executeStats[tid]->numVecPredRegReads++; + break; + default: + break; + } + return regFile.getReg(phys_reg); +} + +void +CPU::getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid) +{ + switch (phys_reg->classValue()) { + case IntRegClass: + executeStats[tid]->numIntRegReads++; + break; + case FloatRegClass: + executeStats[tid]->numFpRegReads++; + break; + case CCRegClass: + executeStats[tid]->numCCRegReads++; + break; + case VecRegClass: + case VecElemClass: + executeStats[tid]->numVecRegReads++; + break; + case VecPredRegClass: + executeStats[tid]->numVecPredRegReads++; + break; + default: + break; + } + regFile.getReg(phys_reg, val); +} + +void * +CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid) +{ + switch (phys_reg->classValue()) { + case VecRegClass: + executeStats[tid]->numVecRegReads++; + break; + case VecPredRegClass: + executeStats[tid]->numVecPredRegReads++; + break; + default: + break; + } + return regFile.getWritableReg(phys_reg); +} + +void +CPU::setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid) +{ + switch (phys_reg->classValue()) { + case IntRegClass: + executeStats[tid]->numIntRegWrites++; + break; + case FloatRegClass: + executeStats[tid]->numFpRegWrites++; + break; + case CCRegClass: + executeStats[tid]->numCCRegWrites++; + break; + case VecRegClass: + case VecElemClass: + executeStats[tid]->numVecRegWrites++; + break; + case VecPredRegClass: + executeStats[tid]->numVecPredRegWrites++; + break; + default: + break; + } + regFile.setReg(phys_reg, val); +} + +void +CPU::setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid) +{ + switch (phys_reg->classValue()) { + case IntRegClass: + executeStats[tid]->numIntRegWrites++; + break; + case FloatRegClass: + executeStats[tid]->numFpRegWrites++; + break; + case CCRegClass: + executeStats[tid]->numCCRegWrites++; + break; + case VecRegClass: + case VecElemClass: + executeStats[tid]->numVecRegWrites++; + break; + case VecPredRegClass: + executeStats[tid]->numVecPredRegWrites++; + break; + default: + break; + } + regFile.setReg(phys_reg, val); +} + RegVal CPU::getArchReg(const RegId ®, ThreadID tid) { diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 76a9060f0a..d6317d6ea2 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -317,6 +317,18 @@ class CPU : public BaseCPU void setReg(PhysRegIdPtr phys_reg, RegVal val); void setReg(PhysRegIdPtr phys_reg, const void *val); + /** These functions are duplicated so that one set + * doesn't use thread ID, while the other does. + * This allows us to still output both old and + * new versions of the stats. + */ + RegVal getReg(PhysRegIdPtr phys_reg, ThreadID tid); + void getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid); + void *getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid); + + void setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid); + void setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid); + /** Architectural register accessors. Looks up in the commit * rename table to obtain the true physical index of the * architected register first, then accesses that physical diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index d6df09ce4a..4f762b4551 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -1085,11 +1085,16 @@ class DynInst : public ExecContext, public RefCounted continue; if (bytes == sizeof(RegVal)) { + // call both old and new functions setRegOperand(staticInst.get(), idx, cpu->getReg(prev_phys_reg)); + setRegOperand(staticInst.get(), idx, + cpu->getReg(prev_phys_reg, threadNumber)); } else { uint8_t val[original_dest_reg.regClass().regBytes()]; + // call both old and new functions cpu->getReg(prev_phys_reg, val); + cpu->getReg(prev_phys_reg, val, threadNumber); setRegOperand(staticInst.get(), idx, val); } } @@ -1116,6 +1121,8 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedSrcIdx(idx); if (reg->is(InvalidRegClass)) return 0; + // call new function, only return old value + cpu->getReg(reg, threadNumber); return cpu->getReg(reg); } @@ -1125,12 +1132,16 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedSrcIdx(idx); if (reg->is(InvalidRegClass)) return; + // call both old and new function cpu->getReg(reg, val); + cpu->getReg(reg, val, threadNumber); } void * getWritableRegOperand(const StaticInst *si, int idx) override { + // call both old and new function + return cpu->getWritableReg(renamedDestIdx(idx), threadNumber); return cpu->getWritableReg(renamedDestIdx(idx)); } @@ -1143,7 +1154,9 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedDestIdx(idx); if (reg->is(InvalidRegClass)) return; + // call both old and new functions cpu->setReg(reg, val); + cpu->setReg(reg, val, threadNumber); setResult(reg->regClass(), val); } @@ -1153,7 +1166,9 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedDestIdx(idx); if (reg->is(InvalidRegClass)) return; + // call both old and new functions cpu->setReg(reg, val); + cpu->setReg(reg, val, threadNumber); setResult(reg->regClass(), val); } }; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 1632f545a2..d97e1a9964 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -388,7 +388,9 @@ BaseSimpleCPU::postExecute() Addr instAddr = threadContexts[curThread]->pcState().instAddr(); if (curStaticInst->isMemRef()) { + // update both old and new stats t_info.execContextStats.numMemRefs++; + executeStats[t_info.thread->threadId()]->numMemRefs++; } if (curStaticInst->isLoad()) { @@ -404,18 +406,24 @@ BaseSimpleCPU::postExecute() /* Power model statistics */ //integer alu accesses if (curStaticInst->isInteger()){ + // update both old and new stats + executeStats[t_info.thread->threadId()]->numIntAluAccesses++; t_info.execContextStats.numIntAluAccesses++; t_info.execContextStats.numIntInsts++; } //float alu accesses if (curStaticInst->isFloating()){ + // update both old and new stats + executeStats[t_info.thread->threadId()]->numFpAluAccesses++; t_info.execContextStats.numFpAluAccesses++; t_info.execContextStats.numFpInsts++; } //vector alu accesses if (curStaticInst->isVector()){ + // update both old and new stats + executeStats[t_info.thread->threadId()]->numVecAluAccesses++; t_info.execContextStats.numVecAluAccesses++; t_info.execContextStats.numVecInsts++; } diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index 0f20763f28..31aa5d44c7 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -161,22 +161,23 @@ class SimpleExecContext : public ExecContext ADD_STAT(statExecutedInstType, statistics::units::Count::get(), "Class of executed instruction."), numRegReads{ - &numIntRegReads, - &numFpRegReads, - &numVecRegReads, - &numVecRegReads, - &numVecPredRegReads, - &numMatRegReads, - &numCCRegReads + &(cpu->executeStats[thread->threadId()]->numIntRegReads), + &(cpu->executeStats[thread->threadId()]->numFpRegReads), + &(cpu->executeStats[thread->threadId()]->numVecRegReads), + &(cpu->executeStats[thread->threadId()]->numVecRegReads), + &(cpu->executeStats[thread->threadId()]->numVecPredRegReads), + &(cpu->executeStats[thread->threadId()]->numCCRegReads), + &numMatRegReads }, numRegWrites{ - &numIntRegWrites, - &numFpRegWrites, - &numVecRegWrites, - &numVecRegWrites, - &numVecPredRegWrites, - &numMatRegWrites, - &numCCRegWrites + &(cpu->executeStats[thread->threadId()]->numIntRegWrites), + &(cpu->executeStats[thread->threadId()]->numFpRegWrites), + &(cpu->executeStats[thread->threadId()]->numVecRegWrites), + &(cpu->executeStats[thread->threadId()]->numVecRegWrites), + &(cpu->executeStats[thread->threadId()] + ->numVecPredRegWrites), + &(cpu->executeStats[thread->threadId()]->numCCRegWrites), + &numMatRegWrites } { numCCRegReads @@ -368,7 +369,9 @@ class SimpleExecContext : public ExecContext RegVal readMiscRegOperand(const StaticInst *si, int idx) override { + // update both old and new stats execContextStats.numMiscRegReads++; + cpu->executeStats[thread->threadId()]->numMiscRegReads++; const RegId& reg = si->srcRegIdx(idx); assert(reg.is(MiscRegClass)); return thread->readMiscReg(reg.index()); @@ -377,7 +380,9 @@ class SimpleExecContext : public ExecContext void setMiscRegOperand(const StaticInst *si, int idx, RegVal val) override { + // update both old and new stats execContextStats.numMiscRegWrites++; + cpu->executeStats[thread->threadId()]->numMiscRegWrites++; const RegId& reg = si->destRegIdx(idx); assert(reg.is(MiscRegClass)); thread->setMiscReg(reg.index(), val); @@ -390,7 +395,9 @@ class SimpleExecContext : public ExecContext RegVal readMiscReg(int misc_reg) override { + // update both old and new stats execContextStats.numMiscRegReads++; + cpu->executeStats[thread->threadId()]->numMiscRegReads++; return thread->readMiscReg(misc_reg); } @@ -401,7 +408,9 @@ class SimpleExecContext : public ExecContext void setMiscReg(int misc_reg, RegVal val) override { + // update both old and new stats execContextStats.numMiscRegWrites++; + cpu->executeStats[thread->threadId()]->numMiscRegWrites++; thread->setMiscReg(misc_reg, val); }