diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 1d293397e5..641152ede2 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -193,8 +193,10 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) }); // create a stat group object for each thread on this core fetchStats.reserve(numThreads); + executeStats.reserve(numThreads); for (int i = 0; i < numThreads; i++) { fetchStats.emplace_back(new FetchCPUStats(this, i)); + executeStats.emplace_back(new ExecuteCPUStats(this, i)); } } @@ -846,4 +848,78 @@ FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id) } +// means it is incremented in a vector indexing and not directly +BaseCPU:: +ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id) + : statistics::Group(parent, csprintf("executeStats%i", thread_id).c_str()), + ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(), + "DCache total stall cycles"), + ADD_STAT(numCCRegReads, statistics::units::Count::get(), + "Number of times the CC registers were read"), + ADD_STAT(numCCRegWrites, statistics::units::Count::get(), + "Number of times the CC registers were written"), + ADD_STAT(numFpAluAccesses, statistics::units::Count::get(), + "Number of float alu accesses"), + ADD_STAT(numFpRegReads, statistics::units::Count::get(), + "Number of times the floating registers were read"), + ADD_STAT(numFpRegWrites, statistics::units::Count::get(), + "Number of times the floating registers were written"), + ADD_STAT(numIntAluAccesses, statistics::units::Count::get(), + "Number of integer alu accesses"), + ADD_STAT(numIntRegReads, statistics::units::Count::get(), + "Number of times the integer registers were read"), + ADD_STAT(numIntRegWrites, statistics::units::Count::get(), + "Number of times the integer registers were written"), + ADD_STAT(numMemRefs, statistics::units::Count::get(), + "Number of memory refs"), + ADD_STAT(numMiscRegReads, statistics::units::Count::get(), + "Number of times the Misc registers were read"), + ADD_STAT(numMiscRegWrites, statistics::units::Count::get(), + "Number of times the Misc registers were written"), + ADD_STAT(numVecAluAccesses, statistics::units::Count::get(), + "Number of vector alu accesses"), + ADD_STAT(numVecPredRegReads, statistics::units::Count::get(), + "Number of times the predicate registers were read"), + ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(), + "Number of times the predicate registers were written"), + ADD_STAT(numVecRegReads, statistics::units::Count::get(), + "Number of times the vector registers were read"), + ADD_STAT(numVecRegWrites, statistics::units::Count::get(), + "Number of times the vector registers were written"), + ADD_STAT(numDiscardedOps, statistics::units::Count::get(), + "Number of ops (including micro ops) which were discarded before " + "commit") +{ + dcacheStallCycles + .prereq(dcacheStallCycles); + numCCRegReads + .prereq(numCCRegReads) + .flags(statistics::nozero); + numCCRegWrites + .prereq(numCCRegWrites) + .flags(statistics::nozero); + numFpAluAccesses + .prereq(numFpAluAccesses); + numFpRegReads + .prereq(numFpRegReads); + numIntAluAccesses + .prereq(numIntAluAccesses); + numIntRegReads + .prereq(numIntRegReads); + numIntRegWrites + .prereq(numIntRegWrites); + numMiscRegReads + .prereq(numMiscRegReads); + numMiscRegWrites + .prereq(numMiscRegWrites); + numVecPredRegReads + .prereq(numVecPredRegReads); + numVecPredRegWrites + .prereq(numVecPredRegWrites); + numVecRegReads + .prereq(numVecRegReads); + numVecRegWrites + .prereq(numVecRegWrites); +} + } // namespace gem5 diff --git a/src/cpu/base.hh b/src/cpu/base.hh index e8fb777a76..acf78bbd81 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -692,7 +692,55 @@ class BaseCPU : public ClockedObject }; + struct ExecuteCPUStats: public statistics::Group + { + ExecuteCPUStats(statistics::Group *parent, int thread_id); + + /* Number of cycles stalled for D-cache responses */ + statistics::Scalar dcacheStallCycles; + + /* Number of condition code register file accesses */ + statistics::Scalar numCCRegReads; + statistics::Scalar numCCRegWrites; + + /* number of float alu accesses */ + statistics::Scalar numFpAluAccesses; + + /* Number of float register file accesses */ + statistics::Scalar numFpRegReads; + statistics::Scalar numFpRegWrites; + + /* Number of integer alu accesses */ + statistics::Scalar numIntAluAccesses; + + /* Number of integer register file accesses */ + statistics::Scalar numIntRegReads; + statistics::Scalar numIntRegWrites; + + /* number of simulated memory references */ + statistics::Scalar numMemRefs; + + /* Number of misc register file accesses */ + statistics::Scalar numMiscRegReads; + statistics::Scalar numMiscRegWrites; + + /* Number of vector alu accesses */ + statistics::Scalar numVecAluAccesses; + + /* Number of predicate register file accesses */ + mutable statistics::Scalar numVecPredRegReads; + statistics::Scalar numVecPredRegWrites; + + /* Number of vector register file accesses */ + mutable statistics::Scalar numVecRegReads; + statistics::Scalar numVecRegWrites; + + /* Number of ops discarded before committing */ + statistics::Scalar numDiscardedOps; + }; + std::vector> fetchStats; + std::vector> executeStats; }; } // namespace gem5 diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index c37c6c6696..42c7b1af0c 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -1369,8 +1369,11 @@ Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard, " state was unexpected, expected: %d\n", *inst, ex_info.streamSeqNum); - if (fault == NoFault) + if (fault == NoFault) { + // output both old and new stats cpu.stats.numDiscardedOps++; + cpu.executeStats[thread_id]->numDiscardedOps++; + } } /* Mark the mem inst as being in the LSQ */ diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index d2bacaa523..6732c4310e 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -1019,7 +1019,10 @@ CPU::readMiscRegNoEffect(int misc_reg, ThreadID tid) const RegVal CPU::readMiscReg(int misc_reg, ThreadID tid) { + // output both old and new stats, keep + // return value the same cpuStats.miscRegfileReads++; + executeStats[tid]->numMiscRegReads++; return isa[tid]->readMiscReg(misc_reg); } @@ -1032,7 +1035,9 @@ CPU::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid) void CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid) { + // output both old and new stats cpuStats.miscRegfileWrites++; + executeStats[tid]->numMiscRegWrites++; isa[tid]->setMiscReg(misc_reg, val); } @@ -1156,6 +1161,126 @@ CPU::setReg(PhysRegIdPtr phys_reg, const void *val) regFile.setReg(phys_reg, val); } +RegVal +CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid) +{ + switch (phys_reg->classValue()) { + case IntRegClass: + executeStats[tid]->numIntRegReads++; + break; + case FloatRegClass: + executeStats[tid]->numFpRegReads++; + break; + case CCRegClass: + executeStats[tid]->numCCRegReads++; + break; + case VecRegClass: + case VecElemClass: + executeStats[tid]->numVecRegReads++; + break; + case VecPredRegClass: + executeStats[tid]->numVecPredRegReads++; + break; + default: + break; + } + return regFile.getReg(phys_reg); +} + +void +CPU::getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid) +{ + switch (phys_reg->classValue()) { + case IntRegClass: + executeStats[tid]->numIntRegReads++; + break; + case FloatRegClass: + executeStats[tid]->numFpRegReads++; + break; + case CCRegClass: + executeStats[tid]->numCCRegReads++; + break; + case VecRegClass: + case VecElemClass: + executeStats[tid]->numVecRegReads++; + break; + case VecPredRegClass: + executeStats[tid]->numVecPredRegReads++; + break; + default: + break; + } + regFile.getReg(phys_reg, val); +} + +void * +CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid) +{ + switch (phys_reg->classValue()) { + case VecRegClass: + executeStats[tid]->numVecRegReads++; + break; + case VecPredRegClass: + executeStats[tid]->numVecPredRegReads++; + break; + default: + break; + } + return regFile.getWritableReg(phys_reg); +} + +void +CPU::setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid) +{ + switch (phys_reg->classValue()) { + case IntRegClass: + executeStats[tid]->numIntRegWrites++; + break; + case FloatRegClass: + executeStats[tid]->numFpRegWrites++; + break; + case CCRegClass: + executeStats[tid]->numCCRegWrites++; + break; + case VecRegClass: + case VecElemClass: + executeStats[tid]->numVecRegWrites++; + break; + case VecPredRegClass: + executeStats[tid]->numVecPredRegWrites++; + break; + default: + break; + } + regFile.setReg(phys_reg, val); +} + +void +CPU::setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid) +{ + switch (phys_reg->classValue()) { + case IntRegClass: + executeStats[tid]->numIntRegWrites++; + break; + case FloatRegClass: + executeStats[tid]->numFpRegWrites++; + break; + case CCRegClass: + executeStats[tid]->numCCRegWrites++; + break; + case VecRegClass: + case VecElemClass: + executeStats[tid]->numVecRegWrites++; + break; + case VecPredRegClass: + executeStats[tid]->numVecPredRegWrites++; + break; + default: + break; + } + regFile.setReg(phys_reg, val); +} + RegVal CPU::getArchReg(const RegId ®, ThreadID tid) { diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 76a9060f0a..d6317d6ea2 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -317,6 +317,18 @@ class CPU : public BaseCPU void setReg(PhysRegIdPtr phys_reg, RegVal val); void setReg(PhysRegIdPtr phys_reg, const void *val); + /** These functions are duplicated so that one set + * doesn't use thread ID, while the other does. + * This allows us to still output both old and + * new versions of the stats. + */ + RegVal getReg(PhysRegIdPtr phys_reg, ThreadID tid); + void getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid); + void *getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid); + + void setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid); + void setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid); + /** Architectural register accessors. Looks up in the commit * rename table to obtain the true physical index of the * architected register first, then accesses that physical diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index d6df09ce4a..4f762b4551 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -1085,11 +1085,16 @@ class DynInst : public ExecContext, public RefCounted continue; if (bytes == sizeof(RegVal)) { + // call both old and new functions setRegOperand(staticInst.get(), idx, cpu->getReg(prev_phys_reg)); + setRegOperand(staticInst.get(), idx, + cpu->getReg(prev_phys_reg, threadNumber)); } else { uint8_t val[original_dest_reg.regClass().regBytes()]; + // call both old and new functions cpu->getReg(prev_phys_reg, val); + cpu->getReg(prev_phys_reg, val, threadNumber); setRegOperand(staticInst.get(), idx, val); } } @@ -1116,6 +1121,8 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedSrcIdx(idx); if (reg->is(InvalidRegClass)) return 0; + // call new function, only return old value + cpu->getReg(reg, threadNumber); return cpu->getReg(reg); } @@ -1125,12 +1132,16 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedSrcIdx(idx); if (reg->is(InvalidRegClass)) return; + // call both old and new function cpu->getReg(reg, val); + cpu->getReg(reg, val, threadNumber); } void * getWritableRegOperand(const StaticInst *si, int idx) override { + // call both old and new function + return cpu->getWritableReg(renamedDestIdx(idx), threadNumber); return cpu->getWritableReg(renamedDestIdx(idx)); } @@ -1143,7 +1154,9 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedDestIdx(idx); if (reg->is(InvalidRegClass)) return; + // call both old and new functions cpu->setReg(reg, val); + cpu->setReg(reg, val, threadNumber); setResult(reg->regClass(), val); } @@ -1153,7 +1166,9 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedDestIdx(idx); if (reg->is(InvalidRegClass)) return; + // call both old and new functions cpu->setReg(reg, val); + cpu->setReg(reg, val, threadNumber); setResult(reg->regClass(), val); } }; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 1632f545a2..d97e1a9964 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -388,7 +388,9 @@ BaseSimpleCPU::postExecute() Addr instAddr = threadContexts[curThread]->pcState().instAddr(); if (curStaticInst->isMemRef()) { + // update both old and new stats t_info.execContextStats.numMemRefs++; + executeStats[t_info.thread->threadId()]->numMemRefs++; } if (curStaticInst->isLoad()) { @@ -404,18 +406,24 @@ BaseSimpleCPU::postExecute() /* Power model statistics */ //integer alu accesses if (curStaticInst->isInteger()){ + // update both old and new stats + executeStats[t_info.thread->threadId()]->numIntAluAccesses++; t_info.execContextStats.numIntAluAccesses++; t_info.execContextStats.numIntInsts++; } //float alu accesses if (curStaticInst->isFloating()){ + // update both old and new stats + executeStats[t_info.thread->threadId()]->numFpAluAccesses++; t_info.execContextStats.numFpAluAccesses++; t_info.execContextStats.numFpInsts++; } //vector alu accesses if (curStaticInst->isVector()){ + // update both old and new stats + executeStats[t_info.thread->threadId()]->numVecAluAccesses++; t_info.execContextStats.numVecAluAccesses++; t_info.execContextStats.numVecInsts++; } diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index 0f20763f28..31aa5d44c7 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -161,22 +161,23 @@ class SimpleExecContext : public ExecContext ADD_STAT(statExecutedInstType, statistics::units::Count::get(), "Class of executed instruction."), numRegReads{ - &numIntRegReads, - &numFpRegReads, - &numVecRegReads, - &numVecRegReads, - &numVecPredRegReads, - &numMatRegReads, - &numCCRegReads + &(cpu->executeStats[thread->threadId()]->numIntRegReads), + &(cpu->executeStats[thread->threadId()]->numFpRegReads), + &(cpu->executeStats[thread->threadId()]->numVecRegReads), + &(cpu->executeStats[thread->threadId()]->numVecRegReads), + &(cpu->executeStats[thread->threadId()]->numVecPredRegReads), + &(cpu->executeStats[thread->threadId()]->numCCRegReads), + &numMatRegReads }, numRegWrites{ - &numIntRegWrites, - &numFpRegWrites, - &numVecRegWrites, - &numVecRegWrites, - &numVecPredRegWrites, - &numMatRegWrites, - &numCCRegWrites + &(cpu->executeStats[thread->threadId()]->numIntRegWrites), + &(cpu->executeStats[thread->threadId()]->numFpRegWrites), + &(cpu->executeStats[thread->threadId()]->numVecRegWrites), + &(cpu->executeStats[thread->threadId()]->numVecRegWrites), + &(cpu->executeStats[thread->threadId()] + ->numVecPredRegWrites), + &(cpu->executeStats[thread->threadId()]->numCCRegWrites), + &numMatRegWrites } { numCCRegReads @@ -368,7 +369,9 @@ class SimpleExecContext : public ExecContext RegVal readMiscRegOperand(const StaticInst *si, int idx) override { + // update both old and new stats execContextStats.numMiscRegReads++; + cpu->executeStats[thread->threadId()]->numMiscRegReads++; const RegId& reg = si->srcRegIdx(idx); assert(reg.is(MiscRegClass)); return thread->readMiscReg(reg.index()); @@ -377,7 +380,9 @@ class SimpleExecContext : public ExecContext void setMiscRegOperand(const StaticInst *si, int idx, RegVal val) override { + // update both old and new stats execContextStats.numMiscRegWrites++; + cpu->executeStats[thread->threadId()]->numMiscRegWrites++; const RegId& reg = si->destRegIdx(idx); assert(reg.is(MiscRegClass)); thread->setMiscReg(reg.index(), val); @@ -390,7 +395,9 @@ class SimpleExecContext : public ExecContext RegVal readMiscReg(int misc_reg) override { + // update both old and new stats execContextStats.numMiscRegReads++; + cpu->executeStats[thread->threadId()]->numMiscRegReads++; return thread->readMiscReg(misc_reg); } @@ -401,7 +408,9 @@ class SimpleExecContext : public ExecContext void setMiscReg(int misc_reg, RegVal val) override { + // update both old and new stats execContextStats.numMiscRegWrites++; + cpu->executeStats[thread->threadId()]->numMiscRegWrites++; thread->setMiscReg(misc_reg, val); }