diff --git a/src/cpu/base.cc b/src/cpu/base.cc index cee76472f5..d2c0a78d44 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -191,30 +191,6 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) modelResetPort.onChange([this](const bool &new_val) { setReset(new_val); }); - // create a stat group object for each thread on this core - fetchStats.reserve(numThreads); - executeStats.reserve(numThreads); - commitStats.reserve(numThreads); - for (int i = 0; i < numThreads; i++) { - // create fetchStat object for thread i and set rate formulas - FetchCPUStats* fetchStatptr = new FetchCPUStats(this, i); - fetchStatptr->fetchRate = fetchStatptr->numInsts / baseStats.numCycles; - fetchStatptr->branchRate = fetchStatptr->numBranches / - baseStats.numCycles; - fetchStats.emplace_back(fetchStatptr); - - // create executeStat object for thread i and set rate formulas - ExecuteCPUStats* executeStatptr = new ExecuteCPUStats(this, i); - executeStatptr->instRate = executeStatptr->numInsts / - baseStats.numCycles; - executeStats.emplace_back(executeStatptr); - - // create commitStat object for thread i and set ipc, cpi formulas - CommitCPUStats* commitStatptr = new CommitCPUStats(this, i); - commitStatptr->ipc = commitStatptr->numInsts / baseStats.numCycles; - commitStatptr->cpi = baseStats.numCycles / commitStatptr->numInsts; - commitStats.emplace_back(commitStatptr); - } } void @@ -407,28 +383,13 @@ BaseCPU::probeInstCommit(const StaticInstPtr &inst, Addr pc) BaseCPU:: BaseCPUStats::BaseCPUStats(statistics::Group *parent) : statistics::Group(parent), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of instructions committed (core level)"), - ADD_STAT(numOps, statistics::units::Count::get(), - "Number of ops (including micro ops) committed (core level)"), ADD_STAT(numCycles, statistics::units::Cycle::get(), "Number of cpu cycles simulated"), - ADD_STAT(cpi, statistics::units::Rate< - statistics::units::Cycle, statistics::units::Count>::get(), - "CPI: cycles per instruction (core level)"), - ADD_STAT(ipc, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "IPC: instructions per cycle (core level)"), ADD_STAT(numWorkItemsStarted, statistics::units::Count::get(), "Number of work items this cpu started"), ADD_STAT(numWorkItemsCompleted, statistics::units::Count::get(), "Number of work items this cpu completed") { - cpi.precision(6); - cpi = numCycles / numInsts; - - ipc.precision(6); - ipc = numInsts / numCycles; } void @@ -866,215 +827,4 @@ BaseCPU::GlobalStats::GlobalStats(statistics::Group *parent) hostOpRate = simOps / hostSeconds; } -BaseCPU:: -FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id) - : statistics::Group(parent, csprintf("fetchStats%i", thread_id).c_str()), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of instructions fetched (thread level)"), - ADD_STAT(numOps, statistics::units::Count::get(), - "Number of ops (including micro ops) fetched (thread level)"), - ADD_STAT(fetchRate, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "Number of inst fetches per cycle"), - ADD_STAT(numBranches, statistics::units::Count::get(), - "Number of branches fetched"), - ADD_STAT(branchRate, statistics::units::Ratio::get(), - "Number of branch fetches per cycle"), - ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(), - "ICache total stall cycles"), - ADD_STAT(numFetchSuspends, statistics::units::Count::get(), - "Number of times Execute suspended instruction fetching") - -{ - fetchRate - .flags(statistics::total); - - numBranches - .prereq(numBranches); - - branchRate - .flags(statistics::total); - - icacheStallCycles - .prereq(icacheStallCycles); - -} - -// means it is incremented in a vector indexing and not directly -BaseCPU:: -ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id) - : statistics::Group(parent, csprintf("executeStats%i", thread_id).c_str()), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of executed instructions"), - ADD_STAT(numNop, statistics::units::Count::get(), - "Number of nop insts executed"), - ADD_STAT(numBranches, statistics::units::Count::get(), - "Number of branches executed"), - ADD_STAT(numLoadInsts, statistics::units::Count::get(), - "Number of load instructions executed"), - ADD_STAT(numStoreInsts, statistics::units::Count::get(), - "Number of stores executed"), - ADD_STAT(instRate, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "Inst execution rate"), - ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(), - "DCache total stall cycles"), - ADD_STAT(numCCRegReads, statistics::units::Count::get(), - "Number of times the CC registers were read"), - ADD_STAT(numCCRegWrites, statistics::units::Count::get(), - "Number of times the CC registers were written"), - ADD_STAT(numFpAluAccesses, statistics::units::Count::get(), - "Number of float alu accesses"), - ADD_STAT(numFpRegReads, statistics::units::Count::get(), - "Number of times the floating registers were read"), - ADD_STAT(numFpRegWrites, statistics::units::Count::get(), - "Number of times the floating registers were written"), - ADD_STAT(numIntAluAccesses, statistics::units::Count::get(), - "Number of integer alu accesses"), - ADD_STAT(numIntRegReads, statistics::units::Count::get(), - "Number of times the integer registers were read"), - ADD_STAT(numIntRegWrites, statistics::units::Count::get(), - "Number of times the integer registers were written"), - ADD_STAT(numMemRefs, statistics::units::Count::get(), - "Number of memory refs"), - ADD_STAT(numMiscRegReads, statistics::units::Count::get(), - "Number of times the Misc registers were read"), - ADD_STAT(numMiscRegWrites, statistics::units::Count::get(), - "Number of times the Misc registers were written"), - ADD_STAT(numVecAluAccesses, statistics::units::Count::get(), - "Number of vector alu accesses"), - ADD_STAT(numVecPredRegReads, statistics::units::Count::get(), - "Number of times the predicate registers were read"), - ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(), - "Number of times the predicate registers were written"), - ADD_STAT(numVecRegReads, statistics::units::Count::get(), - "Number of times the vector registers were read"), - ADD_STAT(numVecRegWrites, statistics::units::Count::get(), - "Number of times the vector registers were written"), - ADD_STAT(numDiscardedOps, statistics::units::Count::get(), - "Number of ops (including micro ops) which were discarded before " - "commit") -{ - numStoreInsts = numMemRefs - numLoadInsts; - - dcacheStallCycles - .prereq(dcacheStallCycles); - numCCRegReads - .prereq(numCCRegReads) - .flags(statistics::nozero); - numCCRegWrites - .prereq(numCCRegWrites) - .flags(statistics::nozero); - numFpAluAccesses - .prereq(numFpAluAccesses); - numFpRegReads - .prereq(numFpRegReads); - numIntAluAccesses - .prereq(numIntAluAccesses); - numIntRegReads - .prereq(numIntRegReads); - numIntRegWrites - .prereq(numIntRegWrites); - numMiscRegReads - .prereq(numMiscRegReads); - numMiscRegWrites - .prereq(numMiscRegWrites); - numVecPredRegReads - .prereq(numVecPredRegReads); - numVecPredRegWrites - .prereq(numVecPredRegWrites); - numVecRegReads - .prereq(numVecRegReads); - numVecRegWrites - .prereq(numVecRegWrites); -} - -BaseCPU:: -CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id) - : statistics::Group(parent, csprintf("commitStats%i", thread_id).c_str()), - ADD_STAT(numInsts, statistics::units::Count::get(), - "Number of instructions committed (thread level)"), - ADD_STAT(numOps, statistics::units::Count::get(), - "Number of ops (including micro ops) committed (thread level)"), - ADD_STAT(numInstsNotNOP, statistics::units::Count::get(), - "Number of instructions committed excluding NOPs or prefetches"), - ADD_STAT(numOpsNotNOP, statistics::units::Count::get(), - "Number of Ops (including micro ops) Simulated"), - ADD_STAT(cpi, statistics::units::Rate< - statistics::units::Cycle, statistics::units::Count>::get(), - "CPI: cycles per instruction (thread level)"), - ADD_STAT(ipc, statistics::units::Rate< - statistics::units::Count, statistics::units::Cycle>::get(), - "IPC: instructions per cycle (thread level)"), - ADD_STAT(numMemRefs, statistics::units::Count::get(), - "Number of memory references committed"), - ADD_STAT(numFpInsts, statistics::units::Count::get(), - "Number of float instructions"), - ADD_STAT(numIntInsts, statistics::units::Count::get(), - "Number of integer instructions"), - ADD_STAT(numLoadInsts, statistics::units::Count::get(), - "Number of load instructions"), - ADD_STAT(numStoreInsts, statistics::units::Count::get(), - "Number of store instructions"), - ADD_STAT(numVecInsts, statistics::units::Count::get(), - "Number of vector instructions"), - ADD_STAT(committedInstType, statistics::units::Count::get(), - "Class of committed instruction."), - ADD_STAT(committedControl, statistics::units::Count::get(), - "Class of control type instructions committed") -{ - numInsts - .prereq(numInsts); - - cpi.precision(6); - ipc.precision(6); - - committedInstType - .init(enums::Num_OpClass) - .flags(statistics::total | statistics::pdf | statistics::dist); - - for (unsigned i = 0; i < Num_OpClasses; ++i) { - committedInstType.subname(i, enums::OpClassStrings[i]); - } - - committedControl - .init(StaticInstFlags::Flags::Num_Flags) - .flags(statistics::nozero); - - for (unsigned i = 0; i < StaticInstFlags::Flags::Num_Flags; i++) { - committedControl.subname(i, StaticInstFlags::FlagsStrings[i]); - } -} - - -void -BaseCPU:: -CommitCPUStats::updateComCtrlStats(const StaticInstPtr staticInst) -{ - /* Add a count for every control instruction type */ - if (staticInst->isControl()) { - if (staticInst->isReturn()) { - committedControl[gem5::StaticInstFlags::Flags::IsReturn]++; - } - if (staticInst->isCall()) { - committedControl[gem5::StaticInstFlags::Flags::IsCall]++; - } - if (staticInst->isDirectCtrl()) { - committedControl[gem5::StaticInstFlags::Flags::IsDirectControl]++; - } - if (staticInst->isIndirectCtrl()) { - committedControl - [gem5::StaticInstFlags::Flags::IsIndirectControl]++; - } - if (staticInst->isCondCtrl()) { - committedControl[gem5::StaticInstFlags::Flags::IsCondControl]++; - } - if (staticInst->isUncondCtrl()) { - committedControl[gem5::StaticInstFlags::Flags::IsUncondControl]++; - } - committedControl[gem5::StaticInstFlags::Flags::IsControl]++; - } - -} - } // namespace gem5 diff --git a/src/cpu/base.hh b/src/cpu/base.hh index fc22abc5aa..084d9b9305 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -43,7 +43,6 @@ #define __CPU_BASE_HH__ #include -#include #include "arch/generic/interrupts.hh" #include "base/statistics.hh" @@ -633,14 +632,8 @@ class BaseCPU : public ClockedObject struct BaseCPUStats : public statistics::Group { BaseCPUStats(statistics::Group *parent); - // Number of CPU insts and ops committed at CPU core level - statistics::Scalar numInsts; - statistics::Scalar numOps; // Number of CPU cycles simulated statistics::Scalar numCycles; - /* CPI/IPC for total cycle counts and macro insts */ - statistics::Formula cpi; - statistics::Formula ipc; statistics::Scalar numWorkItemsStarted; statistics::Scalar numWorkItemsCompleted; } baseStats; @@ -683,141 +676,6 @@ class BaseCPU : public ClockedObject const Cycles pwrGatingLatency; const bool powerGatingOnIdle; EventFunctionWrapper enterPwrGatingEvent; - - public: - struct FetchCPUStats : public statistics::Group - { - FetchCPUStats(statistics::Group *parent, int thread_id); - - /* Total number of instructions fetched */ - statistics::Scalar numInsts; - - /* Total number of operations fetched */ - statistics::Scalar numOps; - - /* Number of instruction fetched per cycle. */ - statistics::Formula fetchRate; - - /* Total number of branches fetched */ - statistics::Scalar numBranches; - - /* Number of branch fetches per cycle. */ - statistics::Formula branchRate; - - /* Number of cycles stalled due to an icache miss */ - statistics::Scalar icacheStallCycles; - - /* Number of times fetch was asked to suspend by Execute */ - statistics::Scalar numFetchSuspends; - - }; - - struct ExecuteCPUStats: public statistics::Group - { - ExecuteCPUStats(statistics::Group *parent, int thread_id); - - /* Stat for total number of executed instructions */ - statistics::Scalar numInsts; - /* Number of executed nops */ - statistics::Scalar numNop; - /* Number of executed branches */ - statistics::Scalar numBranches; - /* Stat for total number of executed load instructions */ - statistics::Scalar numLoadInsts; - /* Number of executed store instructions */ - statistics::Formula numStoreInsts; - /* Number of instructions executed per cycle */ - statistics::Formula instRate; - - /* Number of cycles stalled for D-cache responses */ - statistics::Scalar dcacheStallCycles; - - /* Number of condition code register file accesses */ - statistics::Scalar numCCRegReads; - statistics::Scalar numCCRegWrites; - - /* number of float alu accesses */ - statistics::Scalar numFpAluAccesses; - - /* Number of float register file accesses */ - statistics::Scalar numFpRegReads; - statistics::Scalar numFpRegWrites; - - /* Number of integer alu accesses */ - statistics::Scalar numIntAluAccesses; - - /* Number of integer register file accesses */ - statistics::Scalar numIntRegReads; - statistics::Scalar numIntRegWrites; - - /* number of simulated memory references */ - statistics::Scalar numMemRefs; - - /* Number of misc register file accesses */ - statistics::Scalar numMiscRegReads; - statistics::Scalar numMiscRegWrites; - - /* Number of vector alu accesses */ - statistics::Scalar numVecAluAccesses; - - /* Number of predicate register file accesses */ - mutable statistics::Scalar numVecPredRegReads; - statistics::Scalar numVecPredRegWrites; - - /* Number of vector register file accesses */ - mutable statistics::Scalar numVecRegReads; - statistics::Scalar numVecRegWrites; - - /* Number of ops discarded before committing */ - statistics::Scalar numDiscardedOps; - }; - - struct CommitCPUStats: public statistics::Group - { - CommitCPUStats(statistics::Group *parent, int thread_id); - - /* Number of simulated instructions committed */ - statistics::Scalar numInsts; - statistics::Scalar numOps; - - /* Number of instructions committed that are not NOP or prefetches */ - statistics::Scalar numInstsNotNOP; - statistics::Scalar numOpsNotNOP; - - /* CPI/IPC for total cycle counts and macro insts */ - statistics::Formula cpi; - statistics::Formula ipc; - - /* Number of committed memory references. */ - statistics::Scalar numMemRefs; - - /* Number of float instructions */ - statistics::Scalar numFpInsts; - - /* Number of int instructions */ - statistics::Scalar numIntInsts; - - /* number of load instructions */ - statistics::Scalar numLoadInsts; - - /* Number of store instructions */ - statistics::Scalar numStoreInsts; - - /* Number of vector instructions */ - statistics::Scalar numVecInsts; - - /* Number of instructions committed by type (OpClass) */ - statistics::Vector committedInstType; - - /* number of control instructions committed by control inst type */ - statistics::Vector committedControl; - void updateComCtrlStats(const StaticInstPtr staticInst); - - }; - - std::vector> fetchStats; - std::vector> executeStats; - std::vector> commitStats; }; } // namespace gem5 diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc index e22e1628d2..b76bddc2fd 100644 --- a/src/cpu/kvm/base.cc +++ b/src/cpu/kvm/base.cc @@ -261,6 +261,8 @@ BaseKvmCPU::restartEqThread() BaseKvmCPU::StatGroup::StatGroup(statistics::Group *parent) : statistics::Group(parent), + ADD_STAT(committedInsts, statistics::units::Count::get(), + "Number of instructions committed"), ADD_STAT(numVMExits, statistics::units::Count::get(), "total number of KVM exits"), ADD_STAT(numVMHalfEntries, statistics::units::Count::get(), @@ -776,8 +778,7 @@ BaseKvmCPU::kvmRun(Tick ticks) /* Update statistics */ baseStats.numCycles += simCyclesExecuted;; - commitStats[thread->threadId()]->numInsts += instsExecuted; - baseStats.numInsts += instsExecuted; + stats.committedInsts += instsExecuted; ctrInsts += instsExecuted; DPRINTF(KvmRun, diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh index 7bbf393f9b..2d81c7c7eb 100644 --- a/src/cpu/kvm/base.hh +++ b/src/cpu/kvm/base.hh @@ -804,6 +804,7 @@ class BaseKvmCPU : public BaseCPU struct StatGroup : public statistics::Group { StatGroup(statistics::Group *parent); + statistics::Scalar committedInsts; statistics::Scalar numVMExits; statistics::Scalar numVMHalfEntries; statistics::Scalar numExitSignal; diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index 2908c2266f..5eaaf5804e 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -871,18 +871,49 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst) { thread->numInst++; thread->threadStats.numInsts++; - cpu.commitStats[inst->id.threadId]->numInsts++; - cpu.baseStats.numInsts++; + cpu.stats.numInsts++; /* Act on events related to instruction counts */ thread->comInstEventQueue.serviceEvents(thread->numInst); } thread->numOp++; thread->threadStats.numOps++; - cpu.commitStats[inst->id.threadId]->numOps++; - cpu.baseStats.numOps++; - cpu.commitStats[inst->id.threadId] - ->committedInstType[inst->staticInst->opClass()]++; + cpu.stats.numOps++; + cpu.stats.committedInstType[inst->id.threadId] + [inst->staticInst->opClass()]++; + + /** Add a count for every control instruction */ + if (inst->staticInst->isControl()) { + if (inst->staticInst->isReturn()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsReturn]++; + } + if (inst->staticInst->isCall()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsCall]++; + } + if (inst->staticInst->isDirectCtrl()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsDirectControl]++; + } + if (inst->staticInst->isIndirectCtrl()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsIndirectControl]++; + } + if (inst->staticInst->isCondCtrl()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsCondControl]++; + } + if (inst->staticInst->isUncondCtrl()) { + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsUncondControl]++; + + } + cpu.stats.committedControl[inst->id.threadId] + [gem5::StaticInstFlags::Flags::IsControl]++; + } + + /* Set the CP SeqNum to the numOps commit number */ if (inst->traceData) @@ -1023,7 +1054,7 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue, DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute" " inst: %s\n", thread_id, *inst); - cpu.fetchStats[thread_id]->numFetchSuspends++; + cpu.stats.numFetchSuspends++; updateBranchData(thread_id, BranchData::SuspendThread, inst, resume_pc, branch); @@ -1337,7 +1368,7 @@ Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard, *inst, ex_info.streamSeqNum); if (fault == NoFault) - cpu.executeStats[thread_id]->numDiscardedOps++; + cpu.stats.numDiscardedOps++; } /* Mark the mem inst as being in the LSQ */ diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc index e31cbe93a1..64d4c475e0 100644 --- a/src/cpu/minor/stats.cc +++ b/src/cpu/minor/stats.cc @@ -45,13 +45,47 @@ namespace minor MinorStats::MinorStats(BaseCPU *base_cpu) : statistics::Group(base_cpu), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of instructions committed"), + ADD_STAT(numOps, statistics::units::Count::get(), + "Number of ops (including micro ops) committed"), + ADD_STAT(numDiscardedOps, statistics::units::Count::get(), + "Number of ops (including micro ops) which were discarded before " + "commit"), + ADD_STAT(numFetchSuspends, statistics::units::Count::get(), + "Number of times Execute suspended instruction fetching"), ADD_STAT(quiesceCycles, statistics::units::Cycle::get(), "Total number of cycles that CPU has spent quiesced or waiting " - "for an interrupt") + "for an interrupt"), + ADD_STAT(cpi, statistics::units::Rate< + statistics::units::Cycle, statistics::units::Count>::get(), + "CPI: cycles per instruction"), + ADD_STAT(ipc, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "IPC: instructions per cycle"), + ADD_STAT(committedInstType, statistics::units::Count::get(), + "Class of committed instruction"), + ADD_STAT(committedControl, statistics::units::Count::get(), + "Class of control type instructions committed") { quiesceCycles.prereq(quiesceCycles); + cpi.precision(6); + cpi = base_cpu->baseStats.numCycles / numInsts; + + ipc.precision(6); + ipc = numInsts / base_cpu->baseStats.numCycles; + + committedInstType + .init(base_cpu->numThreads, enums::Num_OpClass) + .flags(statistics::total | statistics::pdf | statistics::dist); + committedInstType.ysubnames(enums::OpClassStrings); + + committedControl + .init(base_cpu->numThreads, StaticInstFlags::Flags::Num_Flags) + .flags(statistics::nozero); + committedControl.ysubnames(StaticInstFlags::FlagsStrings); } } // namespace minor diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh index 98ac80f15c..1ab81f4407 100644 --- a/src/cpu/minor/stats.hh +++ b/src/cpu/minor/stats.hh @@ -59,9 +59,31 @@ struct MinorStats : public statistics::Group { MinorStats(BaseCPU *parent); + /** Number of simulated instructions */ + statistics::Scalar numInsts; + + /** Number of simulated insts and microops */ + statistics::Scalar numOps; + + /** Number of ops discarded before committing */ + statistics::Scalar numDiscardedOps; + + /** Number of times fetch was asked to suspend by Execute */ + statistics::Scalar numFetchSuspends; + /** Number of cycles in quiescent state */ statistics::Scalar quiesceCycles; + /** CPI/IPC for total cycle counts and macro insts */ + statistics::Formula cpi; + statistics::Formula ipc; + + /** Number of instructions by type (OpClass) */ + statistics::Vector2d committedInstType; + + /** Number of branches commited */ + statistics::Vector2d committedControl; + }; } // namespace minor diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc index e1f01680ca..38dce831b1 100644 --- a/src/cpu/o3/commit.cc +++ b/src/cpu/o3/commit.cc @@ -156,10 +156,25 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit) "The number of times a branch was mispredicted"), ADD_STAT(numCommittedDist, statistics::units::Count::get(), "Number of insts commited each cycle"), + ADD_STAT(instsCommitted, statistics::units::Count::get(), + "Number of instructions committed"), + ADD_STAT(opsCommitted, statistics::units::Count::get(), + "Number of ops (including micro ops) committed"), + ADD_STAT(memRefs, statistics::units::Count::get(), + "Number of memory references committed"), + ADD_STAT(loads, statistics::units::Count::get(), "Number of loads committed"), ADD_STAT(amos, statistics::units::Count::get(), "Number of atomic instructions committed"), ADD_STAT(membars, statistics::units::Count::get(), "Number of memory barriers committed"), + ADD_STAT(branches, statistics::units::Count::get(), + "Number of branches committed"), + ADD_STAT(vectorInstructions, statistics::units::Count::get(), + "Number of committed Vector instructions."), + ADD_STAT(floating, statistics::units::Count::get(), + "Number of committed floating point instructions."), + ADD_STAT(integer, statistics::units::Count::get(), + "Number of committed integer instructions."), ADD_STAT(functionCalls, statistics::units::Count::get(), "Number of function calls committed."), ADD_STAT(committedInstType, statistics::units::Count::get(), @@ -177,6 +192,22 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit) .init(0,commit->commitWidth,1) .flags(statistics::pdf); + instsCommitted + .init(cpu->numThreads) + .flags(total); + + opsCommitted + .init(cpu->numThreads) + .flags(total); + + memRefs + .init(cpu->numThreads) + .flags(total); + + loads + .init(cpu->numThreads) + .flags(total); + amos .init(cpu->numThreads) .flags(total); @@ -185,6 +216,22 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit) .init(cpu->numThreads) .flags(total); + branches + .init(cpu->numThreads) + .flags(total); + + vectorInstructions + .init(cpu->numThreads) + .flags(total); + + floating + .init(cpu->numThreads) + .flags(total); + + integer + .init(cpu->numThreads) + .flags(total); + functionCalls .init(commit->numThreads) .flags(total); @@ -1336,12 +1383,9 @@ Commit::updateComInstStats(const DynInstPtr &inst) { ThreadID tid = inst->threadNumber; - if (!inst->isMicroop() || inst->isLastMicroop()) { - cpu->commitStats[tid]->numInsts++; - cpu->baseStats.numInsts++; - } - cpu->commitStats[tid]->numOps++; - cpu->baseStats.numOps++; + if (!inst->isMicroop() || inst->isLastMicroop()) + stats.instsCommitted[tid]++; + stats.opsCommitted[tid]++; // To match the old model, don't count nops and instruction // prefetches towards the total commit count. @@ -1352,20 +1396,21 @@ Commit::updateComInstStats(const DynInstPtr &inst) // // Control Instructions // - cpu->commitStats[tid]->updateComCtrlStats(inst->staticInst); + if (inst->isControl()) + stats.branches[tid]++; // // Memory references // if (inst->isMemRef()) { - cpu->commitStats[tid]->numMemRefs++; + stats.memRefs[tid]++; if (inst->isLoad()) { - cpu->commitStats[tid]->numLoadInsts++; + stats.loads[tid]++; } - if (inst->isStore()) { - cpu->commitStats[tid]->numStoreInsts++; + if (inst->isAtomic()) { + stats.amos[tid]++; } } @@ -1375,14 +1420,14 @@ Commit::updateComInstStats(const DynInstPtr &inst) // Integer Instruction if (inst->isInteger()) - cpu->commitStats[tid]->numIntInsts++; + stats.integer[tid]++; // Floating Point Instruction if (inst->isFloating()) - cpu->commitStats[tid]->numFpInsts++; + stats.floating[tid]++; // Vector Instruction if (inst->isVector()) - cpu->commitStats[tid]->numVecInsts++; + stats.vectorInstructions[tid]++; // Function Calls if (inst->isCall()) diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index eccd023d45..cf4eaf5d92 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -479,10 +479,26 @@ class Commit /** Distribution of the number of committed instructions each cycle. */ statistics::Distribution numCommittedDist; + /** Total number of instructions committed. */ + statistics::Vector instsCommitted; + /** Total number of ops (including micro ops) committed. */ + statistics::Vector opsCommitted; + /** Stat for the total number of committed memory references. */ + statistics::Vector memRefs; + /** Stat for the total number of committed loads. */ + statistics::Vector loads; /** Stat for the total number of committed atomics. */ statistics::Vector amos; /** Total number of committed memory barriers. */ statistics::Vector membars; + /** Total number of committed branches. */ + statistics::Vector branches; + /** Total number of vector instructions */ + statistics::Vector vectorInstructions; + /** Total number of floating point instructions */ + statistics::Vector floating; + /** Total number of integer instructions */ + statistics::Vector integer; /** Total number of function calls */ statistics::Vector functionCalls; /** Committed instructions by instruction type (OpClass) */ diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 93c58fef63..d2bacaa523 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -328,7 +328,47 @@ CPU::CPUStats::CPUStats(CPU *cpu) "to idling"), ADD_STAT(quiesceCycles, statistics::units::Cycle::get(), "Total number of cycles that CPU has spent quiesced or waiting " - "for an interrupt") + "for an interrupt"), + ADD_STAT(committedInsts, statistics::units::Count::get(), + "Number of Instructions Simulated"), + ADD_STAT(committedOps, statistics::units::Count::get(), + "Number of Ops (including micro ops) Simulated"), + ADD_STAT(cpi, statistics::units::Rate< + statistics::units::Cycle, statistics::units::Count>::get(), + "CPI: Cycles Per Instruction"), + ADD_STAT(totalCpi, statistics::units::Rate< + statistics::units::Cycle, statistics::units::Count>::get(), + "CPI: Total CPI of All Threads"), + ADD_STAT(ipc, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "IPC: Instructions Per Cycle"), + ADD_STAT(totalIpc, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "IPC: Total IPC of All Threads"), + ADD_STAT(intRegfileReads, statistics::units::Count::get(), + "Number of integer regfile reads"), + ADD_STAT(intRegfileWrites, statistics::units::Count::get(), + "Number of integer regfile writes"), + ADD_STAT(fpRegfileReads, statistics::units::Count::get(), + "Number of floating regfile reads"), + ADD_STAT(fpRegfileWrites, statistics::units::Count::get(), + "Number of floating regfile writes"), + ADD_STAT(vecRegfileReads, statistics::units::Count::get(), + "number of vector regfile reads"), + ADD_STAT(vecRegfileWrites, statistics::units::Count::get(), + "number of vector regfile writes"), + ADD_STAT(vecPredRegfileReads, statistics::units::Count::get(), + "number of predicate regfile reads"), + ADD_STAT(vecPredRegfileWrites, statistics::units::Count::get(), + "number of predicate regfile writes"), + ADD_STAT(ccRegfileReads, statistics::units::Count::get(), + "number of cc regfile reads"), + ADD_STAT(ccRegfileWrites, statistics::units::Count::get(), + "number of cc regfile writes"), + ADD_STAT(miscRegfileReads, statistics::units::Count::get(), + "number of misc regfile reads"), + ADD_STAT(miscRegfileWrites, statistics::units::Count::get(), + "number of misc regfile writes") { // Register any of the O3CPU's stats here. timesIdled @@ -340,6 +380,69 @@ CPU::CPUStats::CPUStats(CPU *cpu) quiesceCycles .prereq(quiesceCycles); + // Number of Instructions simulated + // -------------------------------- + // Should probably be in Base CPU but need templated + // MaxThreads so put in here instead + committedInsts + .init(cpu->numThreads) + .flags(statistics::total); + + committedOps + .init(cpu->numThreads) + .flags(statistics::total); + + cpi + .precision(6); + cpi = cpu->baseStats.numCycles / committedInsts; + + totalCpi + .precision(6); + totalCpi = cpu->baseStats.numCycles / sum(committedInsts); + + ipc + .precision(6); + ipc = committedInsts / cpu->baseStats.numCycles; + + totalIpc + .precision(6); + totalIpc = sum(committedInsts) / cpu->baseStats.numCycles; + + intRegfileReads + .prereq(intRegfileReads); + + intRegfileWrites + .prereq(intRegfileWrites); + + fpRegfileReads + .prereq(fpRegfileReads); + + fpRegfileWrites + .prereq(fpRegfileWrites); + + vecRegfileReads + .prereq(vecRegfileReads); + + vecRegfileWrites + .prereq(vecRegfileWrites); + + vecPredRegfileReads + .prereq(vecPredRegfileReads); + + vecPredRegfileWrites + .prereq(vecPredRegfileWrites); + + ccRegfileReads + .prereq(ccRegfileReads); + + ccRegfileWrites + .prereq(ccRegfileWrites); + + miscRegfileReads + .prereq(miscRegfileReads); + + miscRegfileWrites + .prereq(miscRegfileWrites); } void @@ -916,7 +1019,7 @@ CPU::readMiscRegNoEffect(int misc_reg, ThreadID tid) const RegVal CPU::readMiscReg(int misc_reg, ThreadID tid) { - executeStats[tid]->numMiscRegReads++; + cpuStats.miscRegfileReads++; return isa[tid]->readMiscReg(misc_reg); } @@ -929,29 +1032,29 @@ CPU::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid) void CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid) { - executeStats[tid]->numMiscRegWrites++; + cpuStats.miscRegfileWrites++; isa[tid]->setMiscReg(misc_reg, val); } RegVal -CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid) +CPU::getReg(PhysRegIdPtr phys_reg) { switch (phys_reg->classValue()) { case IntRegClass: - executeStats[tid]->numIntRegReads++; + cpuStats.intRegfileReads++; break; case FloatRegClass: - executeStats[tid]->numFpRegReads++; + cpuStats.fpRegfileReads++; break; case CCRegClass: - executeStats[tid]->numCCRegReads++; + cpuStats.ccRegfileReads++; break; case VecRegClass: case VecElemClass: - executeStats[tid]->numVecRegReads++; + cpuStats.vecRegfileReads++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegReads++; + cpuStats.vecPredRegfileReads++; break; default: break; @@ -960,24 +1063,24 @@ CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid) } void -CPU::getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid) +CPU::getReg(PhysRegIdPtr phys_reg, void *val) { switch (phys_reg->classValue()) { case IntRegClass: - executeStats[tid]->numIntRegReads++; + cpuStats.intRegfileReads++; break; case FloatRegClass: - executeStats[tid]->numFpRegReads++; + cpuStats.fpRegfileReads++; break; case CCRegClass: - executeStats[tid]->numCCRegReads++; + cpuStats.ccRegfileReads++; break; case VecRegClass: case VecElemClass: - executeStats[tid]->numVecRegReads++; + cpuStats.vecRegfileReads++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegReads++; + cpuStats.vecPredRegfileReads++; break; default: break; @@ -986,14 +1089,14 @@ CPU::getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid) } void * -CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid) +CPU::getWritableReg(PhysRegIdPtr phys_reg) { switch (phys_reg->classValue()) { case VecRegClass: - executeStats[tid]->numVecRegReads++; + cpuStats.vecRegfileReads++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegReads++; + cpuStats.vecPredRegfileReads++; break; default: break; @@ -1002,24 +1105,24 @@ CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid) } void -CPU::setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid) +CPU::setReg(PhysRegIdPtr phys_reg, RegVal val) { switch (phys_reg->classValue()) { case IntRegClass: - executeStats[tid]->numIntRegWrites++; + cpuStats.intRegfileWrites++; break; case FloatRegClass: - executeStats[tid]->numFpRegWrites++; + cpuStats.fpRegfileWrites++; break; case CCRegClass: - executeStats[tid]->numCCRegWrites++; + cpuStats.ccRegfileWrites++; break; case VecRegClass: case VecElemClass: - executeStats[tid]->numVecRegWrites++; + cpuStats.vecRegfileWrites++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegWrites++; + cpuStats.vecPredRegfileWrites++; break; default: break; @@ -1028,24 +1131,24 @@ CPU::setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid) } void -CPU::setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid) +CPU::setReg(PhysRegIdPtr phys_reg, const void *val) { switch (phys_reg->classValue()) { case IntRegClass: - executeStats[tid]->numIntRegWrites++; + cpuStats.intRegfileWrites++; break; case FloatRegClass: - executeStats[tid]->numFpRegWrites++; + cpuStats.fpRegfileWrites++; break; case CCRegClass: - executeStats[tid]->numCCRegWrites++; + cpuStats.ccRegfileWrites++; break; case VecRegClass: case VecElemClass: - executeStats[tid]->numVecRegWrites++; + cpuStats.vecRegfileWrites++; break; case VecPredRegClass: - executeStats[tid]->numVecPredRegWrites++; + cpuStats.vecPredRegfileWrites++; break; default: break; @@ -1127,14 +1230,14 @@ CPU::instDone(ThreadID tid, const DynInstPtr &inst) if (!inst->isMicroop() || inst->isLastMicroop()) { thread[tid]->numInst++; thread[tid]->threadStats.numInsts++; - commitStats[tid]->numInstsNotNOP++; + cpuStats.committedInsts[tid]++; // Check for instruction-count-based events. thread[tid]->comInstEventQueue.serviceEvents(thread[tid]->numInst); } thread[tid]->numOp++; thread[tid]->threadStats.numOps++; - commitStats[tid]->numOpsNotNOP++; + cpuStats.committedOps[tid]++; probeInstCommit(inst->staticInst, inst->pcState().instAddr()); } diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 7dc378428b..08a1312e73 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -310,12 +310,12 @@ class CPU : public BaseCPU */ void setMiscReg(int misc_reg, RegVal val, ThreadID tid); - RegVal getReg(PhysRegIdPtr phys_reg, ThreadID tid); - void getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid); - void *getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid); + RegVal getReg(PhysRegIdPtr phys_reg); + void getReg(PhysRegIdPtr phys_reg, void *val); + void *getWritableReg(PhysRegIdPtr phys_reg); - void setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid); - void setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid); + void setReg(PhysRegIdPtr phys_reg, RegVal val); + void setReg(PhysRegIdPtr phys_reg, const void *val); /** Architectural register accessors. Looks up in the commit * rename table to obtain the true physical index of the @@ -581,7 +581,38 @@ class CPU : public BaseCPU /** Stat for total number of cycles the CPU spends descheduled due to a * quiesce operation or waiting for an interrupt. */ statistics::Scalar quiesceCycles; + /** Stat for the number of committed instructions per thread. */ + statistics::Vector committedInsts; + /** Stat for the number of committed ops (including micro ops) per + * thread. */ + statistics::Vector committedOps; + /** Stat for the CPI per thread. */ + statistics::Formula cpi; + /** Stat for the total CPI. */ + statistics::Formula totalCpi; + /** Stat for the IPC per thread. */ + statistics::Formula ipc; + /** Stat for the total IPC. */ + statistics::Formula totalIpc; + //number of integer register file accesses + statistics::Scalar intRegfileReads; + statistics::Scalar intRegfileWrites; + //number of float register file accesses + statistics::Scalar fpRegfileReads; + statistics::Scalar fpRegfileWrites; + //number of vector register file accesses + mutable statistics::Scalar vecRegfileReads; + statistics::Scalar vecRegfileWrites; + //number of predicate register file accesses + mutable statistics::Scalar vecPredRegfileReads; + statistics::Scalar vecPredRegfileWrites; + //number of CC register file accesses + statistics::Scalar ccRegfileReads; + statistics::Scalar ccRegfileWrites; + //number of misc + statistics::Scalar miscRegfileReads; + statistics::Scalar miscRegfileWrites; } cpuStats; public: diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index c759c5eb38..54c0385374 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -1086,10 +1086,10 @@ class DynInst : public ExecContext, public RefCounted if (bytes == sizeof(RegVal)) { setRegOperand(staticInst.get(), idx, - cpu->getReg(prev_phys_reg, threadNumber)); + cpu->getReg(prev_phys_reg)); } else { uint8_t val[original_dest_reg.regClass().regBytes()]; - cpu->getReg(prev_phys_reg, val, threadNumber); + cpu->getReg(prev_phys_reg, val); setRegOperand(staticInst.get(), idx, val); } } @@ -1116,7 +1116,7 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedSrcIdx(idx); if (reg->is(InvalidRegClass)) return 0; - return cpu->getReg(reg, threadNumber); + return cpu->getReg(reg); } void @@ -1125,13 +1125,13 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedSrcIdx(idx); if (reg->is(InvalidRegClass)) return; - cpu->getReg(reg, val, threadNumber); + cpu->getReg(reg, val); } void * getWritableRegOperand(const StaticInst *si, int idx) override { - return cpu->getWritableReg(renamedDestIdx(idx), threadNumber); + return cpu->getWritableReg(renamedDestIdx(idx)); } /** @todo: Make results into arrays so they can handle multiple dest @@ -1143,7 +1143,7 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedDestIdx(idx); if (reg->is(InvalidRegClass)) return; - cpu->setReg(reg, val, threadNumber); + cpu->setReg(reg, val); setResult(reg->regClass(), val); } @@ -1153,7 +1153,7 @@ class DynInst : public ExecContext, public RefCounted const PhysRegIdPtr reg = renamedDestIdx(idx); if (reg->is(InvalidRegClass)) return; - cpu->setReg(reg, val, threadNumber); + cpu->setReg(reg, val); setResult(reg->regClass(), val); } }; diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc index f5fc6c62ec..d3cdd2c761 100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@ -158,6 +158,12 @@ Fetch::regProbePoints() Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch) : statistics::Group(cpu, "fetch"), + ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(), + "Number of cycles fetch is stalled on an Icache miss"), + ADD_STAT(insts, statistics::units::Count::get(), + "Number of instructions fetch has processed"), + ADD_STAT(branches, statistics::units::Count::get(), + "Number of branches that fetch encountered"), ADD_STAT(predictedBranches, statistics::units::Count::get(), "Number of branches that fetch has predicted taken"), ADD_STAT(cycles, statistics::units::Cycle::get(), @@ -194,8 +200,21 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch) "Number of instructions fetched each cycle (Total)"), ADD_STAT(idleRate, statistics::units::Ratio::get(), "Ratio of cycles fetch was idle", - idleCycles / cpu->baseStats.numCycles) + idleCycles / cpu->baseStats.numCycles), + ADD_STAT(branchRate, statistics::units::Ratio::get(), + "Number of branch fetches per cycle", + branches / cpu->baseStats.numCycles), + ADD_STAT(rate, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "Number of inst fetches per cycle", + insts / cpu->baseStats.numCycles) { + icacheStallCycles + .prereq(icacheStallCycles); + insts + .prereq(insts); + branches + .prereq(branches); predictedBranches .prereq(predictedBranches); cycles @@ -233,6 +252,10 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch) .flags(statistics::pdf); idleRate .prereq(idleRate); + branchRate + .flags(statistics::total); + rate + .flags(statistics::total); } void Fetch::setTimeBuffer(TimeBuffer *time_buffer) @@ -517,7 +540,7 @@ Fetch::lookupAndUpdateNextPC(const DynInstPtr &inst, PCStateBase &next_pc) inst->setPredTarg(next_pc); inst->setPredTaken(predict_taken); - cpu->fetchStats[tid]->numBranches++; + ++fetchStats.branches; if (predict_taken) { ++fetchStats.predictedBranches; @@ -1123,7 +1146,7 @@ Fetch::fetch(bool &status_change) fetchCacheLine(fetchAddr, tid, this_pc.instAddr()); if (fetchStatus[tid] == IcacheWaitResponse) - cpu->fetchStats[tid]->icacheStallCycles++; + ++fetchStats.icacheStallCycles; else if (fetchStatus[tid] == ItlbWait) ++fetchStats.tlbCycles; else @@ -1219,7 +1242,7 @@ Fetch::fetch(bool &status_change) staticInst = dec_ptr->decode(this_pc); // Increment stat of fetched instructions. - cpu->fetchStats[tid]->numInsts++; + ++fetchStats.insts; if (staticInst->isMacroop()) { curMacroop = staticInst; @@ -1549,7 +1572,7 @@ Fetch::profileStall(ThreadID tid) ++fetchStats.squashCycles; DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid); } else if (fetchStatus[tid] == IcacheWaitResponse) { - cpu->fetchStats[tid]->icacheStallCycles++; + ++fetchStats.icacheStallCycles; DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n", tid); } else if (fetchStatus[tid] == ItlbWait) { diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 6add31444d..cd311913f5 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -533,6 +533,12 @@ class Fetch FetchStatGroup(CPU *cpu, Fetch *fetch); // @todo: Consider making these // vectors and tracking on a per thread basis. + /** Stat for total number of cycles stalled due to an icache miss. */ + statistics::Scalar icacheStallCycles; + /** Stat for total number of fetched instructions. */ + statistics::Scalar insts; + /** Total number of fetched branches. */ + statistics::Scalar branches; /** Stat for total number of predicted branches. */ statistics::Scalar predictedBranches; /** Stat for total number of cycles spent fetching. */ @@ -575,6 +581,10 @@ class Fetch statistics::Distribution nisnDist; /** Rate of how often fetch was idle. */ statistics::Formula idleRate; + /** Number of branch fetches per cycle. */ + statistics::Formula branchRate; + /** Number of instruction fetched per cycle. */ + statistics::Formula rate; } fetchStats; }; diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc index 92d281ce93..7cf6c54542 100644 --- a/src/cpu/o3/iew.cc +++ b/src/cpu/o3/iew.cc @@ -217,14 +217,52 @@ IEW::IEWStats::IEWStats(CPU *cpu) IEW::IEWStats::ExecutedInstStats::ExecutedInstStats(CPU *cpu) : statistics::Group(cpu), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of executed instructions"), + ADD_STAT(numLoadInsts, statistics::units::Count::get(), + "Number of load instructions executed"), ADD_STAT(numSquashedInsts, statistics::units::Count::get(), "Number of squashed instructions skipped in execute"), ADD_STAT(numSwp, statistics::units::Count::get(), - "Number of swp insts executed") + "Number of swp insts executed"), + ADD_STAT(numNop, statistics::units::Count::get(), + "Number of nop insts executed"), + ADD_STAT(numRefs, statistics::units::Count::get(), + "Number of memory reference insts executed"), + ADD_STAT(numBranches, statistics::units::Count::get(), + "Number of branches executed"), + ADD_STAT(numStoreInsts, statistics::units::Count::get(), + "Number of stores executed"), + ADD_STAT(numRate, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "Inst execution rate", numInsts / cpu->baseStats.numCycles) { + numLoadInsts + .init(cpu->numThreads) + .flags(statistics::total); + numSwp .init(cpu->numThreads) .flags(statistics::total); + + numNop + .init(cpu->numThreads) + .flags(statistics::total); + + numRefs + .init(cpu->numThreads) + .flags(statistics::total); + + numBranches + .init(cpu->numThreads) + .flags(statistics::total); + + numStoreInsts + .flags(statistics::total); + numStoreInsts = numRefs - numLoadInsts; + + numRate + .flags(statistics::total); } void @@ -1015,7 +1053,7 @@ IEW::dispatchInsts(ThreadID tid) instQueue.recordProducer(inst); - cpu->executeStats[tid]->numNop++; + iewStats.executedInstStats.numNop[tid]++; add_to_iq = false; } else { @@ -1523,7 +1561,7 @@ IEW::updateExeInstStats(const DynInstPtr& inst) { ThreadID tid = inst->threadNumber; - cpu->executeStats[tid]->numInsts++; + iewStats.executedInstStats.numInsts++; #if TRACING_ON if (debug::O3PipeView) { @@ -1535,16 +1573,16 @@ IEW::updateExeInstStats(const DynInstPtr& inst) // Control operations // if (inst->isControl()) - cpu->executeStats[tid]->numBranches++; + iewStats.executedInstStats.numBranches[tid]++; // // Memory operations // if (inst->isMemRef()) { - cpu->executeStats[tid]->numMemRefs++; + iewStats.executedInstStats.numRefs[tid]++; if (inst->isLoad()) { - cpu->executeStats[tid]->numLoadInsts++; + iewStats.executedInstStats.numLoadInsts[tid]++; } } } diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 4fe8227dcc..80fed295df 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -455,11 +455,25 @@ class IEW { ExecutedInstStats(CPU *cpu); + /** Stat for total number of executed instructions. */ + statistics::Scalar numInsts; + /** Stat for total number of executed load instructions. */ + statistics::Vector numLoadInsts; /** Stat for total number of squashed instructions skipped at * execute. */ statistics::Scalar numSquashedInsts; /** Number of executed software prefetches. */ statistics::Vector numSwp; + /** Number of executed nops. */ + statistics::Vector numNop; + /** Number of executed meomory references. */ + statistics::Vector numRefs; + /** Number of executed branches. */ + statistics::Vector numBranches; + /** Number of executed store instructions. */ + statistics::Formula numStoreInsts; + /** Number of instructions executed per cycle. */ + statistics::Formula numRate; } executedInstStats; /** Number of instructions sent to commit. */ diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 35d149097c..768f63ede5 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -154,36 +154,10 @@ BaseSimpleCPU::countInst() if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { t_info.numInst++; + t_info.execContextStats.numInsts++; } t_info.numOp++; -} - -void -BaseSimpleCPU::countFetchInst() -{ - SimpleExecContext& t_info = *threadInfo[curThread]; - - if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { - // increment thread level numInsts fetched count - fetchStats[t_info.thread->threadId()]->numInsts++; - } - // increment thread level numOps fetched count - fetchStats[t_info.thread->threadId()]->numOps++; -} - -void -BaseSimpleCPU::countCommitInst() -{ - SimpleExecContext& t_info = *threadInfo[curThread]; - - if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { - // increment thread level and core level numInsts count - commitStats[t_info.thread->threadId()]->numInsts++; - baseStats.numInsts++; - } - // increment thread level and core level numOps count - commitStats[t_info.thread->threadId()]->numOps++; - baseStats.numOps++; + t_info.execContextStats.numOps++; } Counter @@ -402,11 +376,6 @@ BaseSimpleCPU::preExecute() if (predict_taken) ++t_info.execContextStats.numPredictedBranches; } - - // increment the fetch instruction stat counters - if (curStaticInst) { - countFetchInst(); - } } void @@ -419,7 +388,7 @@ BaseSimpleCPU::postExecute() Addr instAddr = threadContexts[curThread]->pcState().instAddr(); if (curStaticInst->isMemRef()) { - executeStats[t_info.thread->threadId()]->numMemRefs++; + t_info.execContextStats.numMemRefs++; } if (curStaticInst->isLoad()) { @@ -427,26 +396,26 @@ BaseSimpleCPU::postExecute() } if (curStaticInst->isControl()) { - ++fetchStats[t_info.thread->threadId()]->numBranches; + ++t_info.execContextStats.numBranches; } /* Power model statistics */ //integer alu accesses if (curStaticInst->isInteger()){ - executeStats[t_info.thread->threadId()]->numIntAluAccesses++; - commitStats[t_info.thread->threadId()]->numIntInsts++; + t_info.execContextStats.numIntAluAccesses++; + t_info.execContextStats.numIntInsts++; } //float alu accesses if (curStaticInst->isFloating()){ - executeStats[t_info.thread->threadId()]->numFpAluAccesses++; - commitStats[t_info.thread->threadId()]->numFpInsts++; + t_info.execContextStats.numFpAluAccesses++; + t_info.execContextStats.numFpInsts++; } //vector alu accesses if (curStaticInst->isVector()){ - executeStats[t_info.thread->threadId()]->numVecAluAccesses++; - commitStats[t_info.thread->threadId()]->numVecInsts++; + t_info.execContextStats.numVecAluAccesses++; + t_info.execContextStats.numVecInsts++; } //Matrix alu accesses @@ -460,22 +429,22 @@ BaseSimpleCPU::postExecute() t_info.execContextStats.numCallsReturns++; } + //the number of branch predictions that will be made + if (curStaticInst->isCondCtrl()){ + t_info.execContextStats.numCondCtrlInsts++; + } + //result bus acceses if (curStaticInst->isLoad()){ - commitStats[t_info.thread->threadId()]->numLoadInsts++; + t_info.execContextStats.numLoadInsts++; } if (curStaticInst->isStore() || curStaticInst->isAtomic()){ - commitStats[t_info.thread->threadId()]->numStoreInsts++; + t_info.execContextStats.numStoreInsts++; } /* End power model statistics */ - commitStats[t_info.thread->threadId()] - ->committedInstType[curStaticInst->opClass()]++; - commitStats[t_info.thread->threadId()]->updateComCtrlStats(curStaticInst); - - /* increment the committed numInsts and numOps stats */ - countCommitInst(); + t_info.execContextStats.statExecutedInstType[curStaticInst->opClass()]++; if (FullSystem) traceFunctions(instAddr); diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index 46a25a0a42..df5290cf3c 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -182,8 +182,6 @@ class BaseSimpleCPU : public BaseCPU } void countInst(); - void countFetchInst(); - void countCommitInst(); Counter totalInsts() const override; Counter totalOps() const override; diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index c0927fcadd..0f20763f28 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -86,12 +86,60 @@ class SimpleExecContext : public ExecContext : statistics::Group(cpu, csprintf("exec_context.thread_%i", thread->threadId()).c_str()), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of instructions committed"), + ADD_STAT(numOps, statistics::units::Count::get(), + "Number of ops (including micro ops) committed"), + ADD_STAT(numIntAluAccesses, statistics::units::Count::get(), + "Number of integer alu accesses"), + ADD_STAT(numFpAluAccesses, statistics::units::Count::get(), + "Number of float alu accesses"), + ADD_STAT(numVecAluAccesses, statistics::units::Count::get(), + "Number of vector alu accesses"), ADD_STAT(numMatAluAccesses, statistics::units::Count::get(), "Number of matrix alu accesses"), ADD_STAT(numCallsReturns, statistics::units::Count::get(), "Number of times a function call or return occured"), + ADD_STAT(numCondCtrlInsts, statistics::units::Count::get(), + "Number of instructions that are conditional controls"), + ADD_STAT(numIntInsts, statistics::units::Count::get(), + "Number of integer instructions"), + ADD_STAT(numFpInsts, statistics::units::Count::get(), + "Number of float instructions"), + ADD_STAT(numVecInsts, statistics::units::Count::get(), + "Number of vector instructions"), ADD_STAT(numMatInsts, statistics::units::Count::get(), "Number of matrix instructions"), + ADD_STAT(numIntRegReads, statistics::units::Count::get(), + "Number of times the integer registers were read"), + ADD_STAT(numIntRegWrites, statistics::units::Count::get(), + "Number of times the integer registers were written"), + ADD_STAT(numFpRegReads, statistics::units::Count::get(), + "Number of times the floating registers were read"), + ADD_STAT(numFpRegWrites, statistics::units::Count::get(), + "Number of times the floating registers were written"), + ADD_STAT(numVecRegReads, statistics::units::Count::get(), + "Number of times the vector registers were read"), + ADD_STAT(numVecRegWrites, statistics::units::Count::get(), + "Number of times the vector registers were written"), + ADD_STAT(numVecPredRegReads, statistics::units::Count::get(), + "Number of times the predicate registers were read"), + ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(), + "Number of times the predicate registers were written"), + ADD_STAT(numCCRegReads, statistics::units::Count::get(), + "Number of times the CC registers were read"), + ADD_STAT(numCCRegWrites, statistics::units::Count::get(), + "Number of times the CC registers were written"), + ADD_STAT(numMiscRegReads, statistics::units::Count::get(), + "Number of times the Misc registers were read"), + ADD_STAT(numMiscRegWrites, statistics::units::Count::get(), + "Number of times the Misc registers were written"), + ADD_STAT(numMemRefs, statistics::units::Count::get(), + "Number of memory refs"), + ADD_STAT(numLoadInsts, statistics::units::Count::get(), + "Number of load instructions"), + ADD_STAT(numStoreInsts, statistics::units::Count::get(), + "Number of store instructions"), ADD_STAT(numIdleCycles, statistics::units::Cycle::get(), "Number of idle cycles"), ADD_STAT(numBusyCycles, statistics::units::Cycle::get(), @@ -100,35 +148,64 @@ class SimpleExecContext : public ExecContext "Percentage of non-idle cycles"), ADD_STAT(idleFraction, statistics::units::Ratio::get(), "Percentage of idle cycles"), + ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(), + "ICache total stall cycles"), + ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(), + "DCache total stall cycles"), + ADD_STAT(numBranches, statistics::units::Count::get(), + "Number of branches fetched"), ADD_STAT(numPredictedBranches, statistics::units::Count::get(), "Number of branches predicted as taken"), ADD_STAT(numBranchMispred, statistics::units::Count::get(), "Number of branch mispredictions"), + ADD_STAT(statExecutedInstType, statistics::units::Count::get(), + "Class of executed instruction."), numRegReads{ - &(cpu->executeStats[thread->threadId()]->numIntRegReads), - &(cpu->executeStats[thread->threadId()]->numFpRegReads), - &(cpu->executeStats[thread->threadId()]->numVecRegReads), - &(cpu->executeStats[thread->threadId()]->numVecRegReads), - &(cpu->executeStats[thread->threadId()]->numVecPredRegReads), - &(cpu->executeStats[thread->threadId()]->numCCRegReads), - &numMatRegReads + &numIntRegReads, + &numFpRegReads, + &numVecRegReads, + &numVecRegReads, + &numVecPredRegReads, + &numMatRegReads, + &numCCRegReads }, numRegWrites{ - &(cpu->executeStats[thread->threadId()]->numIntRegWrites), - &(cpu->executeStats[thread->threadId()]->numFpRegWrites), - &(cpu->executeStats[thread->threadId()]->numVecRegWrites), - &(cpu->executeStats[thread->threadId()]->numVecRegWrites), - &(cpu->executeStats[thread->threadId()] - ->numVecPredRegWrites), - &(cpu->executeStats[thread->threadId()]->numCCRegWrites), - &numMatRegWrites + &numIntRegWrites, + &numFpRegWrites, + &numVecRegWrites, + &numVecRegWrites, + &numVecPredRegWrites, + &numMatRegWrites, + &numCCRegWrites } { + numCCRegReads + .flags(statistics::nozero); + + numCCRegWrites + .flags(statistics::nozero); + + icacheStallCycles + .prereq(icacheStallCycles); + + dcacheStallCycles + .prereq(dcacheStallCycles); + + statExecutedInstType + .init(enums::Num_OpClass) + .flags(statistics::total | statistics::pdf | statistics::dist); + + for (unsigned i = 0; i < Num_OpClasses; ++i) { + statExecutedInstType.subname(i, enums::OpClassStrings[i]); + } idleFraction = statistics::constant(1.0) - notIdleFraction; numIdleCycles = idleFraction * cpu->baseStats.numCycles; numBusyCycles = notIdleFraction * cpu->baseStats.numCycles; + numBranches + .prereq(numBranches); + numPredictedBranches .prereq(numPredictedBranches); @@ -136,19 +213,73 @@ class SimpleExecContext : public ExecContext .prereq(numBranchMispred); } + // Number of simulated instructions + statistics::Scalar numInsts; + statistics::Scalar numOps; + + // Number of integer alu accesses + statistics::Scalar numIntAluAccesses; + + // Number of float alu accesses + statistics::Scalar numFpAluAccesses; + + // Number of vector alu accesses + statistics::Scalar numVecAluAccesses; + // Number of matrix alu accesses statistics::Scalar numMatAluAccesses; // Number of function calls/returns statistics::Scalar numCallsReturns; + // Conditional control instructions; + statistics::Scalar numCondCtrlInsts; + + // Number of int instructions + statistics::Scalar numIntInsts; + + // Number of float instructions + statistics::Scalar numFpInsts; + + // Number of vector instructions + statistics::Scalar numVecInsts; + // Number of matrix instructions statistics::Scalar numMatInsts; + // Number of integer register file accesses + statistics::Scalar numIntRegReads; + statistics::Scalar numIntRegWrites; + + // Number of float register file accesses + statistics::Scalar numFpRegReads; + statistics::Scalar numFpRegWrites; + + // Number of vector register file accesses + mutable statistics::Scalar numVecRegReads; + statistics::Scalar numVecRegWrites; + + // Number of predicate register file accesses + mutable statistics::Scalar numVecPredRegReads; + statistics::Scalar numVecPredRegWrites; + // Number of matrix register file accesses mutable statistics::Scalar numMatRegReads; statistics::Scalar numMatRegWrites; + // Number of condition code register file accesses + statistics::Scalar numCCRegReads; + statistics::Scalar numCCRegWrites; + + // Number of misc register file accesses + statistics::Scalar numMiscRegReads; + statistics::Scalar numMiscRegWrites; + + // Number of simulated memory references + statistics::Scalar numMemRefs; + statistics::Scalar numLoadInsts; + statistics::Scalar numStoreInsts; + // Number of idle cycles statistics::Formula numIdleCycles; @@ -159,13 +290,24 @@ class SimpleExecContext : public ExecContext statistics::Average notIdleFraction; statistics::Formula idleFraction; + // Number of cycles stalled for I-cache responses + statistics::Scalar icacheStallCycles; + + // Number of cycles stalled for D-cache responses + statistics::Scalar dcacheStallCycles; + /// @{ + /// Total number of branches fetched + statistics::Scalar numBranches; /// Number of branches predicted as taken statistics::Scalar numPredictedBranches; /// Number of misprediced branches statistics::Scalar numBranchMispred; /// @} + // Instruction mix histogram by OpClass + statistics::Vector statExecutedInstType; + std::array numRegReads; std::array numRegWrites; @@ -226,7 +368,7 @@ class SimpleExecContext : public ExecContext RegVal readMiscRegOperand(const StaticInst *si, int idx) override { - cpu->executeStats[thread->threadId()]->numMiscRegReads++; + execContextStats.numMiscRegReads++; const RegId& reg = si->srcRegIdx(idx); assert(reg.is(MiscRegClass)); return thread->readMiscReg(reg.index()); @@ -235,7 +377,7 @@ class SimpleExecContext : public ExecContext void setMiscRegOperand(const StaticInst *si, int idx, RegVal val) override { - cpu->executeStats[thread->threadId()]->numMiscRegWrites++; + execContextStats.numMiscRegWrites++; const RegId& reg = si->destRegIdx(idx); assert(reg.is(MiscRegClass)); thread->setMiscReg(reg.index(), val); @@ -248,7 +390,7 @@ class SimpleExecContext : public ExecContext RegVal readMiscReg(int misc_reg) override { - cpu->executeStats[thread->threadId()]->numMiscRegReads++; + execContextStats.numMiscRegReads++; return thread->readMiscReg(misc_reg); } @@ -259,7 +401,7 @@ class SimpleExecContext : public ExecContext void setMiscReg(int misc_reg, RegVal val) override { - cpu->executeStats[thread->threadId()]->numMiscRegWrites++; + execContextStats.numMiscRegWrites++; thread->setMiscReg(misc_reg, val); }