From 2f93672bddd2705c53e51281a3ff3f749acb138c Mon Sep 17 00:00:00 2001 From: Melissa Jost Date: Mon, 13 Mar 2023 03:09:38 -0700 Subject: [PATCH] cpu: Move numInsts, numOps, ipc, cpi to BaseCPU In BaseCPU::BaseCPUStats, numInsts and numOps track per CPU core committed instructions and operations. In BaseCPU::FetchCPUStats, numInsts and numOps track per thread fetched instructions and operations. In BaseCPU::CommitCPUStats, numInsts and numOps track per thread committed instructions and operations. In BaseSimpleCPU, the countInst() function has been split into countInst(), countFetchInst(), and countCommitInst(). countFetchInst() increments numInsts and numOps of the FetchCPUStats group for a thread. countCommitInst() increments the numInsts and numOps of the CommitCPUStats group for a thread and of the BaseCPUStats group for a CPU core. These functions are called in the appropriate stage within timing.cc and atomic.cc. The call to countInst() is left unchanged. countFetchInst() is called in preExecute(). countCommitInst() is called in postExecute(). For MinorCPU, only the commit level numInsts and numOps stats have been implemented. IPC and CPI stats have been added to BaseCPUStats (core level) and CommitCPUStats (thread level). The formulas for the IPC and CPI stats in CommitCPUStats are set in the BaseCPU constructor, after the CommitCPUStats stat group object has been created. Change-Id: I71c831c44202fc7d14c75b27a33eb91204f3a273 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69100 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- src/cpu/base.cc | 38 +++++++++++++++++++++++++++++++++++++- src/cpu/base.hh | 20 ++++++++++++++++++++ src/cpu/minor/execute.cc | 5 +++++ src/cpu/simple/base.cc | 36 ++++++++++++++++++++++++++++++++++++ src/cpu/simple/base.hh | 2 ++ 5 files changed, 100 insertions(+), 1 deletion(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index 5592bf0d55..d7dda13ab1 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -198,7 +198,11 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker) for (int i = 0; i < numThreads; i++) { fetchStats.emplace_back(new FetchCPUStats(this, i)); executeStats.emplace_back(new ExecuteCPUStats(this, i)); - commitStats.emplace_back(new CommitCPUStats(this, i)); + // create commitStat object for thread i and set ipc, cpi formulas + CommitCPUStats* commitStatptr = new CommitCPUStats(this, i); + commitStatptr->ipc = commitStatptr->numInsts / baseStats.numCycles; + commitStatptr->cpi = baseStats.numCycles / commitStatptr->numInsts; + commitStats.emplace_back(commitStatptr); } } @@ -392,13 +396,28 @@ BaseCPU::probeInstCommit(const StaticInstPtr &inst, Addr pc) BaseCPU:: BaseCPUStats::BaseCPUStats(statistics::Group *parent) : statistics::Group(parent), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of instructions committed (core level)"), + ADD_STAT(numOps, statistics::units::Count::get(), + "Number of ops (including micro ops) committed (core level)"), ADD_STAT(numCycles, statistics::units::Cycle::get(), "Number of cpu cycles simulated"), + ADD_STAT(cpi, statistics::units::Rate< + statistics::units::Cycle, statistics::units::Count>::get(), + "CPI: cycles per instruction (core level)"), + ADD_STAT(ipc, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "IPC: instructions per cycle (core level)"), ADD_STAT(numWorkItemsStarted, statistics::units::Count::get(), "Number of work items this cpu started"), ADD_STAT(numWorkItemsCompleted, statistics::units::Count::get(), "Number of work items this cpu completed") { + cpi.precision(6); + cpi = numCycles / numInsts; + + ipc.precision(6); + ipc = numInsts / numCycles; } void @@ -839,6 +858,10 @@ BaseCPU::GlobalStats::GlobalStats(statistics::Group *parent) BaseCPU:: FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id) : statistics::Group(parent, csprintf("fetchStats%i", thread_id).c_str()), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of instructions fetched (thread level)"), + ADD_STAT(numOps, statistics::units::Count::get(), + "Number of ops (including micro ops) fetched (thread level)"), ADD_STAT(numBranches, statistics::units::Count::get(), "Number of branches fetched"), ADD_STAT(numFetchSuspends, statistics::units::Count::get(), @@ -927,6 +950,16 @@ ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id) BaseCPU:: CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id) : statistics::Group(parent, csprintf("commitStats%i", thread_id).c_str()), + ADD_STAT(numInsts, statistics::units::Count::get(), + "Number of instructions committed (thread level)"), + ADD_STAT(numOps, statistics::units::Count::get(), + "Number of ops (including micro ops) committed (thread level)"), + ADD_STAT(cpi, statistics::units::Rate< + statistics::units::Cycle, statistics::units::Count>::get(), + "CPI: cycles per instruction (thread level)"), + ADD_STAT(ipc, statistics::units::Rate< + statistics::units::Count, statistics::units::Cycle>::get(), + "IPC: instructions per cycle (thread level)"), ADD_STAT(numMemRefs, statistics::units::Count::get(), "Number of memory references committed"), ADD_STAT(numFpInsts, statistics::units::Count::get(), @@ -944,6 +977,9 @@ CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id) ADD_STAT(committedControl, statistics::units::Count::get(), "Class of control type instructions committed") { + cpi.precision(6); + ipc.precision(6); + committedInstType .init(enums::Num_OpClass) .flags(statistics::total | statistics::pdf | statistics::dist); diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 934e56fd05..5e2432f01d 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -633,8 +633,14 @@ class BaseCPU : public ClockedObject struct BaseCPUStats : public statistics::Group { BaseCPUStats(statistics::Group *parent); + // Number of CPU insts and ops committed at CPU core level + statistics::Scalar numInsts; + statistics::Scalar numOps; // Number of CPU cycles simulated statistics::Scalar numCycles; + /* CPI/IPC for total cycle counts and macro insts */ + statistics::Formula cpi; + statistics::Formula ipc; statistics::Scalar numWorkItemsStarted; statistics::Scalar numWorkItemsCompleted; } baseStats; @@ -684,6 +690,12 @@ class BaseCPU : public ClockedObject { FetchCPUStats(statistics::Group *parent, int thread_id); + /* Total number of instructions fetched */ + statistics::Scalar numInsts; + + /* Total number of operations fetched */ + statistics::Scalar numOps; + /* Total number of branches fetched */ statistics::Scalar numBranches; @@ -743,6 +755,14 @@ class BaseCPU : public ClockedObject { CommitCPUStats(statistics::Group *parent, int thread_id); + /* Number of simulated instructions committed */ + statistics::Scalar numInsts; + statistics::Scalar numOps; + + /* CPI/IPC for total cycle counts and macro insts */ + statistics::Formula cpi; + statistics::Formula ipc; + /* Number of committed memory references. */ statistics::Scalar numMemRefs; diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc index 99d12d65b5..a65a77e643 100644 --- a/src/cpu/minor/execute.cc +++ b/src/cpu/minor/execute.cc @@ -872,6 +872,9 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst) thread->numInst++; thread->threadStats.numInsts++; cpu.stats.numInsts++; + // update both old and new stas + cpu.commitStats[inst->id.threadId]->numInsts++; + cpu.baseStats.numInsts++; /* Act on events related to instruction counts */ thread->comInstEventQueue.serviceEvents(thread->numInst); @@ -880,6 +883,8 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst) thread->threadStats.numOps++; cpu.stats.numOps++; // update both old and new stats + cpu.commitStats[inst->id.threadId]->numOps++; + cpu.baseStats.numOps++; cpu.commitStats[inst->id.threadId] ->committedInstType[inst->staticInst->opClass()]++; cpu.stats.committedInstType[inst->id.threadId] diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc index 40f0fa7684..9e831a25f6 100644 --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -160,6 +160,34 @@ BaseSimpleCPU::countInst() t_info.execContextStats.numOps++; } +void +BaseSimpleCPU::countFetchInst() +{ + SimpleExecContext& t_info = *threadInfo[curThread]; + + if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { + // increment thread level numInsts fetched count + fetchStats[t_info.thread->threadId()]->numInsts++; + } + // increment thread level numOps fetched count + fetchStats[t_info.thread->threadId()]->numOps++; +} + +void +BaseSimpleCPU::countCommitInst() +{ + SimpleExecContext& t_info = *threadInfo[curThread]; + + if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { + // increment thread level and core level numInsts count + commitStats[t_info.thread->threadId()]->numInsts++; + baseStats.numInsts++; + } + // increment thread level and core level numOps count + commitStats[t_info.thread->threadId()]->numOps++; + baseStats.numOps++; +} + Counter BaseSimpleCPU::totalInsts() const { @@ -376,6 +404,11 @@ BaseSimpleCPU::preExecute() if (predict_taken) ++t_info.execContextStats.numPredictedBranches; } + + // increment the fetch instruction stat counters + if (curStaticInst) { + countFetchInst(); + } } void @@ -467,6 +500,9 @@ BaseSimpleCPU::postExecute() commitStats[t_info.thread->threadId()]->updateComCtrlStats(curStaticInst); t_info.execContextStats.statExecutedInstType[curStaticInst->opClass()]++; + /* increment the committed numInsts and numOps stats */ + countCommitInst(); + if (FullSystem) traceFunctions(instAddr); diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh index df5290cf3c..46a25a0a42 100644 --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -182,6 +182,8 @@ class BaseSimpleCPU : public BaseCPU } void countInst(); + void countFetchInst(); + void countCommitInst(); Counter totalInsts() const override; Counter totalOps() const override;