cpu-o3: convert fetch to new style stats

Change-Id: Ib50a303570ac1dd45ff11a32a823f47a6c4c02cd
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/33815
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
eavivi
2020-08-31 12:10:19 -07:00
committed by Eden Avivi
parent d2fb5c047f
commit 30dbd90783
3 changed files with 173 additions and 198 deletions

View File

@@ -441,7 +441,6 @@ FullO3CPU<Impl>::regStats()
.precision(6);
totalIpc = sum(committedInsts) / numCycles;
this->fetch.regStats();
this->decode.regStats();
this->rename.regStats();
this->iew.regStats();

View File

@@ -222,8 +222,6 @@ class DefaultFetch
/** Returns the name of fetch. */
std::string name() const;
/** Registers statistics. */
void regStats();
/** Registers probes. */
void regProbePoints();
@@ -330,7 +328,8 @@ class DefaultFetch
const DynInstPtr squashInst, ThreadID tid);
/** Squashes a specific thread and resets the PC. Also tells the CPU to
* remove any instructions between fetch and decode that should be sqaushed.
* remove any instructions between fetch and decode
* that should be squashed.
*/
void squashFromDecode(const TheISA::PCState &newPC,
const DynInstPtr squashInst,
@@ -546,57 +545,65 @@ class DefaultFetch
/** Event used to delay fault generation of translation faults */
FinishTranslationEvent finishTranslationEvent;
// @todo: Consider making these vectors and tracking on a per thread basis.
/** Stat for total number of cycles stalled due to an icache miss. */
Stats::Scalar icacheStallCycles;
/** Stat for total number of fetched instructions. */
Stats::Scalar fetchedInsts;
/** Total number of fetched branches. */
Stats::Scalar fetchedBranches;
/** Stat for total number of predicted branches. */
Stats::Scalar predictedBranches;
/** Stat for total number of cycles spent fetching. */
Stats::Scalar fetchCycles;
/** Stat for total number of cycles spent squashing. */
Stats::Scalar fetchSquashCycles;
/** Stat for total number of cycles spent waiting for translation */
Stats::Scalar fetchTlbCycles;
/** Stat for total number of cycles spent blocked due to other stages in
* the pipeline.
*/
Stats::Scalar fetchIdleCycles;
/** Total number of cycles spent blocked. */
Stats::Scalar fetchBlockedCycles;
/** Total number of cycles spent in any other state. */
Stats::Scalar fetchMiscStallCycles;
/** Total number of cycles spent in waiting for drains. */
Stats::Scalar fetchPendingDrainCycles;
/** Total number of stall cycles caused by no active threads to run. */
Stats::Scalar fetchNoActiveThreadStallCycles;
/** Total number of stall cycles caused by pending traps. */
Stats::Scalar fetchPendingTrapStallCycles;
/** Total number of stall cycles caused by pending quiesce instructions. */
Stats::Scalar fetchPendingQuiesceStallCycles;
/** Total number of stall cycles caused by I-cache wait retrys. */
Stats::Scalar fetchIcacheWaitRetryStallCycles;
/** Stat for total number of fetched cache lines. */
Stats::Scalar fetchedCacheLines;
/** Total number of outstanding icache accesses that were dropped
* due to a squash.
*/
Stats::Scalar fetchIcacheSquashes;
/** Total number of outstanding tlb accesses that were dropped
* due to a squash.
*/
Stats::Scalar fetchTlbSquashes;
/** Distribution of number of instructions fetched each cycle. */
Stats::Distribution fetchNisnDist;
/** Rate of how often fetch was idle. */
Stats::Formula idleRate;
/** Number of branch fetches per cycle. */
Stats::Formula branchRate;
/** Number of instruction fetched per cycle. */
Stats::Formula fetchRate;
protected:
/**
 * Statistics collected by the fetch stage, bundled into one Stats::Group.
 *
 * The constructor registers the group under the name "fetch" with the CPU
 * as its parent. Members are declared in the same order as the
 * constructor's initializer list; keep the two in sync when adding or
 * removing a statistic. A single instance, fetchStats, is held by the
 * enclosing fetch class and is what the stage increments at run time.
 */
struct FetchStatGroup : public Stats::Group
{
/**
 * @param cpu   CPU used as the parent of this group and as the source
 *              of numCycles for the rate formulas.
 * @param fetch Fetch stage whose fetchWidth bounds nisnDist.
 */
FetchStatGroup(O3CPU *cpu, DefaultFetch *fetch);
// @todo: Consider making these
// vectors and tracking on a per thread basis.
/** Number of cycles fetch was stalled waiting on an icache response. */
Stats::Scalar icacheStallCycles;
/** Number of instructions fetch has processed. */
Stats::Scalar insts;
/** Number of branches that fetch encountered. */
Stats::Scalar branches;
/** Number of branches that fetch predicted taken. */
Stats::Scalar predictedBranches;
/** Number of cycles fetch ran and was not squashing or blocked. */
Stats::Scalar cycles;
/** Number of cycles fetch spent squashing. */
Stats::Scalar squashCycles;
/** Number of cycles fetch spent waiting for address translation. */
Stats::Scalar tlbCycles;
/** Number of cycles in which the fetch stage was idle
 * (counted while the thread's fetch status is Idle).
 */
Stats::Scalar idleCycles;
/** Number of cycles fetch spent blocked. */
Stats::Scalar blockedCycles;
/** Number of cycles spent in any other stall state (interrupts,
 * bad addresses, or out of MSHRs). */
Stats::Scalar miscStallCycles;
/** Number of cycles fetch spent waiting for a drain. */
Stats::Scalar pendingDrainCycles;
/** Number of stall cycles caused by having no active thread to run. */
Stats::Scalar noActiveThreadStallCycles;
/** Number of stall cycles caused by pending traps. */
Stats::Scalar pendingTrapStallCycles;
/** Number of stall cycles
 * caused by pending quiesce instructions. */
Stats::Scalar pendingQuiesceStallCycles;
/** Number of stall cycles caused by I-cache wait retries. */
Stats::Scalar icacheWaitRetryStallCycles;
/** Number of cache lines fetched. */
Stats::Scalar cacheLines;
/** Number of outstanding icache accesses that were dropped
 * due to a squash.
 */
Stats::Scalar icacheSquashes;
/** Number of outstanding tlb accesses that were dropped
 * due to a squash.
 */
Stats::Scalar tlbSquashes;
/** Distribution of the number of instructions fetched each cycle;
 * sampled once per tick, buckets 0..fetchWidth in steps of 1. */
Stats::Distribution nisnDist;
/** Percentage of CPU cycles in which fetch was idle
 * (idleCycles * 100 / numCycles). */
Stats::Formula idleRate;
/** Number of branches fetched per CPU cycle (branches / numCycles). */
Stats::Formula branchRate;
/** Number of instructions fetched per CPU cycle (insts / numCycles). */
Stats::Formula rate;
} fetchStats;
};
#endif //__CPU_O3_FETCH_HH__

View File

@@ -94,7 +94,7 @@ DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)
numThreads(params->numThreads),
numFetchingThreads(params->smtNumFetchingThreads),
icachePort(this, _cpu),
finishTranslationEvent(this)
finishTranslationEvent(this), fetchStats(_cpu, this)
{
if (numThreads > Impl::MaxThreads)
fatal("numThreads (%d) is larger than compiled limit (%d),\n"
@@ -163,128 +163,97 @@ DefaultFetch<Impl>::regProbePoints()
}
template <class Impl>
void
DefaultFetch<Impl>::regStats()
/**
 * Builds the fetch-stage statistics group.
 *
 * The group is registered with @p cpu as its parent under the name
 * "fetch". Each ADD_STAT entry pairs one member statistic with its
 * description text; the three formula stats (idleRate, branchRate, rate)
 * are additionally given their expression, all relative to
 * cpu->numCycles. Entries appear in member-declaration order.
 *
 * @param cpu   CPU that parents this group and supplies numCycles.
 * @param fetch Fetch stage; its fetchWidth is read in the constructor
 *              body to size the nisnDist distribution.
 */
DefaultFetch<Impl>::
FetchStatGroup::FetchStatGroup(O3CPU *cpu, DefaultFetch *fetch)
    : Stats::Group(cpu, "fetch"),
    ADD_STAT(icacheStallCycles,
        "Number of cycles fetch is stalled on an Icache miss"),
    ADD_STAT(insts, "Number of instructions fetch has processed"),
    ADD_STAT(branches, "Number of branches that fetch encountered"),
    ADD_STAT(predictedBranches,
        "Number of branches that fetch has predicted taken"),
    ADD_STAT(cycles,
        "Number of cycles fetch has run and was not squashing or blocked"),
    ADD_STAT(squashCycles, "Number of cycles fetch has spent squashing"),
    ADD_STAT(tlbCycles,
        "Number of cycles fetch has spent waiting for tlb"),
    ADD_STAT(idleCycles, "Number of cycles fetch was idle"),
    ADD_STAT(blockedCycles, "Number of cycles fetch has spent blocked"),
    // Adjacent string literals are concatenated with nothing in between,
    // so the first piece must end with a space; otherwise the description
    // reads "interrupts,or bad addresses".
    ADD_STAT(miscStallCycles,
        "Number of cycles fetch has spent waiting on interrupts, "
        "or bad addresses, or out of MSHRs"),
    ADD_STAT(pendingDrainCycles,
        "Number of cycles fetch has spent waiting on pipes to drain"),
    ADD_STAT(noActiveThreadStallCycles,
        "Number of stall cycles due to no active thread to fetch from"),
    ADD_STAT(pendingTrapStallCycles,
        "Number of stall cycles due to pending traps"),
    ADD_STAT(pendingQuiesceStallCycles,
        "Number of stall cycles due to pending quiesce instructions"),
    ADD_STAT(icacheWaitRetryStallCycles,
        "Number of stall cycles due to full MSHR"),
    ADD_STAT(cacheLines, "Number of cache lines fetched"),
    ADD_STAT(icacheSquashes,
        "Number of outstanding Icache misses that were squashed"),
    ADD_STAT(tlbSquashes,
        "Number of outstanding ITLB misses that were squashed"),
    ADD_STAT(nisnDist,
        "Number of instructions fetched each cycle (Total)"),
    ADD_STAT(idleRate, "Percent of cycles fetch was idle",
        idleCycles * 100 / cpu->numCycles),
    ADD_STAT(branchRate, "Number of branch fetches per cycle",
        branches / cpu->numCycles),
    ADD_STAT(rate, "Number of inst fetches per cycle",
        insts / cpu->numCycles)
{
icacheStallCycles
.name(name() + ".icacheStallCycles")
.desc("Number of cycles fetch is stalled on an Icache miss")
.prereq(icacheStallCycles);
fetchedInsts
.name(name() + ".Insts")
.desc("Number of instructions fetch has processed")
.prereq(fetchedInsts);
fetchedBranches
.name(name() + ".Branches")
.desc("Number of branches that fetch encountered")
.prereq(fetchedBranches);
predictedBranches
.name(name() + ".predictedBranches")
.desc("Number of branches that fetch has predicted taken")
.prereq(predictedBranches);
fetchCycles
.name(name() + ".Cycles")
.desc("Number of cycles fetch has run and was not squashing or"
" blocked")
.prereq(fetchCycles);
fetchSquashCycles
.name(name() + ".SquashCycles")
.desc("Number of cycles fetch has spent squashing")
.prereq(fetchSquashCycles);
fetchTlbCycles
.name(name() + ".TlbCycles")
.desc("Number of cycles fetch has spent waiting for tlb")
.prereq(fetchTlbCycles);
fetchIdleCycles
.name(name() + ".IdleCycles")
.desc("Number of cycles fetch was idle")
.prereq(fetchIdleCycles);
fetchBlockedCycles
.name(name() + ".BlockedCycles")
.desc("Number of cycles fetch has spent blocked")
.prereq(fetchBlockedCycles);
fetchedCacheLines
.name(name() + ".CacheLines")
.desc("Number of cache lines fetched")
.prereq(fetchedCacheLines);
fetchMiscStallCycles
.name(name() + ".MiscStallCycles")
.desc("Number of cycles fetch has spent waiting on interrupts, or "
"bad addresses, or out of MSHRs")
.prereq(fetchMiscStallCycles);
fetchPendingDrainCycles
.name(name() + ".PendingDrainCycles")
.desc("Number of cycles fetch has spent waiting on pipes to drain")
.prereq(fetchPendingDrainCycles);
fetchNoActiveThreadStallCycles
.name(name() + ".NoActiveThreadStallCycles")
.desc("Number of stall cycles due to no active thread to fetch from")
.prereq(fetchNoActiveThreadStallCycles);
fetchPendingTrapStallCycles
.name(name() + ".PendingTrapStallCycles")
.desc("Number of stall cycles due to pending traps")
.prereq(fetchPendingTrapStallCycles);
fetchPendingQuiesceStallCycles
.name(name() + ".PendingQuiesceStallCycles")
.desc("Number of stall cycles due to pending quiesce instructions")
.prereq(fetchPendingQuiesceStallCycles);
fetchIcacheWaitRetryStallCycles
.name(name() + ".IcacheWaitRetryStallCycles")
.desc("Number of stall cycles due to full MSHR")
.prereq(fetchIcacheWaitRetryStallCycles);
fetchIcacheSquashes
.name(name() + ".IcacheSquashes")
.desc("Number of outstanding Icache misses that were squashed")
.prereq(fetchIcacheSquashes);
fetchTlbSquashes
.name(name() + ".ItlbSquashes")
.desc("Number of outstanding ITLB misses that were squashed")
.prereq(fetchTlbSquashes);
fetchNisnDist
.init(/* base value */ 0,
/* last value */ fetchWidth,
icacheStallCycles
.prereq(icacheStallCycles);
insts
.prereq(insts);
branches
.prereq(branches);
predictedBranches
.prereq(predictedBranches);
cycles
.prereq(cycles);
squashCycles
.prereq(squashCycles);
tlbCycles
.prereq(tlbCycles);
idleCycles
.prereq(idleCycles);
blockedCycles
.prereq(blockedCycles);
cacheLines
.prereq(cacheLines);
miscStallCycles
.prereq(miscStallCycles);
pendingDrainCycles
.prereq(pendingDrainCycles);
noActiveThreadStallCycles
.prereq(noActiveThreadStallCycles);
pendingTrapStallCycles
.prereq(pendingTrapStallCycles);
pendingQuiesceStallCycles
.prereq(pendingQuiesceStallCycles);
icacheWaitRetryStallCycles
.prereq(icacheWaitRetryStallCycles);
icacheSquashes
.prereq(icacheSquashes);
tlbSquashes
.prereq(tlbSquashes);
nisnDist
.init(/* base value */ 0,
/* last value */ fetch->fetchWidth,
/* bucket size */ 1)
.name(name() + ".rateDist")
.desc("Number of instructions fetched each cycle (Total)")
.flags(Stats::pdf);
idleRate
.name(name() + ".idleRate")
.desc("Percent of cycles fetch was idle")
.prereq(idleRate);
idleRate = fetchIdleCycles * 100 / cpu->numCycles;
branchRate
.name(name() + ".branchRate")
.desc("Number of branch fetches per cycle")
.flags(Stats::total);
branchRate = fetchedBranches / cpu->numCycles;
fetchRate
.name(name() + ".rate")
.desc("Number of inst fetches per cycle")
.flags(Stats::total);
fetchRate = fetchedInsts / cpu->numCycles;
.flags(Stats::pdf);
idleRate
.prereq(idleRate);
branchRate
.flags(Stats::total);
rate
.flags(Stats::total);
}
template<class Impl>
void
DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
@@ -393,7 +362,7 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
// to return.
if (fetchStatus[tid] != IcacheWaitResponse ||
pkt->req != memReq[tid]) {
++fetchIcacheSquashes;
++fetchStats.icacheSquashes;
delete pkt;
return;
}
@@ -586,10 +555,10 @@ DefaultFetch<Impl>::lookupAndUpdateNextPC(
inst->setPredTarg(nextPC);
inst->setPredTaken(predict_taken);
++fetchedBranches;
++fetchStats.branches;
if (predict_taken) {
++predictedBranches;
++fetchStats.predictedBranches;
}
return predict_taken;
@@ -662,7 +631,7 @@ DefaultFetch<Impl>::finishTranslation(const Fault &fault,
mem_req->getVaddr() != memReq[tid]->getVaddr()) {
DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
tid);
++fetchTlbSquashes;
++fetchStats.tlbSquashes;
return;
}
@@ -688,7 +657,7 @@ DefaultFetch<Impl>::finishTranslation(const Fault &fault,
fetchBufferValid[tid] = false;
DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
fetchedCacheLines++;
fetchStats.cacheLines++;
// Access the cache.
if (!icachePort.sendTimingReq(data_pkt)) {
@@ -801,7 +770,7 @@ DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
// some opportunities to handle interrupts may be missed.
delayedCommit[tid] = true;
++fetchSquashCycles;
++fetchStats.squashCycles;
}
template<class Impl>
@@ -930,7 +899,7 @@ DefaultFetch<Impl>::tick()
}
// Record number of instructions fetched this cycle for distribution.
fetchNisnDist.sample(numInst);
fetchStats.nisnDist.sample(numInst);
if (status_change) {
// Change the fetch stage status if there was a status change.
@@ -1202,23 +1171,23 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
if (fetchStatus[tid] == IcacheWaitResponse)
++icacheStallCycles;
++fetchStats.icacheStallCycles;
else if (fetchStatus[tid] == ItlbWait)
++fetchTlbCycles;
++fetchStats.tlbCycles;
else
++fetchMiscStallCycles;
++fetchStats.miscStallCycles;
return;
} else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) {
// Stall CPU if an interrupt is posted and we're not issuing
// an delayed commit micro-op currently (delayed commit instructions
// are not interruptable by interrupts, only faults)
++fetchMiscStallCycles;
++fetchStats.miscStallCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is stalled!\n", tid);
return;
}
} else {
if (fetchStatus[tid] == Idle) {
++fetchIdleCycles;
++fetchStats.idleCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is idle!\n", tid);
}
@@ -1226,7 +1195,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
return;
}
++fetchCycles;
++fetchStats.cycles;
TheISA::PCState nextPC = thisPC;
@@ -1296,7 +1265,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
staticInst = decoder[tid]->decode(thisPC);
// Increment stat of fetched instructions.
++fetchedInsts;
++fetchStats.insts;
if (staticInst->isMacroop()) {
curMacroop = staticInst;
@@ -1625,35 +1594,35 @@ DefaultFetch<Impl>::profileStall(ThreadID tid) {
// @todo Per-thread stats
if (stalls[tid].drain) {
++fetchPendingDrainCycles;
++fetchStats.pendingDrainCycles;
DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
} else if (activeThreads->empty()) {
++fetchNoActiveThreadStallCycles;
++fetchStats.noActiveThreadStallCycles;
DPRINTF(Fetch, "Fetch has no active thread!\n");
} else if (fetchStatus[tid] == Blocked) {
++fetchBlockedCycles;
++fetchStats.blockedCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is blocked!\n", tid);
} else if (fetchStatus[tid] == Squashing) {
++fetchSquashCycles;
++fetchStats.squashCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid);
} else if (fetchStatus[tid] == IcacheWaitResponse) {
++icacheStallCycles;
++fetchStats.icacheStallCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n",
tid);
} else if (fetchStatus[tid] == ItlbWait) {
++fetchTlbCycles;
++fetchStats.tlbCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is waiting ITLB walk to "
"finish!\n", tid);
} else if (fetchStatus[tid] == TrapPending) {
++fetchPendingTrapStallCycles;
++fetchStats.pendingTrapStallCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending trap!\n",
tid);
} else if (fetchStatus[tid] == QuiescePending) {
++fetchPendingQuiesceStallCycles;
++fetchStats.pendingQuiesceStallCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending quiesce "
"instruction!\n", tid);
} else if (fetchStatus[tid] == IcacheWaitRetry) {
++fetchIcacheWaitRetryStallCycles;
++fetchStats.icacheWaitRetryStallCycles;
DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an I-cache retry!\n",
tid);
} else if (fetchStatus[tid] == NoGoodAddr) {