base-stats,misc: Rename Stats namespace as statistics

As part of recent decisions regarding namespace
naming conventions, all namespaces will be changed
to snake case.

::Stats became ::statistics.

"statistics" was chosen over "stats" to avoid conflicts with
existing variables (there are far too many variables named
"stats" in the codebase), which would make this patch even
more disruptive for users.

Change-Id: If877b12d7dac356f86e3b3d941bf7558a4fd8719
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/45421
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Daniel R. Carvalho
2021-05-06 20:00:51 -03:00
committed by Daniel Carvalho
parent fa505f1c23
commit 98ac080ec4
228 changed files with 3078 additions and 2970 deletions

View File

@@ -2104,8 +2104,9 @@ ComputeUnit::LDSPort::recvReqRetry()
}
}
ComputeUnit::ComputeUnitStats::ComputeUnitStats(Stats::Group *parent, int n_wf)
: Stats::Group(parent),
ComputeUnit::ComputeUnitStats::ComputeUnitStats(statistics::Group *parent,
int n_wf)
: statistics::Group(parent),
ADD_STAT(vALUInsts, "Number of vector ALU insts issued."),
ADD_STAT(vALUInstsPerWF, "The avg. number of vector ALU insts issued "
"per-wavefront."),
@@ -2290,7 +2291,8 @@ ComputeUnit::ComputeUnitStats::ComputeUnitStats(Stats::Group *parent, int n_wf)
activeLanesPerGMemInstrDist.init(1, cu->wfSize(), 4);
activeLanesPerLMemInstrDist.init(1, cu->wfSize(), 4);
headTailLatency.init(0, 1000000, 10000).flags(Stats::pdf | Stats::oneline);
headTailLatency.init(0, 1000000, 10000).flags(statistics::pdf |
statistics::oneline);
waveLevelParallelism.init(0, n_wf * cu->numVectorALUs, 1);
instInterleave.init(cu->numVectorALUs, 0, 20, 1);

View File

@@ -939,152 +939,152 @@ class ComputeUnit : public ClockedObject
void updateInstStats(GPUDynInstPtr gpuDynInst);
int activeWaves;
struct ComputeUnitStats : public Stats::Group
struct ComputeUnitStats : public statistics::Group
{
ComputeUnitStats(Stats::Group *parent, int n_wf);
ComputeUnitStats(statistics::Group *parent, int n_wf);
Stats::Scalar vALUInsts;
Stats::Formula vALUInstsPerWF;
Stats::Scalar sALUInsts;
Stats::Formula sALUInstsPerWF;
Stats::Scalar instCyclesVALU;
Stats::Scalar instCyclesSALU;
Stats::Scalar threadCyclesVALU;
Stats::Formula vALUUtilization;
Stats::Scalar ldsNoFlatInsts;
Stats::Formula ldsNoFlatInstsPerWF;
Stats::Scalar flatVMemInsts;
Stats::Formula flatVMemInstsPerWF;
Stats::Scalar flatLDSInsts;
Stats::Formula flatLDSInstsPerWF;
Stats::Scalar vectorMemWrites;
Stats::Formula vectorMemWritesPerWF;
Stats::Scalar vectorMemReads;
Stats::Formula vectorMemReadsPerWF;
Stats::Scalar scalarMemWrites;
Stats::Formula scalarMemWritesPerWF;
Stats::Scalar scalarMemReads;
Stats::Formula scalarMemReadsPerWF;
statistics::Scalar vALUInsts;
statistics::Formula vALUInstsPerWF;
statistics::Scalar sALUInsts;
statistics::Formula sALUInstsPerWF;
statistics::Scalar instCyclesVALU;
statistics::Scalar instCyclesSALU;
statistics::Scalar threadCyclesVALU;
statistics::Formula vALUUtilization;
statistics::Scalar ldsNoFlatInsts;
statistics::Formula ldsNoFlatInstsPerWF;
statistics::Scalar flatVMemInsts;
statistics::Formula flatVMemInstsPerWF;
statistics::Scalar flatLDSInsts;
statistics::Formula flatLDSInstsPerWF;
statistics::Scalar vectorMemWrites;
statistics::Formula vectorMemWritesPerWF;
statistics::Scalar vectorMemReads;
statistics::Formula vectorMemReadsPerWF;
statistics::Scalar scalarMemWrites;
statistics::Formula scalarMemWritesPerWF;
statistics::Scalar scalarMemReads;
statistics::Formula scalarMemReadsPerWF;
Stats::Formula vectorMemReadsPerKiloInst;
Stats::Formula vectorMemWritesPerKiloInst;
Stats::Formula vectorMemInstsPerKiloInst;
Stats::Formula scalarMemReadsPerKiloInst;
Stats::Formula scalarMemWritesPerKiloInst;
Stats::Formula scalarMemInstsPerKiloInst;
statistics::Formula vectorMemReadsPerKiloInst;
statistics::Formula vectorMemWritesPerKiloInst;
statistics::Formula vectorMemInstsPerKiloInst;
statistics::Formula scalarMemReadsPerKiloInst;
statistics::Formula scalarMemWritesPerKiloInst;
statistics::Formula scalarMemInstsPerKiloInst;
// Cycles required to send register source (addr and data) from
// register files to memory pipeline, per SIMD.
Stats::Vector instCyclesVMemPerSimd;
Stats::Vector instCyclesScMemPerSimd;
Stats::Vector instCyclesLdsPerSimd;
statistics::Vector instCyclesVMemPerSimd;
statistics::Vector instCyclesScMemPerSimd;
statistics::Vector instCyclesLdsPerSimd;
Stats::Scalar globalReads;
Stats::Scalar globalWrites;
Stats::Formula globalMemInsts;
Stats::Scalar argReads;
Stats::Scalar argWrites;
Stats::Formula argMemInsts;
Stats::Scalar spillReads;
Stats::Scalar spillWrites;
Stats::Formula spillMemInsts;
Stats::Scalar groupReads;
Stats::Scalar groupWrites;
Stats::Formula groupMemInsts;
Stats::Scalar privReads;
Stats::Scalar privWrites;
Stats::Formula privMemInsts;
Stats::Scalar readonlyReads;
Stats::Scalar readonlyWrites;
Stats::Formula readonlyMemInsts;
Stats::Scalar kernargReads;
Stats::Scalar kernargWrites;
Stats::Formula kernargMemInsts;
statistics::Scalar globalReads;
statistics::Scalar globalWrites;
statistics::Formula globalMemInsts;
statistics::Scalar argReads;
statistics::Scalar argWrites;
statistics::Formula argMemInsts;
statistics::Scalar spillReads;
statistics::Scalar spillWrites;
statistics::Formula spillMemInsts;
statistics::Scalar groupReads;
statistics::Scalar groupWrites;
statistics::Formula groupMemInsts;
statistics::Scalar privReads;
statistics::Scalar privWrites;
statistics::Formula privMemInsts;
statistics::Scalar readonlyReads;
statistics::Scalar readonlyWrites;
statistics::Formula readonlyMemInsts;
statistics::Scalar kernargReads;
statistics::Scalar kernargWrites;
statistics::Formula kernargMemInsts;
Stats::Distribution waveLevelParallelism;
statistics::Distribution waveLevelParallelism;
// the following stats compute the avg. TLB access latency per
// uncoalesced request (only for data)
Stats::Scalar tlbRequests;
Stats::Scalar tlbCycles;
Stats::Formula tlbLatency;
statistics::Scalar tlbRequests;
statistics::Scalar tlbCycles;
statistics::Formula tlbLatency;
// hitsPerTLBLevel[x] are the hits in Level x TLB.
// x = 0 is the page table.
Stats::Vector hitsPerTLBLevel;
statistics::Vector hitsPerTLBLevel;
Stats::Scalar ldsBankAccesses;
Stats::Distribution ldsBankConflictDist;
statistics::Scalar ldsBankAccesses;
statistics::Distribution ldsBankConflictDist;
// over all memory instructions executed over all wavefronts
// how many touched 0-4 pages, 4-8, ..., 60-64 pages
Stats::Distribution pageDivergenceDist;
statistics::Distribution pageDivergenceDist;
// count of non-flat global memory vector instructions executed
Stats::Scalar dynamicGMemInstrCnt;
statistics::Scalar dynamicGMemInstrCnt;
// count of flat global memory vector instructions executed
Stats::Scalar dynamicFlatMemInstrCnt;
Stats::Scalar dynamicLMemInstrCnt;
statistics::Scalar dynamicFlatMemInstrCnt;
statistics::Scalar dynamicLMemInstrCnt;
Stats::Scalar wgBlockedDueBarrierAllocation;
Stats::Scalar wgBlockedDueLdsAllocation;
statistics::Scalar wgBlockedDueBarrierAllocation;
statistics::Scalar wgBlockedDueLdsAllocation;
// Number of instructions executed, i.e. if 64 (or 32 or 7) lanes are
// active when the instruction is committed, this number is still
// incremented by 1
Stats::Scalar numInstrExecuted;
statistics::Scalar numInstrExecuted;
// Number of cycles among successive instruction executions across all
// wavefronts of the same CU
Stats::Distribution execRateDist;
statistics::Distribution execRateDist;
// number of individual vector operations executed
Stats::Scalar numVecOpsExecuted;
statistics::Scalar numVecOpsExecuted;
// number of individual f16 vector operations executed
Stats::Scalar numVecOpsExecutedF16;
statistics::Scalar numVecOpsExecutedF16;
// number of individual f32 vector operations executed
Stats::Scalar numVecOpsExecutedF32;
statistics::Scalar numVecOpsExecutedF32;
// number of individual f64 vector operations executed
Stats::Scalar numVecOpsExecutedF64;
statistics::Scalar numVecOpsExecutedF64;
// number of individual FMA 16,32,64 vector operations executed
Stats::Scalar numVecOpsExecutedFMA16;
Stats::Scalar numVecOpsExecutedFMA32;
Stats::Scalar numVecOpsExecutedFMA64;
statistics::Scalar numVecOpsExecutedFMA16;
statistics::Scalar numVecOpsExecutedFMA32;
statistics::Scalar numVecOpsExecutedFMA64;
// number of individual MAC 16,32,64 vector operations executed
Stats::Scalar numVecOpsExecutedMAC16;
Stats::Scalar numVecOpsExecutedMAC32;
Stats::Scalar numVecOpsExecutedMAC64;
statistics::Scalar numVecOpsExecutedMAC16;
statistics::Scalar numVecOpsExecutedMAC32;
statistics::Scalar numVecOpsExecutedMAC64;
// number of individual MAD 16,32,64 vector operations executed
Stats::Scalar numVecOpsExecutedMAD16;
Stats::Scalar numVecOpsExecutedMAD32;
Stats::Scalar numVecOpsExecutedMAD64;
statistics::Scalar numVecOpsExecutedMAD16;
statistics::Scalar numVecOpsExecutedMAD32;
statistics::Scalar numVecOpsExecutedMAD64;
// total number of two op FP vector operations executed
Stats::Scalar numVecOpsExecutedTwoOpFP;
statistics::Scalar numVecOpsExecutedTwoOpFP;
// Total cycles that something is running on the GPU
Stats::Scalar totalCycles;
Stats::Formula vpc; // vector ops per cycle
Stats::Formula vpc_f16; // vector ops per cycle
Stats::Formula vpc_f32; // vector ops per cycle
Stats::Formula vpc_f64; // vector ops per cycle
Stats::Formula ipc; // vector instructions per cycle
Stats::Distribution controlFlowDivergenceDist;
Stats::Distribution activeLanesPerGMemInstrDist;
Stats::Distribution activeLanesPerLMemInstrDist;
statistics::Scalar totalCycles;
statistics::Formula vpc; // vector ops per cycle
statistics::Formula vpc_f16; // vector ops per cycle
statistics::Formula vpc_f32; // vector ops per cycle
statistics::Formula vpc_f64; // vector ops per cycle
statistics::Formula ipc; // vector instructions per cycle
statistics::Distribution controlFlowDivergenceDist;
statistics::Distribution activeLanesPerGMemInstrDist;
statistics::Distribution activeLanesPerLMemInstrDist;
// number of vector ALU instructions received
Stats::Formula numALUInstsExecuted;
statistics::Formula numALUInstsExecuted;
// number of times a WG cannot start due to lack of free VGPRs in SIMDs
Stats::Scalar numTimesWgBlockedDueVgprAlloc;
statistics::Scalar numTimesWgBlockedDueVgprAlloc;
// number of times a WG cannot start due to lack of free SGPRs in SIMDs
Stats::Scalar numTimesWgBlockedDueSgprAlloc;
Stats::Scalar numCASOps;
Stats::Scalar numFailedCASOps;
Stats::Scalar completedWfs;
Stats::Scalar completedWGs;
statistics::Scalar numTimesWgBlockedDueSgprAlloc;
statistics::Scalar numCASOps;
statistics::Scalar numFailedCASOps;
statistics::Scalar completedWfs;
statistics::Scalar completedWGs;
// distribution in latency difference between first and last cache block
// arrival ticks
Stats::Distribution headTailLatency;
statistics::Distribution headTailLatency;
// Track the amount of interleaving between wavefronts on each SIMD.
// This stat is sampled using instExecPerSimd to compute the number
// of instructions that have been executed on a SIMD between a WF
// executing two successive instructions.
Stats::VectorDistribution instInterleave;
statistics::VectorDistribution instInterleave;
} stats;
};

View File

@@ -344,8 +344,9 @@ GPUDispatcher::scheduleDispatch()
}
}
GPUDispatcher::GPUDispatcherStats::GPUDispatcherStats(Stats::Group *parent)
: Stats::Group(parent),
GPUDispatcher::GPUDispatcherStats::GPUDispatcherStats(
statistics::Group *parent)
: statistics::Group(parent),
ADD_STAT(numKernelLaunched, "number of kernel launched"),
ADD_STAT(cyclesWaitingForDispatch, "number of cycles with outstanding "
"wavefronts that are waiting to be dispatched")

View File

@@ -93,12 +93,12 @@ class GPUDispatcher : public SimObject
bool dispatchActive;
protected:
struct GPUDispatcherStats : public Stats::Group
struct GPUDispatcherStats : public statistics::Group
{
GPUDispatcherStats(Stats::Group *parent);
GPUDispatcherStats(statistics::Group *parent);
Stats::Scalar numKernelLaunched;
Stats::Scalar cyclesWaitingForDispatch;
statistics::Scalar numKernelLaunched;
statistics::Scalar cyclesWaitingForDispatch;
} stats;
};

View File

@@ -197,8 +197,8 @@ ExecStage::exec()
collectStatistics(PostExec, 0);
}
ExecStage::ExecStageStats::ExecStageStats(Stats::Group *parent)
: Stats::Group(parent, "ExecStage"),
ExecStage::ExecStageStats::ExecStageStats(statistics::Group *parent)
: statistics::Group(parent, "ExecStage"),
ADD_STAT(numTransActiveIdle,
"number of CU transitions from active to idle"),
ADD_STAT(numCyclesWithNoIssue, "number of cycles the CU issues nothing"),

View File

@@ -97,27 +97,27 @@ class ExecStage
const std::string _name;
protected:
struct ExecStageStats : public Stats::Group
struct ExecStageStats : public statistics::Group
{
ExecStageStats(Stats::Group *parent);
ExecStageStats(statistics::Group *parent);
// number of transitions from active to idle
Stats::Scalar numTransActiveIdle;
statistics::Scalar numTransActiveIdle;
// number of idle cycles
Stats::Scalar numCyclesWithNoIssue;
statistics::Scalar numCyclesWithNoIssue;
// number of busy cycles
Stats::Scalar numCyclesWithInstrIssued;
statistics::Scalar numCyclesWithInstrIssued;
// SIMDs active per cycle
Stats::Distribution spc;
statistics::Distribution spc;
// duration of idle periods in cycles
Stats::Distribution idleDur;
statistics::Distribution idleDur;
// number of cycles during which at least one
// instruction was issued to an execution resource type
Stats::Vector numCyclesWithInstrTypeIssued;
statistics::Vector numCyclesWithInstrTypeIssued;
// number of idle cycles during which the scheduler
// issued no instructions targeting a specific
// execution resource type
Stats::Vector numCyclesWithNoInstrTypeIssued;
statistics::Vector numCyclesWithNoInstrTypeIssued;
} stats;
};

View File

@@ -90,8 +90,8 @@ FetchStage::fetch(PacketPtr pkt, Wavefront *wavefront)
_fetchUnit[wavefront->simdId].fetch(pkt, wavefront);
}
FetchStage::FetchStageStats::FetchStageStats(Stats::Group *parent)
: Stats::Group(parent, "FetchStage"),
FetchStage::FetchStageStats::FetchStageStats(statistics::Group *parent)
: statistics::Group(parent, "FetchStage"),
ADD_STAT(instFetchInstReturned, "For each instruction fetch request "
"received record how many instructions you got from it")
{

View File

@@ -74,11 +74,11 @@ class FetchStage
const std::string _name;
protected:
struct FetchStageStats : public Stats::Group
struct FetchStageStats : public statistics::Group
{
FetchStageStats(Stats::Group *parent);
FetchStageStats(statistics::Group *parent);
Stats::Distribution instFetchInstReturned;
statistics::Distribution instFetchInstReturned;
} stats;
};

View File

@@ -290,8 +290,8 @@ GlobalMemPipeline::handleResponse(GPUDynInstPtr gpuDynInst)
}
GlobalMemPipeline::
GlobalMemPipelineStats::GlobalMemPipelineStats(Stats::Group *parent)
: Stats::Group(parent, "GlobalMemPipeline"),
GlobalMemPipelineStats::GlobalMemPipelineStats(statistics::Group *parent)
: statistics::Group(parent, "GlobalMemPipeline"),
ADD_STAT(loadVrfBankConflictCycles, "total number of cycles GM data "
"are delayed before updating the VRF")
{

View File

@@ -143,14 +143,14 @@ class GlobalMemPipeline
std::queue<GPUDynInstPtr> gmIssuedRequests;
protected:
struct GlobalMemPipelineStats : public Stats::Group
struct GlobalMemPipelineStats : public statistics::Group
{
GlobalMemPipelineStats(Stats::Group *parent);
GlobalMemPipelineStats(statistics::Group *parent);
// number of cycles of delaying the update of a VGPR that is the
// target of a load instruction (or the load component of an atomic)
// The delay is due to VRF bank conflicts
Stats::Scalar loadVrfBankConflictCycles;
statistics::Scalar loadVrfBankConflictCycles;
} stats;
};

View File

@@ -1430,8 +1430,8 @@ namespace X86ISA
TLBFootprint.clear();
}
GpuTLB::GpuTLBStats::GpuTLBStats(Stats::Group *parent)
: Stats::Group(parent),
GpuTLB::GpuTLBStats::GpuTLBStats(statistics::Group *parent)
: statistics::Group(parent),
ADD_STAT(localNumTLBAccesses, "Number of TLB accesses"),
ADD_STAT(localNumTLBHits, "Number of TLB hits"),
ADD_STAT(localNumTLBMisses, "Number of TLB misses"),

View File

@@ -400,37 +400,37 @@ namespace X86ISA
EventFunctionWrapper exitEvent;
protected:
struct GpuTLBStats : public Stats::Group
struct GpuTLBStats : public statistics::Group
{
GpuTLBStats(Stats::Group *parent);
GpuTLBStats(statistics::Group *parent);
// local_stats are as seen from the TLB
// without taking into account coalescing
Stats::Scalar localNumTLBAccesses;
Stats::Scalar localNumTLBHits;
Stats::Scalar localNumTLBMisses;
Stats::Formula localTLBMissRate;
statistics::Scalar localNumTLBAccesses;
statistics::Scalar localNumTLBHits;
statistics::Scalar localNumTLBMisses;
statistics::Formula localTLBMissRate;
// global_stats are as seen from the
// CU's perspective taking into account
// all coalesced requests.
Stats::Scalar globalNumTLBAccesses;
Stats::Scalar globalNumTLBHits;
Stats::Scalar globalNumTLBMisses;
Stats::Formula globalTLBMissRate;
statistics::Scalar globalNumTLBAccesses;
statistics::Scalar globalNumTLBHits;
statistics::Scalar globalNumTLBMisses;
statistics::Formula globalTLBMissRate;
// from the CU perspective (global)
Stats::Scalar accessCycles;
statistics::Scalar accessCycles;
// from the CU perspective (global)
Stats::Scalar pageTableCycles;
Stats::Scalar numUniquePages;
statistics::Scalar pageTableCycles;
statistics::Scalar numUniquePages;
// from the perspective of this TLB
Stats::Scalar localCycles;
statistics::Scalar localCycles;
// from the perspective of this TLB
Stats::Formula localLatency;
statistics::Formula localLatency;
// I take the avg. per page and then
// the avg. over all pages.
Stats::Scalar avgReuseDistance;
statistics::Scalar avgReuseDistance;
} stats;
};
}

View File

@@ -126,8 +126,8 @@ LocalMemPipeline::issueRequest(GPUDynInstPtr gpuDynInst)
LocalMemPipeline::
LocalMemPipelineStats::LocalMemPipelineStats(Stats::Group *parent)
: Stats::Group(parent, "LocalMemPipeline"),
LocalMemPipelineStats::LocalMemPipelineStats(statistics::Group *parent)
: statistics::Group(parent, "LocalMemPipeline"),
ADD_STAT(loadVrfBankConflictCycles, "total number of cycles LDS data "
"are delayed before updating the VRF")
{

View File

@@ -97,11 +97,11 @@ class LocalMemPipeline
std::queue<GPUDynInstPtr> lmReturnedRequests;
protected:
struct LocalMemPipelineStats : public Stats::Group
struct LocalMemPipelineStats : public statistics::Group
{
LocalMemPipelineStats(Stats::Group *parent);
LocalMemPipelineStats(statistics::Group *parent);
Stats::Scalar loadVrfBankConflictCycles;
statistics::Scalar loadVrfBankConflictCycles;
} stats;
};

View File

@@ -189,8 +189,8 @@ RegisterFile::dispatchInstruction(GPUDynInstPtr ii)
{
}
RegisterFile::RegisterFileStats::RegisterFileStats(Stats::Group *parent)
: Stats::Group(parent),
RegisterFile::RegisterFileStats::RegisterFileStats(statistics::Group *parent)
: statistics::Group(parent),
ADD_STAT(registerReads,
"Total number of DWORDs read from register file"),
ADD_STAT(registerWrites,

View File

@@ -151,21 +151,21 @@ class RegisterFile : public SimObject
// number of registers in this register file
int _numRegs;
struct RegisterFileStats : public Stats::Group
struct RegisterFileStats : public statistics::Group
{
RegisterFileStats(Stats::Group *parent);
RegisterFileStats(statistics::Group *parent);
// Total number of register reads per DWORD per thread
Stats::Scalar registerReads;
statistics::Scalar registerReads;
// Total number of register writes per DWORD per thread
Stats::Scalar registerWrites;
statistics::Scalar registerWrites;
// Number of register file SRAM activations for reads.
// The register file may be implemented with multiple SRAMs. This stat
// tracks how many times the SRAMs are accessed for reads.
Stats::Scalar sramReads;
statistics::Scalar sramReads;
// Number of register file SRAM activations for writes
Stats::Scalar sramWrites;
statistics::Scalar sramWrites;
} stats;
};

View File

@@ -784,9 +784,9 @@ ScheduleStage::deleteFromSch(Wavefront *w)
wavesInSch.erase(w->wfDynId);
}
ScheduleStage::ScheduleStageStats::ScheduleStageStats(Stats::Group *parent,
int num_exec_units)
: Stats::Group(parent, "ScheduleStage"),
ScheduleStage::ScheduleStageStats::ScheduleStageStats(
statistics::Group *parent, int num_exec_units)
: statistics::Group(parent, "ScheduleStage"),
ADD_STAT(rdyListEmpty ,"number of cycles no wave on ready list per "
"execution resource"),
ADD_STAT(rdyListNotEmpty, "number of cycles one or more wave on ready "

View File

@@ -184,49 +184,49 @@ class ScheduleStage
std::vector<std::deque<std::pair<GPUDynInstPtr, SCH_STATUS>>> schList;
protected:
struct ScheduleStageStats : public Stats::Group
struct ScheduleStageStats : public statistics::Group
{
ScheduleStageStats(Stats::Group *parent, int num_exec_units);
ScheduleStageStats(statistics::Group *parent, int num_exec_units);
// Number of cycles with empty (or not empty) readyList, per execution
// resource, when the CU is active (not sleeping)
Stats::Vector rdyListEmpty;
Stats::Vector rdyListNotEmpty;
statistics::Vector rdyListEmpty;
statistics::Vector rdyListNotEmpty;
// Number of cycles, per execution resource, when at least one wave
// was on the readyList and picked by scheduler, but was unable to be
// added to the schList, when the CU is active (not sleeping)
Stats::Vector addToSchListStalls;
statistics::Vector addToSchListStalls;
// Number of cycles, per execution resource, when a wave is selected
// as candidate for dispatchList from schList
// Note: may be arbitrated off dispatchList (e.g., LDS arbitration)
Stats::Vector schListToDispList;
statistics::Vector schListToDispList;
// Per execution resource stat, incremented once per cycle if no wave
// was selected as candidate for dispatch and moved to dispatchList
Stats::Vector schListToDispListStalls;
statistics::Vector schListToDispListStalls;
// Number of times a wave is selected by the scheduler but cannot
// be added to the schList due to register files not being able to
// support reads or writes of operands. RF_ACCESS_NRDY condition is
// always incremented if at least one read/write not supported, other
// conditions are incremented independently from each other.
Stats::Vector rfAccessStalls;
statistics::Vector rfAccessStalls;
// Number of times a wave is executing FLAT instruction and
// forces another wave occupying its required local memory resource
// to be deselected for execution, and placed back on schList
Stats::Scalar ldsBusArbStalls;
statistics::Scalar ldsBusArbStalls;
// Count of times VRF and/or SRF blocks waves on schList from
// performing RFBUSY->RFREADY transition
Stats::Vector opdNrdyStalls;
statistics::Vector opdNrdyStalls;
// Count of times resource required for dispatch is not ready and
// blocks wave in RFREADY state on schList from potentially moving
// to dispatchList
Stats::Vector dispNrdyStalls;
statistics::Vector dispNrdyStalls;
} stats;
};

View File

@@ -277,8 +277,8 @@ ScoreboardCheckStage::exec()
}
ScoreboardCheckStage::
ScoreboardCheckStageStats::ScoreboardCheckStageStats(Stats::Group *parent)
: Stats::Group(parent, "ScoreboardCheckStage"),
ScoreboardCheckStageStats::ScoreboardCheckStageStats(statistics::Group *parent)
: statistics::Group(parent, "ScoreboardCheckStage"),
ADD_STAT(stallCycles, "number of cycles wave stalled in SCB")
{
stallCycles.init(NRDY_CONDITIONS);

View File

@@ -99,11 +99,11 @@ class ScoreboardCheckStage
const std::string _name;
protected:
struct ScoreboardCheckStageStats : public Stats::Group
struct ScoreboardCheckStageStats : public statistics::Group
{
ScoreboardCheckStageStats(Stats::Group *parent);
ScoreboardCheckStageStats(statistics::Group *parent);
Stats::Vector stallCycles;
statistics::Vector stallCycles;
} stats;
};

View File

@@ -522,8 +522,8 @@ Shader::notifyCuSleep() {
stats.shaderActiveTicks += curTick() - _lastInactiveTick;
}
Shader::ShaderStats::ShaderStats(Stats::Group *parent, int wf_size)
: Stats::Group(parent),
Shader::ShaderStats::ShaderStats(statistics::Group *parent, int wf_size)
: statistics::Group(parent),
ADD_STAT(allLatencyDist, "delay distribution for all"),
ADD_STAT(loadLatencyDist, "delay distribution for loads"),
ADD_STAT(storeLatencyDist, "delay distribution for stores"),
@@ -546,40 +546,40 @@ Shader::ShaderStats::ShaderStats(Stats::Group *parent, int wf_size)
{
allLatencyDist
.init(0, 1600000, 10000)
.flags(Stats::pdf | Stats::oneline);
.flags(statistics::pdf | statistics::oneline);
loadLatencyDist
.init(0, 1600000, 10000)
.flags(Stats::pdf | Stats::oneline);
.flags(statistics::pdf | statistics::oneline);
storeLatencyDist
.init(0, 1600000, 10000)
.flags(Stats::pdf | Stats::oneline);
.flags(statistics::pdf | statistics::oneline);
initToCoalesceLatency
.init(0, 1600000, 10000)
.flags(Stats::pdf | Stats::oneline);
.flags(statistics::pdf | statistics::oneline);
rubyNetworkLatency
.init(0, 1600000, 10000)
.flags(Stats::pdf | Stats::oneline);
.flags(statistics::pdf | statistics::oneline);
gmEnqueueLatency
.init(0, 1600000, 10000)
.flags(Stats::pdf | Stats::oneline);
.flags(statistics::pdf | statistics::oneline);
gmToCompleteLatency
.init(0, 1600000, 10000)
.flags(Stats::pdf | Stats::oneline);
.flags(statistics::pdf | statistics::oneline);
coalsrLineAddresses
.init(0, 20, 1)
.flags(Stats::pdf | Stats::oneline);
.flags(statistics::pdf | statistics::oneline);
vectorInstSrcOperand.init(4);
vectorInstDstOperand.init(4);
cacheBlockRoundTrip = new Stats::Distribution[wf_size];
cacheBlockRoundTrip = new statistics::Distribution[wf_size];
for (int idx = 0; idx < wf_size; ++idx) {
std::stringstream namestr;
ccprintf(namestr, "%s.cacheBlockRoundTrip%d",
@@ -588,6 +588,6 @@ Shader::ShaderStats::ShaderStats(Stats::Group *parent, int wf_size)
.init(0, 1600000, 10000)
.name(namestr.str())
.desc("Coalsr-to-coalsr time for the Nth cache block in an inst")
.flags(Stats::pdf | Stats::oneline);
.flags(statistics::pdf | statistics::oneline);
}
}

View File

@@ -281,37 +281,37 @@ class Shader : public ClockedObject
}
protected:
struct ShaderStats : public Stats::Group
struct ShaderStats : public statistics::Group
{
ShaderStats(Stats::Group *parent, int wf_size);
ShaderStats(statistics::Group *parent, int wf_size);
// some stats for measuring latency
Stats::Distribution allLatencyDist;
Stats::Distribution loadLatencyDist;
Stats::Distribution storeLatencyDist;
statistics::Distribution allLatencyDist;
statistics::Distribution loadLatencyDist;
statistics::Distribution storeLatencyDist;
// average ticks from vmem inst initiateAcc to coalescer issue,
Stats::Distribution initToCoalesceLatency;
statistics::Distribution initToCoalesceLatency;
// average ticks from coalescer issue to coalescer hit callback,
Stats::Distribution rubyNetworkLatency;
statistics::Distribution rubyNetworkLatency;
// average ticks from coalescer hit callback to GM pipe enqueue,
Stats::Distribution gmEnqueueLatency;
statistics::Distribution gmEnqueueLatency;
// average ticks spent in GM pipe's ordered resp buffer.
Stats::Distribution gmToCompleteLatency;
statistics::Distribution gmToCompleteLatency;
// average number of cache blocks requested by vmem inst
Stats::Distribution coalsrLineAddresses;
statistics::Distribution coalsrLineAddresses;
// average ticks for cache blocks to main memory for the Nth
// cache block generated by a vmem inst.
Stats::Distribution *cacheBlockRoundTrip;
statistics::Distribution *cacheBlockRoundTrip;
Stats::Scalar shaderActiveTicks;
Stats::Vector vectorInstSrcOperand;
Stats::Vector vectorInstDstOperand;
statistics::Scalar shaderActiveTicks;
statistics::Vector vectorInstSrcOperand;
statistics::Vector vectorInstDstOperand;
} stats;
};

View File

@@ -521,8 +521,8 @@ TLBCoalescer::processCleanupEvent()
}
}
TLBCoalescer::TLBCoalescerStats::TLBCoalescerStats(Stats::Group *parent)
: Stats::Group(parent),
TLBCoalescer::TLBCoalescerStats::TLBCoalescerStats(statistics::Group *parent)
: statistics::Group(parent),
ADD_STAT(uncoalescedAccesses, "Number of uncoalesced TLB accesses"),
ADD_STAT(coalescedAccesses, "Number of coalesced TLB accesses"),
ADD_STAT(queuingCycles, "Number of cycles spent in queue"),

View File

@@ -195,26 +195,26 @@ class TLBCoalescer : public ClockedObject
std::queue<Addr> cleanupQueue;
protected:
struct TLBCoalescerStats : public Stats::Group
struct TLBCoalescerStats : public statistics::Group
{
TLBCoalescerStats(Stats::Group *parent);
TLBCoalescerStats(statistics::Group *parent);
// number of packets the coalescer receives
Stats::Scalar uncoalescedAccesses;
statistics::Scalar uncoalescedAccesses;
// number of packets the coalescer sends to the TLB
Stats::Scalar coalescedAccesses;
statistics::Scalar coalescedAccesses;
// Number of cycles the coalesced requests spend waiting in
// coalescerFIFO. For each packet the coalescer receives we take into
// account the number of all uncoalesced requests this pkt "represents"
Stats::Scalar queuingCycles;
statistics::Scalar queuingCycles;
// On average how much time a request from the
// uncoalescedAccesses that reaches the TLB
// spends waiting?
Stats::Scalar localqueuingCycles;
statistics::Scalar localqueuingCycles;
// localqueuingCycles/uncoalescedAccesses
Stats::Formula localLatency;
statistics::Formula localLatency;
} stats;
};

View File

@@ -1435,8 +1435,8 @@ Wavefront::releaseBarrier()
barId = WFBarrier::InvalidID;
}
Wavefront::WavefrontStats::WavefrontStats(Stats::Group *parent)
: Stats::Group(parent),
Wavefront::WavefrontStats::WavefrontStats(statistics::Group *parent)
: statistics::Group(parent),
ADD_STAT(numInstrExecuted,
"number of instructions executed by this WF slot"),
ADD_STAT(schCycles, "number of cycles spent in schedule stage"),

View File

@@ -330,49 +330,49 @@ class Wavefront : public SimObject
int barId;
public:
struct WavefrontStats : public Stats::Group
struct WavefrontStats : public statistics::Group
{
WavefrontStats(Stats::Group *parent);
WavefrontStats(statistics::Group *parent);
// Number of instructions executed by this wavefront slot across all
// dynamic wavefronts
Stats::Scalar numInstrExecuted;
statistics::Scalar numInstrExecuted;
// Number of cycles this WF spends in SCH stage
Stats::Scalar schCycles;
statistics::Scalar schCycles;
// Number of stall cycles encountered by this WF in SCH stage
Stats::Scalar schStalls;
statistics::Scalar schStalls;
// The following stats sum to the value of schStalls, and record, per
// WF slot, what the cause of each stall was at a coarse granularity.
// Cycles WF is selected by scheduler, but RFs cannot support
// instruction
Stats::Scalar schRfAccessStalls;
statistics::Scalar schRfAccessStalls;
// Cycles spent waiting for execution resources
Stats::Scalar schResourceStalls;
statistics::Scalar schResourceStalls;
// cycles spent waiting for RF reads to complete in SCH stage
Stats::Scalar schOpdNrdyStalls;
statistics::Scalar schOpdNrdyStalls;
// LDS arbitration stall cycles. WF attempts to execute LM instruction,
// but another wave is executing FLAT, which requires LM and GM and
// forces this WF to stall.
Stats::Scalar schLdsArbStalls;
statistics::Scalar schLdsArbStalls;
// number of times an instruction of a WF is blocked from being issued
// due to WAR and WAW dependencies
Stats::Scalar numTimesBlockedDueWAXDependencies;
statistics::Scalar numTimesBlockedDueWAXDependencies;
// number of times an instruction of a WF is blocked from being issued
// due to RAW dependencies
Stats::Scalar numTimesBlockedDueRAWDependencies;
statistics::Scalar numTimesBlockedDueRAWDependencies;
// Distribution to track the distance between producer and consumer
// for vector register values
Stats::Distribution vecRawDistance;
statistics::Distribution vecRawDistance;
// Distribution to track the number of times every vector register
// value produced is consumed.
Stats::Distribution readsPerWrite;
statistics::Distribution readsPerWrite;
} stats;
};