arch-vega: Fix output warnings, gem5.fast (#1023)
Fix the gem5.fast build, which failed to build when the GPU model was enabled. Remove the very spammy stat distribution bucket-size prints emitted when running the GPU model.
This commit is contained in:
@@ -517,12 +517,23 @@ namespace VegaISA
|
||||
switch(_opIdx) {
|
||||
case REG_EXEC_LO:
|
||||
{
|
||||
ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
|
||||
execMask().to_ullong();
|
||||
std::memcpy((void*)srfData.data(), (void*)&exec_mask,
|
||||
sizeof(exec_mask));
|
||||
DPRINTF(GPUSRF, "Read EXEC\n");
|
||||
DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask);
|
||||
if constexpr (NumDwords == 2) {
|
||||
ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
|
||||
execMask().to_ullong();
|
||||
std::memcpy((void*)srfData.data(), (void*)&exec_mask,
|
||||
sizeof(exec_mask));
|
||||
DPRINTF(GPUSRF, "Read EXEC\n");
|
||||
DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask);
|
||||
} else {
|
||||
ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
|
||||
execMask().to_ullong();
|
||||
|
||||
ScalarRegU32 exec_mask_lo = bits(exec_mask, 31, 0);
|
||||
std::memcpy((void*)srfData.data(),
|
||||
(void*)&exec_mask_lo, sizeof(exec_mask_lo));
|
||||
DPRINTF(GPUSRF, "Read EXEC_LO\n");
|
||||
DPRINTF(GPUSRF, "EXEC_LO = %#x\n", exec_mask_lo);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case REG_EXEC_HI:
|
||||
@@ -550,39 +561,55 @@ namespace VegaISA
|
||||
break;
|
||||
case REG_SHARED_BASE:
|
||||
{
|
||||
ComputeUnit *cu = _gpuDynInst->computeUnit();
|
||||
ScalarRegU64 shared_base = cu->shader->ldsApe().base;
|
||||
std::memcpy((void*)srfData.data(), (void*)&shared_base,
|
||||
sizeof(shared_base));
|
||||
DPRINTF(GPUSRF, "Read SHARED_BASE = %#x\n", shared_base);
|
||||
assert(NumDwords == 2);
|
||||
if constexpr (NumDwords == 2) {
|
||||
ComputeUnit *cu = _gpuDynInst->computeUnit();
|
||||
ScalarRegU64 shared_base = cu->shader->ldsApe().base;
|
||||
std::memcpy((void*)srfData.data(), (void*)&shared_base,
|
||||
sizeof(srfData));
|
||||
DPRINTF(GPUSRF, "Read SHARED_BASE = %#x\n",
|
||||
shared_base);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case REG_SHARED_LIMIT:
|
||||
{
|
||||
ComputeUnit *cu = _gpuDynInst->computeUnit();
|
||||
ScalarRegU64 shared_limit = cu->shader->ldsApe().limit;
|
||||
std::memcpy((void*)srfData.data(), (void*)&shared_limit,
|
||||
sizeof(shared_limit));
|
||||
DPRINTF(GPUSRF, "Read SHARED_LIMIT = %#x\n", shared_limit);
|
||||
assert(NumDwords == 2);
|
||||
if constexpr (NumDwords == 2) {
|
||||
ComputeUnit *cu = _gpuDynInst->computeUnit();
|
||||
ScalarRegU64 shared_limit = cu->shader->ldsApe().limit;
|
||||
std::memcpy((void*)srfData.data(),
|
||||
(void*)&shared_limit, sizeof(srfData));
|
||||
DPRINTF(GPUSRF, "Read SHARED_LIMIT = %#x\n",
|
||||
shared_limit);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case REG_PRIVATE_BASE:
|
||||
{
|
||||
ComputeUnit *cu = _gpuDynInst->computeUnit();
|
||||
ScalarRegU64 priv_base = cu->shader->scratchApe().base;
|
||||
std::memcpy((void*)srfData.data(), (void*)&priv_base,
|
||||
sizeof(priv_base));
|
||||
DPRINTF(GPUSRF, "Read PRIVATE_BASE = %#x\n", priv_base);
|
||||
assert(NumDwords == 2);
|
||||
if constexpr (NumDwords == 2) {
|
||||
ComputeUnit *cu = _gpuDynInst->computeUnit();
|
||||
ScalarRegU64 priv_base = cu->shader->scratchApe().base;
|
||||
std::memcpy((void*)srfData.data(), (void*)&priv_base,
|
||||
sizeof(srfData));
|
||||
DPRINTF(GPUSRF, "Read PRIVATE_BASE = %#x\n",
|
||||
priv_base);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case REG_PRIVATE_LIMIT:
|
||||
{
|
||||
ComputeUnit *cu = _gpuDynInst->computeUnit();
|
||||
ScalarRegU64 priv_limit = cu->shader->scratchApe().limit;
|
||||
std::memcpy((void*)srfData.data(), (void*)&priv_limit,
|
||||
sizeof(priv_limit));
|
||||
DPRINTF(GPUSRF, "Read PRIVATE_LIMIT = %#x\n",
|
||||
priv_limit);
|
||||
assert(NumDwords == 2);
|
||||
if constexpr (NumDwords == 2) {
|
||||
ComputeUnit *cu = _gpuDynInst->computeUnit();
|
||||
ScalarRegU64 priv_limit =
|
||||
cu->shader->scratchApe().limit;
|
||||
std::memcpy((void*)srfData.data(), (void*)&priv_limit,
|
||||
sizeof(srfData));
|
||||
DPRINTF(GPUSRF, "Read PRIVATE_LIMIT = %#x\n",
|
||||
priv_limit);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case REG_POS_HALF:
|
||||
|
||||
@@ -2476,15 +2476,15 @@ ComputeUnit::ComputeUnitStats::ComputeUnitStats(statistics::Group *parent,
|
||||
instCyclesLdsPerSimd.init(cu->numVectorALUs);
|
||||
|
||||
hitsPerTLBLevel.init(4);
|
||||
execRateDist.init(0, 10, 2);
|
||||
ldsBankConflictDist.init(0, cu->wfSize(), 2);
|
||||
execRateDist.init(0, 10-1, 2);
|
||||
ldsBankConflictDist.init(0, cu->wfSize()-1, 2);
|
||||
|
||||
pageDivergenceDist.init(1, cu->wfSize(), 4);
|
||||
controlFlowDivergenceDist.init(1, cu->wfSize(), 4);
|
||||
activeLanesPerGMemInstrDist.init(1, cu->wfSize(), 4);
|
||||
activeLanesPerLMemInstrDist.init(1, cu->wfSize(), 4);
|
||||
|
||||
headTailLatency.init(0, 1000000, 10000).flags(statistics::pdf |
|
||||
headTailLatency.init(0, 1000000-1, 10000).flags(statistics::pdf |
|
||||
statistics::oneline);
|
||||
waveLevelParallelism.init(0, n_wf * cu->numVectorALUs, 1);
|
||||
instInterleave.init(cu->numVectorALUs, 0, 20, 1);
|
||||
|
||||
@@ -216,7 +216,7 @@ ExecStage::ExecStageStats::ExecStageStats(statistics::Group *parent)
|
||||
ComputeUnit *compute_unit = static_cast<ComputeUnit*>(parent);
|
||||
|
||||
spc.init(0, compute_unit->numExeUnits(), 1);
|
||||
idleDur.init(0, 75, 5);
|
||||
idleDur.init(0, 75-1, 5);
|
||||
numCyclesWithInstrTypeIssued.init(compute_unit->numExeUnits());
|
||||
numCyclesWithNoInstrTypeIssued.init(compute_unit->numExeUnits());
|
||||
|
||||
|
||||
@@ -607,31 +607,31 @@ Shader::ShaderStats::ShaderStats(statistics::Group *parent, int wf_size)
|
||||
"vector instruction destination operand distribution")
|
||||
{
|
||||
allLatencyDist
|
||||
.init(0, 1600000, 10000)
|
||||
.init(0, 1600000-1, 10000)
|
||||
.flags(statistics::pdf | statistics::oneline);
|
||||
|
||||
loadLatencyDist
|
||||
.init(0, 1600000, 10000)
|
||||
.init(0, 1600000-1, 10000)
|
||||
.flags(statistics::pdf | statistics::oneline);
|
||||
|
||||
storeLatencyDist
|
||||
.init(0, 1600000, 10000)
|
||||
.init(0, 1600000-1, 10000)
|
||||
.flags(statistics::pdf | statistics::oneline);
|
||||
|
||||
initToCoalesceLatency
|
||||
.init(0, 1600000, 10000)
|
||||
.init(0, 1600000-1, 10000)
|
||||
.flags(statistics::pdf | statistics::oneline);
|
||||
|
||||
rubyNetworkLatency
|
||||
.init(0, 1600000, 10000)
|
||||
.init(0, 1600000-1, 10000)
|
||||
.flags(statistics::pdf | statistics::oneline);
|
||||
|
||||
gmEnqueueLatency
|
||||
.init(0, 1600000, 10000)
|
||||
.init(0, 1600000-1, 10000)
|
||||
.flags(statistics::pdf | statistics::oneline);
|
||||
|
||||
gmToCompleteLatency
|
||||
.init(0, 1600000, 10000)
|
||||
.init(0, 1600000-1, 10000)
|
||||
.flags(statistics::pdf | statistics::oneline);
|
||||
|
||||
coalsrLineAddresses
|
||||
@@ -647,7 +647,7 @@ Shader::ShaderStats::ShaderStats(statistics::Group *parent, int wf_size)
|
||||
ccprintf(namestr, "%s.cacheBlockRoundTrip%d",
|
||||
static_cast<Shader*>(parent)->name(), idx);
|
||||
cacheBlockRoundTrip[idx]
|
||||
.init(0, 1600000, 10000)
|
||||
.init(0, 1600000-1, 10000)
|
||||
.name(namestr.str())
|
||||
.desc("Coalsr-to-coalsr time for the Nth cache block in an inst")
|
||||
.flags(statistics::pdf | statistics::oneline);
|
||||
|
||||
Reference in New Issue
Block a user