diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc index e485aa6161..3e0b8070fd 100644 --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -2436,15 +2436,15 @@ ComputeUnit::ComputeUnitStats::ComputeUnitStats(statistics::Group *parent, instCyclesLdsPerSimd.init(cu->numVectorALUs); hitsPerTLBLevel.init(4); - execRateDist.init(0, 10, 2); - ldsBankConflictDist.init(0, cu->wfSize(), 2); + execRateDist.init(0, 10-1, 2); + ldsBankConflictDist.init(0, cu->wfSize()-1, 2); pageDivergenceDist.init(1, cu->wfSize(), 4); controlFlowDivergenceDist.init(1, cu->wfSize(), 4); activeLanesPerGMemInstrDist.init(1, cu->wfSize(), 4); activeLanesPerLMemInstrDist.init(1, cu->wfSize(), 4); - headTailLatency.init(0, 1000000, 10000).flags(statistics::pdf | + headTailLatency.init(0, 1000000-1, 10000).flags(statistics::pdf | statistics::oneline); waveLevelParallelism.init(0, n_wf * cu->numVectorALUs, 1); instInterleave.init(cu->numVectorALUs, 0, 20, 1); diff --git a/src/gpu-compute/exec_stage.cc b/src/gpu-compute/exec_stage.cc index bcba938cd8..f2b847c8a7 100644 --- a/src/gpu-compute/exec_stage.cc +++ b/src/gpu-compute/exec_stage.cc @@ -216,7 +216,7 @@ ExecStage::ExecStageStats::ExecStageStats(statistics::Group *parent) ComputeUnit *compute_unit = static_cast(parent); spc.init(0, compute_unit->numExeUnits(), 1); - idleDur.init(0, 75, 5); + idleDur.init(0, 75-1, 5); numCyclesWithInstrTypeIssued.init(compute_unit->numExeUnits()); numCyclesWithNoInstrTypeIssued.init(compute_unit->numExeUnits()); diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc index 6e3d556026..792ecb6678 100644 --- a/src/gpu-compute/shader.cc +++ b/src/gpu-compute/shader.cc @@ -584,31 +584,31 @@ Shader::ShaderStats::ShaderStats(statistics::Group *parent, int wf_size) "vector instruction destination operand distribution") { allLatencyDist - .init(0, 1600000, 10000) + .init(0, 1600000-1, 10000) .flags(statistics::pdf | statistics::oneline); loadLatencyDist - .init(0, 1600000, 10000) + .init(0, 1600000-1, 10000) .flags(statistics::pdf | statistics::oneline); storeLatencyDist - .init(0, 1600000, 10000) + .init(0, 1600000-1, 10000) .flags(statistics::pdf | statistics::oneline); initToCoalesceLatency - .init(0, 1600000, 10000) + .init(0, 1600000-1, 10000) .flags(statistics::pdf | statistics::oneline); rubyNetworkLatency - .init(0, 1600000, 10000) + .init(0, 1600000-1, 10000) .flags(statistics::pdf | statistics::oneline); gmEnqueueLatency - .init(0, 1600000, 10000) + .init(0, 1600000-1, 10000) .flags(statistics::pdf | statistics::oneline); gmToCompleteLatency - .init(0, 1600000, 10000) + .init(0, 1600000-1, 10000) .flags(statistics::pdf | statistics::oneline); coalsrLineAddresses @@ -624,7 +624,7 @@ Shader::ShaderStats::ShaderStats(statistics::Group *parent, int wf_size) ccprintf(namestr, "%s.cacheBlockRoundTrip%d", static_cast(parent)->name(), idx); cacheBlockRoundTrip[idx] - .init(0, 1600000, 10000) + .init(0, 1600000-1, 10000) .name(namestr.str()) .desc("Coalsr-to-coalsr time for the Nth cache block in an inst") .flags(statistics::pdf | statistics::oneline);