mem-ruby: Improve Ruby/CHI stats for in/out trans (#220)

Currently we generate these stats for all defined Events in the
protocol, which may generate too many stats that are never used. Though
these don't appear in the stats.txt file, they unnecessarily increase
simulation startup time and memory footprint.

This patch limits those stats to events with the "in_trans" and/or
"out_trans" properties. The SLICC compiler then checks which combinations
of event+state are possible when generating the stats.

Also, the possible level of detail for inTransLatHist was reduced.
Only the number of transactions for each event+initial+final state
combination is now counted. Latency histograms are only defined per
event type (similarly to outTransLatHist). This significantly reduces
the final file size for generated stats.
This commit is contained in:
Bobby R. Bruce
2023-08-28 15:06:39 -07:00
committed by GitHub
4 changed files with 179 additions and 102 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022 ARM Limited
* Copyright (c) 2021-2023 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -280,37 +280,37 @@ machine(MachineType:Cache, "Cache coherency protocol") :
// Events triggered by sequencer requests or snoops in the rdy queue
// See CHIRequestType in CHi-msg.sm for descriptions
Load, desc="";
Store, desc="";
Prefetch, desc="";
ReadShared, desc="";
ReadNotSharedDirty, desc="";
ReadUnique, desc="";
ReadUnique_PoC, desc="";
ReadOnce, desc="";
CleanUnique, desc="";
Evict, desc="";
WriteBackFull, desc="";
WriteEvictFull, desc="";
WriteCleanFull, desc="";
WriteUnique, desc="";
WriteUniquePtl_PoC, desc="";
WriteUniqueFull_PoC, desc="";
WriteUniqueFull_PoC_Alloc, desc="";
SnpCleanInvalid, desc="";
SnpShared, desc="";
SnpSharedFwd, desc="";
SnpNotSharedDirtyFwd, desc="";
SnpUnique, desc="";
SnpUniqueFwd, desc="";
SnpOnce, desc="";
SnpOnceFwd, desc="";
SnpStalled, desc=""; // A snoop stall triggered from the inport
Load, desc="", in_trans="yes";
Store, desc="", in_trans="yes";
Prefetch, desc="", in_trans="yes";
ReadShared, desc="", in_trans="yes";
ReadNotSharedDirty, desc="", in_trans="yes";
ReadUnique, desc="", in_trans="yes";
ReadUnique_PoC, desc="", in_trans="yes";
ReadOnce, desc="", in_trans="yes";
CleanUnique, desc="", in_trans="yes";
Evict, desc="", in_trans="yes";
WriteBackFull, desc="", in_trans="yes";
WriteEvictFull, desc="", in_trans="yes";
WriteCleanFull, desc="", in_trans="yes";
WriteUnique, desc="", in_trans="yes";
WriteUniquePtl_PoC, desc="", in_trans="yes";
WriteUniqueFull_PoC, desc="", in_trans="yes";
WriteUniqueFull_PoC_Alloc, desc="", in_trans="yes";
SnpCleanInvalid, desc="", in_trans="yes";
SnpShared, desc="", in_trans="yes";
SnpSharedFwd, desc="", in_trans="yes";
SnpNotSharedDirtyFwd, desc="", in_trans="yes";
SnpUnique, desc="", in_trans="yes";
SnpUniqueFwd, desc="", in_trans="yes";
SnpOnce, desc="", in_trans="yes";
SnpOnceFwd, desc="", in_trans="yes";
SnpStalled, desc="", in_trans="yes"; // A snoop stall triggered from the inport
// DVM sequencer requests
DvmTlbi_Initiate, desc=""; // triggered when a CPU core wants to send a TLBI
DvmTlbi_Initiate, desc="", out_trans="yes", in_trans="yes"; // triggered when a CPU core wants to send a TLBI
// TLBIs are handled entirely within Ruby, so there's no ExternCompleted message
DvmSync_Initiate, desc=""; // triggered when a CPU core wants to send a sync
DvmSync_Initiate, desc="", out_trans="yes", in_trans="yes"; // triggered when a CPU core wants to send a sync
DvmSync_ExternCompleted, desc=""; // triggered when an externally requested Sync is completed
// Events triggered by incoming response messages
@@ -344,10 +344,10 @@ machine(MachineType:Cache, "Cache coherency protocol") :
PCrdGrant_PoC_Hazard, desc="";
// Events triggered by incoming DVM messages
SnpDvmOpSync_P1, desc="";
SnpDvmOpSync_P2, desc="";
SnpDvmOpNonSync_P1, desc="";
SnpDvmOpNonSync_P2, desc="";
SnpDvmOpSync_P1, desc="", in_trans="yes";
SnpDvmOpSync_P2, desc="", in_trans="yes";
SnpDvmOpNonSync_P1, desc="", in_trans="yes";
SnpDvmOpNonSync_P2, desc="", in_trans="yes";
// Events triggered by incoming data response messages
// See CHIDataType in CHi-msg.sm for descriptions
@@ -383,20 +383,20 @@ machine(MachineType:Cache, "Cache coherency protocol") :
// A Write or Evict becomes stale when the requester receives a snoop that
// changes the state of the data while the request was pending.
// Actual CHI implementations don't have this check.
Evict_Stale, desc="";
WriteBackFull_Stale, desc="";
WriteEvictFull_Stale, desc="";
WriteCleanFull_Stale, desc="";
CleanUnique_Stale, desc="";
Evict_Stale, desc="", in_trans="yes";
WriteBackFull_Stale, desc="", in_trans="yes";
WriteEvictFull_Stale, desc="", in_trans="yes";
WriteCleanFull_Stale, desc="", in_trans="yes";
CleanUnique_Stale, desc="", in_trans="yes";
// Cache fill handling
CheckCacheFill, desc="Check if need to write or update the cache and trigger any necessary allocation and evictions";
// Internal requests generated to evict or writeback a local copy
// to free-up cache space
Local_Eviction, desc="Evicts/WB the local copy of the line";
LocalHN_Eviction, desc="Local_Eviction triggered when is HN";
Global_Eviction, desc="Local_Eviction + back-invalidate line in all upstream requesters";
Local_Eviction, in_trans="yes", desc="Evicts/WB the local copy of the line";
LocalHN_Eviction, in_trans="yes", desc="Local_Eviction triggered when is HN";
Global_Eviction, in_trans="yes", desc="Local_Eviction + back-invalidate line in all upstream requesters";
// Events triggered from tbe.actions
// In general, for each event we define a single transition from
@@ -425,11 +425,11 @@ machine(MachineType:Cache, "Cache coherency protocol") :
SnpOncePipe, desc="Latency for SnpOnce requests";
// Send a read request downstream.
SendReadShared, desc="Send a ReadShared or ReadNotSharedDirty is allow_SD is false";
SendReadOnce, desc="Send a ReadOnce";
SendReadNoSnp, desc="Send a SendReadNoSnp";
SendReadNoSnpDMT, desc="Send a SendReadNoSnp using DMT";
SendReadUnique, desc="Send a ReadUnique";
SendReadShared, out_trans="yes", desc="Send a ReadShared or ReadNotSharedDirty is allow_SD is false";
SendReadOnce, out_trans="yes", desc="Send a ReadOnce";
SendReadNoSnp, out_trans="yes", desc="Send a SendReadNoSnp";
SendReadNoSnpDMT, out_trans="yes", desc="Send a SendReadNoSnp using DMT";
SendReadUnique, out_trans="yes", desc="Send a ReadUnique";
SendCompAck, desc="Send CompAck";
// Read handling at the completer
SendCompData, desc="Send CompData";
@@ -437,11 +437,11 @@ machine(MachineType:Cache, "Cache coherency protocol") :
SendRespSepData, desc="Send RespSepData for a DMT request";
// Send a write request downstream.
SendWriteBackOrWriteEvict, desc="Send a WriteBackFull (if line is UD or SD) or WriteEvictFull (if UC)";
SendWriteClean, desc="Send a WriteCleanFull";
SendWriteNoSnp, desc="Send a WriteNoSnp for a full line";
SendWriteNoSnpPartial, desc="Send a WriteNoSnpPtl";
SendWriteUnique, desc="Send a WriteUniquePtl";
SendWriteBackOrWriteEvict, out_trans="yes", desc="Send a WriteBackFull (if line is UD or SD) or WriteEvictFull (if UC)";
SendWriteClean, out_trans="yes", desc="Send a WriteCleanFull";
SendWriteNoSnp, out_trans="yes", desc="Send a WriteNoSnp for a full line";
SendWriteNoSnpPartial, out_trans="yes", desc="Send a WriteNoSnpPtl";
SendWriteUnique, out_trans="yes", desc="Send a WriteUniquePtl";
SendWBData, desc="Send writeback data";
SendWUData, desc="Send write unique data";
SendWUDataCB, desc="Send write unique data from a sequencer callback";
@@ -453,9 +453,9 @@ machine(MachineType:Cache, "Cache coherency protocol") :
SendComp_WU, desc="Ack WU completion";
// Dataless requests
SendEvict, desc="Send a Evict";
SendEvict, out_trans="yes", desc="Send a Evict";
SendCompIResp, desc="Ack Evict with Comp_I";
SendCleanUnique,desc="Send a CleanUnique";
SendCleanUnique,out_trans="yes", desc="Send a CleanUnique";
SendCompUCResp, desc="Ack CleanUnique with Comp_UC";
SendCompUCRespStale, desc="Ack stale CleanUnique with Comp_UC";

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022 ARM Limited
* Copyright (c) 2021-2023 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -170,8 +170,8 @@ machine(MachineType:MiscNode, "CHI Misc Node for handling and distrbuting DVM op
SendPCrdGrant, desc="Send PCrdGrant";
DoRetry, desc="Resend the current pending request";
DvmTlbi_Initiate, desc="Initiate a DVM TLBI on the provided TBE";
DvmSync_Initiate, desc="Initiate a DVM Sync on the provided TBE";
DvmTlbi_Initiate, out_trans="yes", in_trans="yes", desc="Initiate a DVM TLBI on the provided TBE";
DvmSync_Initiate, out_trans="yes", in_trans="yes", desc="Initiate a DVM Sync on the provided TBE";
DvmSendNextMessage_P1, desc="Trigger a SnpDvmOp_P1 message based on the TBE type";
DvmSendNextMessage_P2, desc="Trigger a SnpDvmOp_P2 message based on the TBE type";
DvmFinishDistributing, desc="Move the TBE out of the Distributing state into Waiting";

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017,2019-2022 ARM Limited
* Copyright (c) 2017,2019-2023 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
@@ -267,7 +267,7 @@ class AbstractController : public ClockedObject, public Consumer
assert(m_inTrans.find(addr) == m_inTrans.end());
m_inTrans[addr] = {type, initialState, curTick()};
if (retried)
++(*stats.inTransLatRetries[type]);
++(*stats.inTransRetryCnt[type]);
}
/**
@@ -288,11 +288,23 @@ class AbstractController : public ClockedObject, public Consumer
isAddressed ? m_inTransAddressed : m_inTransUnaddressed;
auto iter = m_inTrans.find(addr);
assert(iter != m_inTrans.end());
stats.inTransLatHist[iter->second.transaction]
[iter->second.state]
[(unsigned)finalState]->sample(
ticksToCycles(curTick() - iter->second.time));
++(*stats.inTransLatTotal[iter->second.transaction]);
auto &trans = iter->second;
auto stat_iter_ev = stats.inTransStateChanges.find(trans.transaction);
gem5_assert(stat_iter_ev != stats.inTransStateChanges.end(),
"%s: event type=%d not marked as in_trans in SLICC",
name(), trans.transaction);
auto stat_iter_state = stat_iter_ev->second.find(trans.state);
gem5_assert(stat_iter_state != stat_iter_ev->second.end(),
"%s: event type=%d has no transition from state=%d",
name(), trans.transaction, trans.state);
++(*stat_iter_state->second[(unsigned)finalState]);
stats.inTransLatHist[iter->second.transaction]->sample(
ticksToCycles(curTick() - trans.time));
m_inTrans.erase(iter);
}
@@ -334,10 +346,17 @@ class AbstractController : public ClockedObject, public Consumer
isAddressed ? m_outTransAddressed : m_outTransUnaddressed;
auto iter = m_outTrans.find(addr);
assert(iter != m_outTrans.end());
stats.outTransLatHist[iter->second.transaction]->sample(
ticksToCycles(curTick() - iter->second.time));
auto &trans = iter->second;
auto stat_iter = stats.outTransLatHist.find(trans.transaction);
gem5_assert(stat_iter != stats.outTransLatHist.end(),
"%s: event type=%d not marked as out_trans in SLICC",
name(), trans.transaction);
stat_iter->second->sample(
ticksToCycles(curTick() - trans.time));
if (retried)
++(*stats.outTransLatHistRetries[iter->second.transaction]);
++(*stats.outTransRetryCnt[trans.transaction]);
m_outTrans.erase(iter);
}
@@ -429,17 +448,25 @@ class AbstractController : public ClockedObject, public Consumer
{
ControllerStats(statistics::Group *parent);
// Initialized by the SLICC compiler for all combinations of event and
// states. Only histograms with samples will appear in the stats
std::vector<std::vector<std::vector<statistics::Histogram*>>>
inTransLatHist;
std::vector<statistics::Scalar*> inTransLatRetries;
std::vector<statistics::Scalar*> inTransLatTotal;
// Initialized by the SLICC compiler for all events with the
// "in_trans" property.
// Only histograms with samples will appear in the stats
std::unordered_map<unsigned, statistics::Histogram*> inTransLatHist;
std::unordered_map<unsigned, statistics::Scalar*> inTransRetryCnt;
// Initialized by the SLICC compiler for all combinations of events
// with the "in_trans" property, potential initial states, and
// potential final states. Potential initial states are states that
// appear in transitions triggered by that event. Currently all states
// are considered as potential final states.
std::unordered_map<unsigned, std::unordered_map<unsigned,
std::vector<statistics::Scalar*>>> inTransStateChanges;
// Initialized by the SLICC compiler for all events.
// Initialized by the SLICC compiler for all events with the
// "out_trans" property.
// Only histograms with samples will appear in the stats.
std::vector<statistics::Histogram*> outTransLatHist;
std::vector<statistics::Scalar*> outTransLatHistRetries;
std::unordered_map<unsigned, statistics::Histogram*> outTransLatHist;
std::unordered_map<unsigned, statistics::Scalar*>
outTransRetryCnt;
//! Counter for the number of cycles when the transitions carried out
//! were equal to the maximum allowed

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2019-2021 ARM Limited
# Copyright (c) 2019-2021,2023 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -111,8 +111,11 @@ class StateMachine(Symbol):
self.actions = OrderedDict()
self.request_types = OrderedDict()
self.transitions = []
self.transitions_per_ev = {}
self.in_ports = []
self.functions = []
self.event_stats_in_trans = []
self.event_stats_out_trans = []
# Data members in the State Machine that have been declared inside
# the {} machine. Note that these along with the config params
@@ -136,6 +139,10 @@ class StateMachine(Symbol):
# Register an event with this state machine.
#
# The event is stored in self.events under its identifier. It is also
# appended to the in-/out-transaction stats lists when it declares the
# corresponding pair.
# Note: only the presence of the "in_trans" / "out_trans" key in
# event.pairs is tested; the value attached to the key is not inspected.
def addEvent(self, event):
# Events may only be added while self.table is still None.
assert self.table is None
self.events[event.ident] = event
# Events tagged in_trans are collected in event_stats_in_trans.
if "in_trans" in event.pairs:
self.event_stats_in_trans.append(event)
# Events tagged out_trans are collected in event_stats_out_trans.
if "out_trans" in event.pairs:
self.event_stats_out_trans.append(event)
def addAction(self, action):
assert self.table is None
@@ -163,6 +170,9 @@ class StateMachine(Symbol):
# Register a transition with this state machine.
#
# The transition is appended to the flat self.transitions list and is also
# grouped by its triggering event in self.transitions_per_ev, which maps
# trans.event to the list of transitions added for that event.
def addTransition(self, trans):
# Transitions may only be added while self.table is still None.
assert self.table is None
self.transitions.append(trans)
# Create the per-event list the first time this event is seen.
if trans.event not in self.transitions_per_ev:
self.transitions_per_ev[trans.event] = []
self.transitions_per_ev[trans.event].append(trans)
def addInPort(self, var):
self.in_ports.append(var)
@@ -957,53 +967,93 @@ $c_ident::regStats()
}
}
for (${ident}_Event event = ${ident}_Event_FIRST;
event < ${ident}_Event_NUM; ++event) {
"""
)
# check if Events/States have profiling qualifiers flags for
# inTransLatHist and outTransLatHist stats.
ev_ident_list = [
"%s_Event_%s" % (ident, ev.ident)
for ev in self.event_stats_out_trans
]
ev_ident_str = "{" + ",".join(ev_ident_list) + "}"
code(
"""
const std::vector<${ident}_Event> out_trans_evs = ${ev_ident_str};
"""
)
ev_ident_list = [
"%s_Event_%s" % (ident, ev.ident)
for ev in self.event_stats_in_trans
]
ev_ident_str = "{" + ",".join(ev_ident_list) + "}"
code(
"""
const std::vector<${ident}_Event> in_trans_evs = ${ev_ident_str};
"""
)
kv_ident_list = []
for ev in self.event_stats_in_trans:
key_ident = "%s_Event_%s" % (ident, ev.ident)
val_ident_lst = [
"%s_State_%s" % (ident, trans.state.ident)
for trans in self.transitions_per_ev[ev]
]
val_ident_str = "{" + ",".join(val_ident_lst) + "}"
kv_ident_list.append("{%s, %s}" % (key_ident, val_ident_str))
key_ident_str = "{" + ",".join(kv_ident_list) + "}"
code(
"""
const std::unordered_map<${ident}_Event, std::vector<${ident}_State>>
in_trans_evs_states = ${key_ident_str};
"""
)
code(
"""
for (const auto event : out_trans_evs) {
std::string stat_name =
"outTransLatHist." + ${ident}_Event_to_string(event);
statistics::Histogram* t =
new statistics::Histogram(&stats, stat_name.c_str());
stats.outTransLatHist.push_back(t);
stats.outTransLatHist[event] = t;
t->init(5);
t->flags(statistics::pdf | statistics::total |
statistics::oneline | statistics::nozero);
statistics::Scalar* r = new statistics::Scalar(&stats,
(stat_name + ".retries").c_str());
stats.outTransLatHistRetries.push_back(r);
stats.outTransRetryCnt[event] = r;
r->flags(statistics::nozero);
}
for (${ident}_Event event = ${ident}_Event_FIRST;
event < ${ident}_Event_NUM; ++event) {
std::string stat_name = "inTransLatHist." +
${ident}_Event_to_string(event);
for (const auto event : in_trans_evs) {
std::string stat_name =
"inTransLatHist." + ${ident}_Event_to_string(event);
statistics::Histogram* t =
new statistics::Histogram(&stats, stat_name.c_str());
stats.inTransLatHist[event] = t;
t->init(5);
t->flags(statistics::pdf | statistics::total |
statistics::oneline | statistics::nozero);
statistics::Scalar* r = new statistics::Scalar(&stats,
(stat_name + ".total").c_str());
stats.inTransLatTotal.push_back(r);
(stat_name + ".retries").c_str());
stats.inTransRetryCnt[event] = r;
r->flags(statistics::nozero);
r = new statistics::Scalar(&stats,
(stat_name + ".retries").c_str());
stats.inTransLatRetries.push_back(r);
r->flags(statistics::nozero);
stats.inTransLatHist.emplace_back();
for (${ident}_State initial_state = ${ident}_State_FIRST;
initial_state < ${ident}_State_NUM; ++initial_state) {
stats.inTransLatHist.back().emplace_back();
auto &src_states = stats.inTransStateChanges[event];
for (const auto initial_state : in_trans_evs_states.at(event)) {
auto &dst_vector = src_states[initial_state];
for (${ident}_State final_state = ${ident}_State_FIRST;
final_state < ${ident}_State_NUM; ++final_state) {
std::string stat_name = "inTransLatHist." +
${ident}_Event_to_string(event) + "." +
${ident}_State_to_string(initial_state) + "." +
${ident}_State_to_string(final_state);
statistics::Histogram* t =
new statistics::Histogram(&stats, stat_name.c_str());
stats.inTransLatHist.back().back().push_back(t);
t->init(5);
t->flags(statistics::pdf | statistics::total |
statistics::oneline | statistics::nozero);
${ident}_State_to_string(final_state) + ".total";
statistics::Scalar* t =
new statistics::Scalar(&stats, stat_name.c_str());
t->flags(statistics::nozero);
dst_vector.push_back(t);
}
}
}