From 3360a87d5ac516d7581a75b2b2ee1bc09dd9bc6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiago=20M=C3=BCck?= Date: Mon, 1 May 2023 18:46:30 -0500 Subject: [PATCH 1/2] mem-ruby: optimize in/outTransLatHist stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generating these stats for all defined Events may generate too many stats that are never used, which unnecessarily increases simulation startup time and memory consumption. This patch limits those stats to events with the "in_trans" and/or "out_trans" properties. The SLICC compiler then checks which combinations of event+state are possible when generating the stats. Also, the possible level of detail for inTransLatHist was reduced. Only the number of transactions for each event+initial+final state combination is now accounted for. Latency histograms are only defined per event type (similarly to outTransLatHist). This significantly reduces the final file size for generated stats. Change-Id: I29aaeb771436cc3f0ce7547a223d58e71d9cedcc Signed-off-by: Tiago Mück --- .../slicc_interface/AbstractController.hh | 65 +++++++---- src/mem/slicc/symbols/StateMachine.py | 104 +++++++++++++----- 2 files changed, 123 insertions(+), 46 deletions(-) diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index 7fdb88b07d..72b679d6cf 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017,2019-2022 ARM Limited + * Copyright (c) 2017,2019-2023 ARM Limited * All rights reserved. 
* * The license below extends only to copyright in the software and shall @@ -267,7 +267,7 @@ class AbstractController : public ClockedObject, public Consumer assert(m_inTrans.find(addr) == m_inTrans.end()); m_inTrans[addr] = {type, initialState, curTick()}; if (retried) - ++(*stats.inTransLatRetries[type]); + ++(*stats.inTransRetryCnt[type]); } /** @@ -288,11 +288,23 @@ class AbstractController : public ClockedObject, public Consumer isAddressed ? m_inTransAddressed : m_inTransUnaddressed; auto iter = m_inTrans.find(addr); assert(iter != m_inTrans.end()); - stats.inTransLatHist[iter->second.transaction] - [iter->second.state] - [(unsigned)finalState]->sample( - ticksToCycles(curTick() - iter->second.time)); - ++(*stats.inTransLatTotal[iter->second.transaction]); + auto &trans = iter->second; + + auto stat_iter_ev = stats.inTransStateChanges.find(trans.transaction); + gem5_assert(stat_iter_ev != stats.inTransStateChanges.end(), + "%s: event type=%d not marked as in_trans in SLICC", + name(), trans.transaction); + + auto stat_iter_state = stat_iter_ev->second.find(trans.state); + gem5_assert(stat_iter_state != stat_iter_ev->second.end(), + "%s: event type=%d has no transition from state=%d", + name(), trans.transaction, trans.state); + + ++(*stat_iter_state->second[(unsigned)finalState]); + + stats.inTransLatHist[iter->second.transaction]->sample( + ticksToCycles(curTick() - trans.time)); + m_inTrans.erase(iter); } @@ -334,10 +346,17 @@ class AbstractController : public ClockedObject, public Consumer isAddressed ? 
m_outTransAddressed : m_outTransUnaddressed; auto iter = m_outTrans.find(addr); assert(iter != m_outTrans.end()); - stats.outTransLatHist[iter->second.transaction]->sample( - ticksToCycles(curTick() - iter->second.time)); + auto &trans = iter->second; + + auto stat_iter = stats.outTransLatHist.find(trans.transaction); + gem5_assert(stat_iter != stats.outTransLatHist.end(), + "%s: event type=%d not marked as out_trans in SLICC", + name(), trans.transaction); + + stat_iter->second->sample( + ticksToCycles(curTick() - trans.time)); if (retried) - ++(*stats.outTransLatHistRetries[iter->second.transaction]); + ++(*stats.outTransRetryCnt[trans.transaction]); m_outTrans.erase(iter); } @@ -429,17 +448,25 @@ class AbstractController : public ClockedObject, public Consumer { ControllerStats(statistics::Group *parent); - // Initialized by the SLICC compiler for all combinations of event and - // states. Only histograms with samples will appear in the stats - std::vector>> - inTransLatHist; - std::vector inTransLatRetries; - std::vector inTransLatTotal; + // Initialized by the SLICC compiler for all events with the + // "in_trans" property. + // Only histograms with samples will appear in the stats + std::unordered_map inTransLatHist; + std::unordered_map inTransRetryCnt; + // Initialized by the SLICC compiler for all combinations of events + // with the "in_trans" property, potential initial states, and + // potential final states. Potential initial states are states that + // appear in transitions triggered by that event. Currently all states + // are considered as potential final states. + std::unordered_map>> inTransStateChanges; - // Initialized by the SLICC compiler for all events. + // Initialized by the SLICC compiler for all events with the + // "out_trans" property. // Only histograms with samples will appear in the stats. 
- std::vector outTransLatHist; - std::vector outTransLatHistRetries; + std::unordered_map outTransLatHist; + std::unordered_map + outTransRetryCnt; //! Counter for the number of cycles when the transitions carried out //! were equal to the maximum allowed diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py index 4712064089..039202a321 100644 --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021 ARM Limited +# Copyright (c) 2019-2021,2023 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -111,8 +111,11 @@ class StateMachine(Symbol): self.actions = OrderedDict() self.request_types = OrderedDict() self.transitions = [] + self.transitions_per_ev = {} self.in_ports = [] self.functions = [] + self.event_stats_in_trans = [] + self.event_stats_out_trans = [] # Data members in the State Machine that have been declared inside # the {} machine. 
Note that these along with the config params @@ -136,6 +139,10 @@ class StateMachine(Symbol): def addEvent(self, event): assert self.table is None self.events[event.ident] = event + if "in_trans" in event.pairs: + self.event_stats_in_trans.append(event) + if "out_trans" in event.pairs: + self.event_stats_out_trans.append(event) def addAction(self, action): assert self.table is None @@ -163,6 +170,9 @@ class StateMachine(Symbol): def addTransition(self, trans): assert self.table is None self.transitions.append(trans) + if trans.event not in self.transitions_per_ev: + self.transitions_per_ev[trans.event] = [] + self.transitions_per_ev[trans.event].append(trans) def addInPort(self, var): self.in_ports.append(var) @@ -957,53 +967,93 @@ $c_ident::regStats() } } - for (${ident}_Event event = ${ident}_Event_FIRST; - event < ${ident}_Event_NUM; ++event) { +""" + ) + # check if Events/States have profiling qualifiers flags for + # inTransLatHist and outTransLatHist stats. + ev_ident_list = [ + "%s_Event_%s" % (ident, ev.ident) + for ev in self.event_stats_out_trans + ] + ev_ident_str = "{" + ",".join(ev_ident_list) + "}" + code( + """ + const std::vector<${ident}_Event> out_trans_evs = ${ev_ident_str}; +""" + ) + ev_ident_list = [ + "%s_Event_%s" % (ident, ev.ident) + for ev in self.event_stats_in_trans + ] + ev_ident_str = "{" + ",".join(ev_ident_list) + "}" + code( + """ + const std::vector<${ident}_Event> in_trans_evs = ${ev_ident_str}; +""" + ) + kv_ident_list = [] + for ev in self.event_stats_in_trans: + key_ident = "%s_Event_%s" % (ident, ev.ident) + val_ident_lst = [ + "%s_State_%s" % (ident, trans.state.ident) + for trans in self.transitions_per_ev[ev] + ] + val_ident_str = "{" + ",".join(val_ident_lst) + "}" + kv_ident_list.append("{%s, %s}" % (key_ident, val_ident_str)) + key_ident_str = "{" + ",".join(kv_ident_list) + "}" + code( + """ + const std::unordered_map<${ident}_Event, std::vector<${ident}_State>> + in_trans_evs_states = ${key_ident_str}; +""" + ) + 
code( + """ + + for (const auto event : out_trans_evs) { std::string stat_name = "outTransLatHist." + ${ident}_Event_to_string(event); statistics::Histogram* t = new statistics::Histogram(&stats, stat_name.c_str()); - stats.outTransLatHist.push_back(t); + stats.outTransLatHist[event] = t; t->init(5); t->flags(statistics::pdf | statistics::total | statistics::oneline | statistics::nozero); statistics::Scalar* r = new statistics::Scalar(&stats, (stat_name + ".retries").c_str()); - stats.outTransLatHistRetries.push_back(r); + stats.outTransRetryCnt[event] = r; r->flags(statistics::nozero); } - for (${ident}_Event event = ${ident}_Event_FIRST; - event < ${ident}_Event_NUM; ++event) { - std::string stat_name = "inTransLatHist." + - ${ident}_Event_to_string(event); + for (const auto event : in_trans_evs) { + std::string stat_name = + "inTransLatHist." + ${ident}_Event_to_string(event); + statistics::Histogram* t = + new statistics::Histogram(&stats, stat_name.c_str()); + stats.inTransLatHist[event] = t; + t->init(5); + t->flags(statistics::pdf | statistics::total | + statistics::oneline | statistics::nozero); + statistics::Scalar* r = new statistics::Scalar(&stats, - (stat_name + ".total").c_str()); - stats.inTransLatTotal.push_back(r); + (stat_name + ".retries").c_str()); + stats.inTransRetryCnt[event] = r; r->flags(statistics::nozero); - r = new statistics::Scalar(&stats, - (stat_name + ".retries").c_str()); - stats.inTransLatRetries.push_back(r); - r->flags(statistics::nozero); - - stats.inTransLatHist.emplace_back(); - for (${ident}_State initial_state = ${ident}_State_FIRST; - initial_state < ${ident}_State_NUM; ++initial_state) { - stats.inTransLatHist.back().emplace_back(); + auto &src_states = stats.inTransStateChanges[event]; + for (const auto initial_state : in_trans_evs_states.at(event)) { + auto &dst_vector = src_states[initial_state]; for (${ident}_State final_state = ${ident}_State_FIRST; final_state < ${ident}_State_NUM; ++final_state) { std::string 
stat_name = "inTransLatHist." + ${ident}_Event_to_string(event) + "." + ${ident}_State_to_string(initial_state) + "." + - ${ident}_State_to_string(final_state); - statistics::Histogram* t = - new statistics::Histogram(&stats, stat_name.c_str()); - stats.inTransLatHist.back().back().push_back(t); - t->init(5); - t->flags(statistics::pdf | statistics::total | - statistics::oneline | statistics::nozero); + ${ident}_State_to_string(final_state) + ".total"; + statistics::Scalar* t = + new statistics::Scalar(&stats, stat_name.c_str()); + t->flags(statistics::nozero); + dst_vector.push_back(t); } } } From 9584d2efa96db205c0168838c24defb11749ab1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiago=20M=C3=BCck?= Date: Mon, 1 May 2023 18:53:44 -0500 Subject: [PATCH 2/2] mem-ruby: add in_trans/out_trans to CHI events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marks which events signal the beginning of incoming and outgoing transactions for generating inTransLatHist and outTransLatHist stats. 
Change-Id: I90594a27fa01ef9cfface309971354b281308d22 Signed-off-by: Tiago Mück --- src/mem/ruby/protocol/chi/CHI-cache.sm | 106 +++++++++--------- .../ruby/protocol/chi/CHI-dvm-misc-node.sm | 6 +- 2 files changed, 56 insertions(+), 56 deletions(-) diff --git a/src/mem/ruby/protocol/chi/CHI-cache.sm b/src/mem/ruby/protocol/chi/CHI-cache.sm index 3bd8d3f3c3..568b39c223 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache.sm @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 ARM Limited + * Copyright (c) 2021-2023 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -280,37 +280,37 @@ machine(MachineType:Cache, "Cache coherency protocol") : // Events triggered by sequencer requests or snoops in the rdy queue // See CHIRequestType in CHi-msg.sm for descriptions - Load, desc=""; - Store, desc=""; - Prefetch, desc=""; - ReadShared, desc=""; - ReadNotSharedDirty, desc=""; - ReadUnique, desc=""; - ReadUnique_PoC, desc=""; - ReadOnce, desc=""; - CleanUnique, desc=""; - Evict, desc=""; - WriteBackFull, desc=""; - WriteEvictFull, desc=""; - WriteCleanFull, desc=""; - WriteUnique, desc=""; - WriteUniquePtl_PoC, desc=""; - WriteUniqueFull_PoC, desc=""; - WriteUniqueFull_PoC_Alloc, desc=""; - SnpCleanInvalid, desc=""; - SnpShared, desc=""; - SnpSharedFwd, desc=""; - SnpNotSharedDirtyFwd, desc=""; - SnpUnique, desc=""; - SnpUniqueFwd, desc=""; - SnpOnce, desc=""; - SnpOnceFwd, desc=""; - SnpStalled, desc=""; // A snoop stall triggered from the inport + Load, desc="", in_trans="yes"; + Store, desc="", in_trans="yes"; + Prefetch, desc="", in_trans="yes"; + ReadShared, desc="", in_trans="yes"; + ReadNotSharedDirty, desc="", in_trans="yes"; + ReadUnique, desc="", in_trans="yes"; + ReadUnique_PoC, desc="", in_trans="yes"; + ReadOnce, desc="", in_trans="yes"; + CleanUnique, desc="", in_trans="yes"; + Evict, desc="", in_trans="yes"; + WriteBackFull, desc="", in_trans="yes"; + WriteEvictFull, 
desc="", in_trans="yes"; + WriteCleanFull, desc="", in_trans="yes"; + WriteUnique, desc="", in_trans="yes"; + WriteUniquePtl_PoC, desc="", in_trans="yes"; + WriteUniqueFull_PoC, desc="", in_trans="yes"; + WriteUniqueFull_PoC_Alloc, desc="", in_trans="yes"; + SnpCleanInvalid, desc="", in_trans="yes"; + SnpShared, desc="", in_trans="yes"; + SnpSharedFwd, desc="", in_trans="yes"; + SnpNotSharedDirtyFwd, desc="", in_trans="yes"; + SnpUnique, desc="", in_trans="yes"; + SnpUniqueFwd, desc="", in_trans="yes"; + SnpOnce, desc="", in_trans="yes"; + SnpOnceFwd, desc="", in_trans="yes"; + SnpStalled, desc="", in_trans="yes"; // A snoop stall triggered from the inport // DVM sequencer requests - DvmTlbi_Initiate, desc=""; // triggered when a CPU core wants to send a TLBI + DvmTlbi_Initiate, desc="", out_trans="yes", in_trans="yes"; // triggered when a CPU core wants to send a TLBI // TLBIs are handled entirely within Ruby, so there's no ExternCompleted message - DvmSync_Initiate, desc=""; // triggered when a CPU core wants to send a sync + DvmSync_Initiate, desc="", out_trans="yes", in_trans="yes"; // triggered when a CPU core wants to send a sync DvmSync_ExternCompleted, desc=""; // triggered when an externally requested Sync is completed // Events triggered by incoming response messages @@ -344,10 +344,10 @@ machine(MachineType:Cache, "Cache coherency protocol") : PCrdGrant_PoC_Hazard, desc=""; // Events triggered by incoming DVM messages - SnpDvmOpSync_P1, desc=""; - SnpDvmOpSync_P2, desc=""; - SnpDvmOpNonSync_P1, desc=""; - SnpDvmOpNonSync_P2, desc=""; + SnpDvmOpSync_P1, desc="", in_trans="yes"; + SnpDvmOpSync_P2, desc="", in_trans="yes"; + SnpDvmOpNonSync_P1, desc="", in_trans="yes"; + SnpDvmOpNonSync_P2, desc="", in_trans="yes"; // Events triggered by incoming data response messages // See CHIDataType in CHi-msg.sm for descriptions @@ -383,20 +383,20 @@ machine(MachineType:Cache, "Cache coherency protocol") : // A Write or Evict becomes stale when the requester receives 
a snoop that // changes the state of the data while the request was pending. // Actual CHI implementations don't have this check. - Evict_Stale, desc=""; - WriteBackFull_Stale, desc=""; - WriteEvictFull_Stale, desc=""; - WriteCleanFull_Stale, desc=""; - CleanUnique_Stale, desc=""; + Evict_Stale, desc="", in_trans="yes"; + WriteBackFull_Stale, desc="", in_trans="yes"; + WriteEvictFull_Stale, desc="", in_trans="yes"; + WriteCleanFull_Stale, desc="", in_trans="yes"; + CleanUnique_Stale, desc="", in_trans="yes"; // Cache fill handling CheckCacheFill, desc="Check if need to write or update the cache and trigger any necessary allocation and evictions"; // Internal requests generated to evict or writeback a local copy // to free-up cache space - Local_Eviction, desc="Evicts/WB the local copy of the line"; - LocalHN_Eviction, desc="Local_Eviction triggered when is HN"; - Global_Eviction, desc="Local_Eviction + back-invalidate line in all upstream requesters"; + Local_Eviction, in_trans="yes", desc="Evicts/WB the local copy of the line"; + LocalHN_Eviction, in_trans="yes", desc="Local_Eviction triggered when is HN"; + Global_Eviction, in_trans="yes", desc="Local_Eviction + back-invalidate line in all upstream requesters"; // Events triggered from tbe.actions // In general, for each event we define a single transition from @@ -425,11 +425,11 @@ machine(MachineType:Cache, "Cache coherency protocol") : SnpOncePipe, desc="Latency for SnpOnce requests"; // Send a read request downstream. 
- SendReadShared, desc="Send a ReadShared or ReadNotSharedDirty is allow_SD is false"; - SendReadOnce, desc="Send a ReadOnce"; - SendReadNoSnp, desc="Send a SendReadNoSnp"; - SendReadNoSnpDMT, desc="Send a SendReadNoSnp using DMT"; - SendReadUnique, desc="Send a ReadUnique"; + SendReadShared, out_trans="yes", desc="Send a ReadShared or ReadNotSharedDirty is allow_SD is false"; + SendReadOnce, out_trans="yes", desc="Send a ReadOnce"; + SendReadNoSnp, out_trans="yes", desc="Send a SendReadNoSnp"; + SendReadNoSnpDMT, out_trans="yes", desc="Send a SendReadNoSnp using DMT"; + SendReadUnique, out_trans="yes", desc="Send a ReadUnique"; SendCompAck, desc="Send CompAck"; // Read handling at the completer SendCompData, desc="Send CompData"; @@ -437,11 +437,11 @@ machine(MachineType:Cache, "Cache coherency protocol") : SendRespSepData, desc="Send RespSepData for a DMT request"; // Send a write request downstream. - SendWriteBackOrWriteEvict, desc="Send a WriteBackFull (if line is UD or SD) or WriteEvictFull (if UC)"; - SendWriteClean, desc="Send a WriteCleanFull"; - SendWriteNoSnp, desc="Send a WriteNoSnp for a full line"; - SendWriteNoSnpPartial, desc="Send a WriteNoSnpPtl"; - SendWriteUnique, desc="Send a WriteUniquePtl"; + SendWriteBackOrWriteEvict, out_trans="yes", desc="Send a WriteBackFull (if line is UD or SD) or WriteEvictFull (if UC)"; + SendWriteClean, out_trans="yes", desc="Send a WriteCleanFull"; + SendWriteNoSnp, out_trans="yes", desc="Send a WriteNoSnp for a full line"; + SendWriteNoSnpPartial, out_trans="yes", desc="Send a WriteNoSnpPtl"; + SendWriteUnique, out_trans="yes", desc="Send a WriteUniquePtl"; SendWBData, desc="Send writeback data"; SendWUData, desc="Send write unique data"; SendWUDataCB, desc="Send write unique data from a sequencer callback"; @@ -453,9 +453,9 @@ machine(MachineType:Cache, "Cache coherency protocol") : SendComp_WU, desc="Ack WU completion"; // Dataless requests - SendEvict, desc="Send a Evict"; + SendEvict, out_trans="yes", 
desc="Send a Evict"; SendCompIResp, desc="Ack Evict with Comp_I"; - SendCleanUnique,desc="Send a CleanUnique"; + SendCleanUnique,out_trans="yes", desc="Send a CleanUnique"; SendCompUCResp, desc="Ack CleanUnique with Comp_UC"; SendCompUCRespStale, desc="Ack stale CleanUnique with Comp_UC"; diff --git a/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm b/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm index 92a04ed3d2..aa27c40964 100644 --- a/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm +++ b/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 ARM Limited + * Copyright (c) 2021-2023 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -170,8 +170,8 @@ machine(MachineType:MiscNode, "CHI Misc Node for handling and distrbuting DVM op SendPCrdGrant, desc="Send PCrdGrant"; DoRetry, desc="Resend the current pending request"; - DvmTlbi_Initiate, desc="Initiate a DVM TLBI on the provided TBE"; - DvmSync_Initiate, desc="Initiate a DVM Sync on the provided TBE"; + DvmTlbi_Initiate, out_trans="yes", in_trans="yes", desc="Initiate a DVM TLBI on the provided TBE"; + DvmSync_Initiate, out_trans="yes", in_trans="yes", desc="Initiate a DVM Sync on the provided TBE"; DvmSendNextMessage_P1, desc="Trigger a SnpDvmOp_P1 message based on the TBE type"; DvmSendNextMessage_P2, desc="Trigger a SnpDvmOp_P2 message based on the TBE type"; DvmFinishDistributing, desc="Move the TBE out of the Distributing state into Waiting";