From 2c048bb097e003ad2c623fc54225e9fd31fbc6f8 Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Thu, 18 Feb 2021 00:53:02 -0800 Subject: [PATCH 01/25] scons: Fixing build errors with scons 4.0.1 and 4.1.0 SCons failed to find the m5 module while loading m5.util.terminal from site_scons/gem5_scons/util.py. This results in the current version of gem5 stable failing to build with scons 4.0.1 and 4.1.0. The nature of the bug and the explanation for the fix can be found here: https://gem5-review.googlesource.com/c/public/gem5/+/38616 Jira: https://gem5.atlassian.net/browse/GEM5-916 Change-Id: I3817f39ebc3021fb6fc89bcd09a96999f8ca2841 Signed-off-by: Hoa Nguyen Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/41594 Reviewed-by: Bobby R. Bruce Maintainer: Bobby R. Bruce Tested-by: kokoro --- SConstruct | 2 +- site_scons/site_tools/default.py | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/SConstruct b/SConstruct index 0d8159bbab..bb038b8fbe 100755 --- a/SConstruct +++ b/SConstruct @@ -139,7 +139,7 @@ if GetOption('no_lto') and GetOption('force_lto'): # ######################################################################## -main = Environment() +main = Environment(tools=['default', 'git']) from gem5_scons.util import get_termcap termcap = get_termcap() diff --git a/site_scons/site_tools/default.py b/site_scons/site_tools/default.py index 1965a20173..88a693237d 100644 --- a/site_scons/site_tools/default.py +++ b/site_scons/site_tools/default.py @@ -78,15 +78,9 @@ def common_config(env): # as well env.AppendENVPath('PYTHONPATH', extra_python_paths) -gem5_tool_list = [ - 'git', -] - def generate(env): common_config(env) SCons.Tool.default.generate(env) - for tool in gem5_tool_list: - SCons.Tool.Tool(tool)(env) def exists(env): return 1 From 2373934b8224e33c55ce96a0fe7751caeee13baf Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Fri, 19 Feb 2021 15:35:05 -0800 Subject: [PATCH 02/25] misc: Updated the RELEASE-NOTES and version number Updated the RELEASE-NOTES.md and version number for the v20.1.0.4 hotfix release. Change-Id: Iaefed86cb176c3adcd66d101ac3155d30528b025 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/41713 Maintainer: Bobby R. Bruce Reviewed-by: Jason Lowe-Power Reviewed-by: Hoa Nguyen Tested-by: kokoro --- RELEASE-NOTES.md | 5 +++++ src/Doxyfile | 2 +- src/base/version.cc | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index c84d9b47df..3f1709123d 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,3 +1,8 @@ +# Version 20.1.0.4 + +**[HOTFIX]** [gem5 was failing to build with SCons 4.0.1 and 4.1.0](https://gem5.atlassian.net/browse/GEM5-916). +This hotfix makes the necessary changes to `site_scons/site_tools/default.py` for gem5 to compile successfully on these versions of SCons. + # Version 20.1.0.3 **[HOTFIX]** A patch was apply to fix an [error where booting Linux stalled when using the ARM ISA](https://gem5.atlassian.net/browse/GEM5-901). diff --git a/src/Doxyfile b/src/Doxyfile index ddc39338c7..4ad0ea537b 100644 --- a/src/Doxyfile +++ b/src/Doxyfile @@ -31,7 +31,7 @@ PROJECT_NAME = gem5 # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = v20.1.0.3 +PROJECT_NUMBER = v20.1.0.4 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. 
diff --git a/src/base/version.cc b/src/base/version.cc index d30ddd1510..0a34488e5f 100644 --- a/src/base/version.cc +++ b/src/base/version.cc @@ -29,4 +29,4 @@ /** * @ingroup api_base_utils */ -const char *gem5Version = "20.1.0.3"; +const char *gem5Version = "20.1.0.4"; From 48094d1abf44e7f5c9414923332ce51e3f8a23f6 Mon Sep 17 00:00:00 2001 From: Peter Date: Wed, 27 Jan 2021 17:25:47 +0800 Subject: [PATCH 03/25] arch-riscv: Fixing RISC-V remote GDB MIP and MIE accesses. readMiscRegNoEffect reads directly from the misc reg file. However, MIP and MIE are actually read directly from the interrupt controller, and their values are not stored in the misc reg file. As such, readMiscReg should be used instead. Change-Id: I6bd71da5c83e12c06043889bdbef7e4c0cf78190 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/39816 Reviewed-by: Bobby R. Bruce Reviewed-by: Ayaz Akram Maintainer: Bobby R. Bruce Tested-by: kokoro --- src/arch/riscv/remote_gdb.cc | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/arch/riscv/remote_gdb.cc b/src/arch/riscv/remote_gdb.cc index 021b388a75..da78957008 100644 --- a/src/arch/riscv/remote_gdb.cc +++ b/src/arch/riscv/remote_gdb.cc @@ -211,7 +211,7 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context) // U mode CSR r.ustatus = context->readMiscRegNoEffect( CSRData.at(CSR_USTATUS).physIndex) & CSRMasks.at(CSR_USTATUS); - r.uie = context->readMiscRegNoEffect( + r.uie = context->readMiscReg( CSRData.at(CSR_UIE).physIndex) & CSRMasks.at(CSR_UIE); r.utvec = context->readMiscRegNoEffect( CSRData.at(CSR_UTVEC).physIndex); @@ -223,7 +223,7 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context) CSRData.at(CSR_UCAUSE).physIndex); r.utval = context->readMiscRegNoEffect( CSRData.at(CSR_UTVAL).physIndex); - r.uip = context->readMiscRegNoEffect( + r.uip = context->readMiscReg( CSRData.at(CSR_UIP).physIndex) & CSRMasks.at(CSR_UIP); // S mode CSR r.sstatus = context->readMiscRegNoEffect( CSRData.at(CSR_SSTATUS).physIndex) & CSRMasks.at(CSR_SSTATUS); r.sedeleg = context->readMiscRegNoEffect( CSRData.at(CSR_SEDELEG).physIndex); r.sideleg = context->readMiscRegNoEffect( CSRData.at(CSR_SIDELEG).physIndex); - r.sie = context->readMiscRegNoEffect( + r.sie = context->readMiscReg( CSRData.at(CSR_SIE).physIndex) & CSRMasks.at(CSR_SIE); r.stvec = context->readMiscRegNoEffect( CSRData.at(CSR_STVEC).physIndex); @@ -247,7 +247,7 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context) CSRData.at(CSR_SCAUSE).physIndex); r.stval = context->readMiscRegNoEffect( CSRData.at(CSR_STVAL).physIndex); - r.sip = context->readMiscRegNoEffect( + r.sip = context->readMiscReg( CSRData.at(CSR_SIP).physIndex) & CSRMasks.at(CSR_SIP); r.satp = context->readMiscRegNoEffect( CSRData.at(CSR_SATP).physIndex); @@ -269,7 +269,7 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context) CSRData.at(CSR_MEDELEG).physIndex); r.mideleg = context->readMiscRegNoEffect( CSRData.at(CSR_MIDELEG).physIndex); - r.mie = context->readMiscRegNoEffect( + r.mie = context->readMiscReg( CSRData.at(CSR_MIE).physIndex) & CSRMasks.at(CSR_MIE); r.mtvec = context->readMiscRegNoEffect( CSRData.at(CSR_MTVEC).physIndex); @@ -283,7 +283,7 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context) CSRData.at(CSR_MCAUSE).physIndex); r.mtval = context->readMiscRegNoEffect( CSRData.at(CSR_MTVAL).physIndex); - r.mip = context->readMiscRegNoEffect( + r.mip = context->readMiscReg( CSRData.at(CSR_MIP).physIndex) & CSRMasks.at(CSR_MIP); // H mode CSR (to be implemented) @@ -340,11 +340,11 @@ 
RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const newVal = (oldVal & ~mask) | (r.ustatus & mask); context->setMiscRegNoEffect( CSRData.at(CSR_USTATUS).physIndex, newVal); - oldVal = context->readMiscRegNoEffect( + oldVal = context->readMiscReg( CSRData.at(CSR_UIE).physIndex); mask = CSRMasks.at(CSR_UIE); newVal = (oldVal & ~mask) | (r.uie & mask); - context->setMiscRegNoEffect( + context->setMiscReg( CSRData.at(CSR_UIE).physIndex, newVal); context->setMiscRegNoEffect( CSRData.at(CSR_UTVEC).physIndex, r.utvec); @@ -356,11 +356,11 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_UCAUSE).physIndex, r.ucause); context->setMiscRegNoEffect( CSRData.at(CSR_UTVAL).physIndex, r.utval); - oldVal = context->readMiscRegNoEffect( + oldVal = context->readMiscReg( CSRData.at(CSR_UIP).physIndex); mask = CSRMasks.at(CSR_UIP); newVal = (oldVal & ~mask) | (r.uip & mask); - context->setMiscRegNoEffect( + context->setMiscReg( CSRData.at(CSR_UIP).physIndex, newVal); // S mode CSR @@ -374,11 +374,11 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_SEDELEG).physIndex, r.sedeleg); context->setMiscRegNoEffect( CSRData.at(CSR_SIDELEG).physIndex, r.sideleg); - oldVal = context->readMiscRegNoEffect( + oldVal = context->readMiscReg( CSRData.at(CSR_SIE).physIndex); mask = CSRMasks.at(CSR_SIE); newVal = (oldVal & ~mask) | (r.sie & mask); - context->setMiscRegNoEffect( + context->setMiscReg( CSRData.at(CSR_SIE).physIndex, newVal); context->setMiscRegNoEffect( CSRData.at(CSR_STVEC).physIndex, r.stvec); @@ -392,11 +392,11 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_SCAUSE).physIndex, r.scause); context->setMiscRegNoEffect( CSRData.at(CSR_STVAL).physIndex, r.stval); - oldVal = context->readMiscRegNoEffect( + oldVal = context->readMiscReg( CSRData.at(CSR_SIP).physIndex); mask = CSRMasks.at(CSR_SIP); newVal = (oldVal & ~mask) | (r.sip & mask); - context->setMiscRegNoEffect( + context->setMiscReg( CSRData.at(CSR_SIP).physIndex, newVal); context->setMiscRegNoEffect( CSRData.at(CSR_SATP).physIndex, r.satp); @@ -426,11 +426,11 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_MEDELEG).physIndex, r.medeleg); context->setMiscRegNoEffect( CSRData.at(CSR_MIDELEG).physIndex, r.mideleg); - oldVal = context->readMiscRegNoEffect( + oldVal = context->readMiscReg( CSRData.at(CSR_MIE).physIndex); mask = CSRMasks.at(CSR_MIE); newVal = (oldVal & ~mask) | (r.mie & mask); - context->setMiscRegNoEffect( + context->setMiscReg( CSRData.at(CSR_MIE).physIndex, newVal); context->setMiscRegNoEffect( CSRData.at(CSR_MTVEC).physIndex, r.mtvec); @@ -444,11 +444,11 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const CSRData.at(CSR_MCAUSE).physIndex, r.mcause); context->setMiscRegNoEffect( CSRData.at(CSR_MTVAL).physIndex, r.mtval); - oldVal = context->readMiscRegNoEffect( + oldVal = context->readMiscReg( CSRData.at(CSR_MIP).physIndex); mask = CSRMasks.at(CSR_MIP); newVal = (oldVal & ~mask) | (r.mip & mask); - context->setMiscRegNoEffect( + context->setMiscReg( CSRData.at(CSR_MIP).physIndex, newVal); // H mode CSR (to be implemented) From 78b640b99430886aca033a00a48ec6e786a8ab71 Mon Sep 17 00:00:00 2001 From: "Bobby R. 
Bruce" Date: Wed, 10 Mar 2021 14:41:51 -0800 Subject: [PATCH 04/25] base-stats,python: Add missing "group" in `_prepare_stats` Change-Id: Idb25185e1d29ac9fd8c0503d55b56e0568a61d1f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42704 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/python/m5/stats/gem5stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/m5/stats/gem5stats.py b/src/python/m5/stats/gem5stats.py index 944604509c..9a2259aab5 100644 --- a/src/python/m5/stats/gem5stats.py +++ b/src/python/m5/stats/gem5stats.py @@ -230,7 +230,7 @@ def _prepare_stats(group: _m5.stats.Group): for stat in group.getStats(): stat.prepare() - for child in getStatGroups().values(): + for child in group.getStatGroups().values(): _prepare_stats(child) From fad705680e28da3fad013c4d945fbee74bdedc80 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 8 Mar 2021 17:57:43 -0800 Subject: [PATCH 05/25] systemc: Stop using std::gets in systemc.h. This is in the Accellera version of systemc, used when gem5 is built into systemc as a black box. std::gets was deprecated in c++11, and removed entirely in c++14. Since gem5 has moved to c++14, this can't be brought in with "using" in that header. More recent versions of systemc from Accellera may have this fixed, and it's pretty bad practice to have a universal header file squash unrelated namespaces like that in the first place. Rather than update all of this copy of systemc, this change just makes the smallest adjustment possible to get things to work again. Change-Id: I8a95665d4b5d49ffc014097714cf2e602bf9b937 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42583 Reviewed-by: Bobby R. Bruce Maintainer: Bobby R. Bruce Tested-by: kokoro --- ext/systemc/src/systemc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/systemc/src/systemc.h b/ext/systemc/src/systemc.h index ccc1285ebe..c0494b147b 100644 --- a/ext/systemc/src/systemc.h +++ b/ext/systemc/src/systemc.h @@ -117,7 +117,7 @@ using std::fputs; using std::getc; using std::getchar; - using std::gets; + //using std::gets; using std::putc; using std::putchar; using std::puts; From d278c229e0e9e564617129b23fb45b6e03fcf5e6 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 8 Mar 2021 17:59:57 -0800 Subject: [PATCH 06/25] scons,tlm: Update the gem5-within-systemc SConstruct to use c++14. Gem5 has moved to c++14, and the sources used from there will no longer compile with an older version of the standard. Move this SConstruct from specifying c++11 to c++14. Change-Id: I5be151648c3b72b5cbc005acc0c208a11de458ba Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42584 Reviewed-by: Bobby R. Bruce Maintainer: Bobby R. Bruce Tested-by: kokoro --- util/tlm/SConstruct | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/tlm/SConstruct b/util/tlm/SConstruct index 6802695454..695c5e3080 100644 --- a/util/tlm/SConstruct +++ b/util/tlm/SConstruct @@ -55,7 +55,7 @@ env.Append(CPPPATH=[gem5_root + '/build/' + gem5_arch, '#examples/common', ]) -env.Append(CXXFLAGS=['-std=c++11', +env.Append(CXXFLAGS=['-std=c++14', '-DSC_INCLUDE_DYNAMIC_PROCESSES', '-DTRACING_ON', ]) From 391d8609996d79bf44a0de849fd4132c11868cdb Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 8 Mar 2021 18:01:04 -0800 Subject: [PATCH 07/25] systemc,util: Update sc_master_port.cc now that params() returns a ref. The params() methods (when they exist) now all return references instead of pointers. 
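In sketch form (SomeObjectParams is an illustrative stand-in for the concrete params class, whose name is elided in the diff below):

    // Before: params() returned a pointer, so callers casted to a
    // pointer type and dereferenced it.
    system = dynamic_cast<const SomeObjectParams *>(owner.params())->system;

    // After: params() returns a reference, so callers cast to a
    // reference type and use member access.
    system = dynamic_cast<const SomeObjectParams &>(owner.params()).system;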
Update sc_master_port.cc to work with this. Change-Id: Ifa3c33e69ba9c16dc2b984784e7978b43714dbf2 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42585 Reviewed-by: Bobby R. Bruce Maintainer: Bobby R. Bruce Tested-by: kokoro --- util/tlm/src/sc_master_port.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/util/tlm/src/sc_master_port.cc b/util/tlm/src/sc_master_port.cc index 7b04921deb..ec17c2c652 100644 --- a/util/tlm/src/sc_master_port.cc +++ b/util/tlm/src/sc_master_port.cc @@ -93,8 +93,7 @@ SCMasterPort::SCMasterPort(const std::string& name_, transactor(nullptr), simControl(simControl) { - system = - dynamic_cast(owner_.params())->system; + system = dynamic_cast(owner_.params()).system; } void From 476069cdd343ff8f71abc2022d1821a7c38879b2 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 8 Mar 2021 18:02:04 -0800 Subject: [PATCH 08/25] systemc,util: Fix a bad port of a change from gem5 proper to here. The change "systemc: remove pipe through flag in TLM extension" improperly applied a change from gem5 proper to the gem5/TLM bridge here, adding a reference to a member variable that didn't exist. This change removes the extra and invalid level of indirection to get things to build again. Change-Id: I77ffdb5408525e116d414df2095a944c58e40b4d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42586 Reviewed-by: Bobby R. Bruce Reviewed-by: Earl Ou Maintainer: Bobby R. Bruce Tested-by: kokoro --- util/tlm/src/sc_slave_port.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/tlm/src/sc_slave_port.cc b/util/tlm/src/sc_slave_port.cc index 58b01345b3..4b1075fdae 100644 --- a/util/tlm/src/sc_slave_port.cc +++ b/util/tlm/src/sc_slave_port.cc @@ -301,7 +301,7 @@ SCSlavePort::pec( packet->makeResponse(); } if (packet->isResponse()) { - need_retry = !bridgeResponsePort.sendTimingResp(packet); + need_retry = !sendTimingResp(packet); } if (need_retry) { From fcaf3a69fbbe6e6a3bad12f4ceecdae17cbec70d Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 8 Mar 2021 21:15:00 -0800 Subject: [PATCH 09/25] scons,util: Make the tlm SConstruct put SConsign in the build dir. Otherwise it ends up in the root of the source directory, which makes it harder to blast everything and do a truly clean build. Change-Id: Id1e948b9059aaab0814c19c0cfb6ed611bf14d08 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42587 Reviewed-by: Bobby R. Bruce Maintainer: Bobby R. Bruce Tested-by: kokoro --- util/tlm/SConstruct | 1 + 1 file changed, 1 insertion(+) diff --git a/util/tlm/SConstruct b/util/tlm/SConstruct index 695c5e3080..f1e057db70 100644 --- a/util/tlm/SConstruct +++ b/util/tlm/SConstruct @@ -78,6 +78,7 @@ sys.path.append(gem5_root + '/src/python') AddOption('--no-colors', dest='use_colors', action='store_false', help="Don't add color to abbreviated scons output") +main.SConsignFile('build/systemc/sconsign') SConscript(gem5_root + '/ext/systemc/SConscript', variant_dir='build/systemc', exports='main') From cf1659a0c82adfdd0ab9cca7d7aaa31d42c2d8e2 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 8 Mar 2021 21:16:33 -0800 Subject: [PATCH 10/25] sim,systemc: Use slightly non-standard constructors for custom create() Rather than rely on the default create() method being a weak symbol, we can just not have a compliant constructor signature, which means we need to (and therefore can) define our own custom create(). 
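In sketch form, mirroring the Root changes in this patch (the second constructor argument is ignored; it exists only to make the signature non-standard):

    // With no (const RootParams &) constructor, no default create() is
    // generated, so the hand-written one below does not collide with it.
    Root::Root(const RootParams &p, int)
        : SimObject(p)
    {
    }

    Root *
    RootParams::create() const
    {
        // The custom create() supplies the dummy argument itself.
        return new Root(*this, 0);
    }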
Change-Id: I6009d72db0c103b5724d1ba7e20c0bd4a2b761e5 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42588 Maintainer: Bobby R. Bruce Tested-by: kokoro Reviewed-by: Earl Ou --- src/sim/root.cc | 4 ++-- src/sim/root.hh | 4 +++- src/systemc/core/kernel.cc | 4 ++-- src/systemc/core/kernel.hh | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/sim/root.cc b/src/sim/root.cc index 57212b2667..35afe70471 100644 --- a/src/sim/root.cc +++ b/src/sim/root.cc @@ -166,7 +166,7 @@ Root::timeSyncSpinThreshold(Time newThreshold) timeSyncEnable(en); } -Root::Root(const RootParams &p) +Root::Root(const RootParams &p, int) : SimObject(p), _enabled(false), _periodTick(p.time_sync_period), syncEvent([this]{ timeSync(); }, name()) { @@ -216,5 +216,5 @@ RootParams::create() const FullSystem = full_system; FullSystemInt = full_system ? 1 : 0; - return new Root(*this); + return new Root(*this, 0); } diff --git a/src/sim/root.hh b/src/sim/root.hh index e2c58fda3b..fd3b97d72f 100644 --- a/src/sim/root.hh +++ b/src/sim/root.hh @@ -134,7 +134,9 @@ class Root : public SimObject PARAMS(Root); - Root(const Params &p); + // The int parameter is ignored, it's just so we can define a custom + // create() method. + Root(const Params &p, int); /** Schedule the timesync event at startup(). */ diff --git a/src/systemc/core/kernel.cc b/src/systemc/core/kernel.cc index 75e5bc94a5..3bb27f225d 100644 --- a/src/systemc/core/kernel.cc +++ b/src/systemc/core/kernel.cc @@ -54,7 +54,7 @@ bool Kernel::endOfSimulationComplete() { return endComplete; } sc_core::sc_status Kernel::status() { return _status; } void Kernel::status(sc_core::sc_status s) { _status = s; } -Kernel::Kernel(const Params ¶ms) : +Kernel::Kernel(const Params ¶ms, int) : SimObject(params), t0Event(this, false, EventBase::Default_Pri - 1) { // Install ourselves as the scheduler's event manager. @@ -187,6 +187,6 @@ SystemC_KernelParams::create() const { panic_if(sc_gem5::kernel, "Only one systemc kernel object may be defined.\n"); - sc_gem5::kernel = new sc_gem5::Kernel(*this); + sc_gem5::kernel = new sc_gem5::Kernel(*this, 0); return sc_gem5::kernel; } diff --git a/src/systemc/core/kernel.hh b/src/systemc/core/kernel.hh index c58e0f1297..9bea0db216 100644 --- a/src/systemc/core/kernel.hh +++ b/src/systemc/core/kernel.hh @@ -46,7 +46,7 @@ class Kernel : public SimObject { public: typedef SystemC_KernelParams Params; - Kernel(const Params ¶ms); + Kernel(const Params ¶ms, int); void init() override; void regStats() override; From 6572078a992d0e1f98c812f7d9820722096991c9 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 11 Mar 2021 04:26:17 -0800 Subject: [PATCH 11/25] cpu: Delete unnecessary create() methods. These were added in changes which were created before create() methods were mostly automated, but were checked in after the then unnecessary create() methods were purged. 
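Each deletion has the same shape, e.g. (taken from the diff below):

    // Redundant: equivalent to the default create() the build system
    // now generates for any SimObject with a (const Params &) constructor.
    CpuThread*
    CpuThreadParams::create() const
    {
        return new CpuThread(*this);
    }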
Change-Id: I03da797ae8328fab6ef6b85dbc4ea86b34512fd5 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42743 Reviewed-by: Daniel Carvalho Reviewed-by: Jason Lowe-Power Maintainer: Gabe Black Tested-by: kokoro --- src/cpu/testers/gpu_ruby_test/cpu_thread.cc | 6 ------ src/cpu/testers/gpu_ruby_test/dma_thread.cc | 6 ------ src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc | 6 ------ src/cpu/testers/gpu_ruby_test/protocol_tester.cc | 6 ------ 4 files changed, 24 deletions(-) diff --git a/src/cpu/testers/gpu_ruby_test/cpu_thread.cc b/src/cpu/testers/gpu_ruby_test/cpu_thread.cc index fa801b3ed7..d0ac10ea81 100644 --- a/src/cpu/testers/gpu_ruby_test/cpu_thread.cc +++ b/src/cpu/testers/gpu_ruby_test/cpu_thread.cc @@ -43,12 +43,6 @@ CpuThread::CpuThread(const Params &p) assert(numLanes == 1); } -CpuThread* -CpuThreadParams::create() const -{ - return new CpuThread(*this); -} - void CpuThread::issueLoadOps() { diff --git a/src/cpu/testers/gpu_ruby_test/dma_thread.cc b/src/cpu/testers/gpu_ruby_test/dma_thread.cc index 254158dc00..e5f79c973f 100644 --- a/src/cpu/testers/gpu_ruby_test/dma_thread.cc +++ b/src/cpu/testers/gpu_ruby_test/dma_thread.cc @@ -48,12 +48,6 @@ DmaThread::~DmaThread() } -DmaThread* -DmaThreadParams::create() const -{ - return new DmaThread(*this); -} - void DmaThread::issueLoadOps() { diff --git a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc index f2f1343e8a..a90b204da5 100644 --- a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc +++ b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc @@ -48,12 +48,6 @@ GpuWavefront::~GpuWavefront() } -GpuWavefront* -GpuWavefrontParams::create() const -{ - return new GpuWavefront(*this); -} - void GpuWavefront::issueLoadOps() { diff --git a/src/cpu/testers/gpu_ruby_test/protocol_tester.cc b/src/cpu/testers/gpu_ruby_test/protocol_tester.cc index a8f84081c4..95e6035805 100644 --- a/src/cpu/testers/gpu_ruby_test/protocol_tester.cc +++ b/src/cpu/testers/gpu_ruby_test/protocol_tester.cc @@ -357,9 +357,3 @@ ProtocolTester::SeqPort::recvTimingResp(PacketPtr pkt) return true; } - -ProtocolTester* -ProtocolTesterParams::create() const -{ - return new ProtocolTester(*this); -} From 7bb690c1ee70b1c82b23eeb68dcda3dc6e5c97f7 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Mon, 8 Mar 2021 22:16:41 -0800 Subject: [PATCH 12/25] scons,python: Always generate default create() methods. We were originally generating default create() methods alongside the pybind definitions, but unfortunately those are only included when python support is included. Since the SimObject Param structs are unconditionally provided even if the thing calling their create() methods is not, we need to also unconditionally provide the default create() definitions. We do that by putting them in their own new .cc files. 
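The generated definitions rely on a small SFINAE shunt; a hedged sketch of the idea follows (FooParams and DummyParamsClass are illustrative stand-ins, since the real template arguments are elided in the diff below):

    #include <type_traits>

    struct FooParams {};        // stands in for a real SimObject params struct
    struct DummyParamsClass {}; // fallback type carrying the dummy create()

    // Fallback: the SimObject has no (const FooParams &) constructor, so
    // Params aliases the dummy class and create() never builds an object.
    template <class CxxClass, class Enable = void>
    struct Shunt
    {
        using Params = DummyParamsClass;
        static CxxClass *create(const Params &) { return nullptr; }
    };

    // Preferred when the constructor exists: Params aliases the real
    // struct and create() simply news up the object.
    template <class CxxClass>
    struct Shunt<CxxClass, std::enable_if_t<
        std::is_constructible<CxxClass, const FooParams &>::value>>
    {
        using Params = FooParams;
        static CxxClass *create(const Params &p) { return new CxxClass(p); }
    };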
Change-Id: I29d1573d578794b3fe7ec2bc16ef5c8c58e56d0e Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42589 Maintainer: Gabe Black Tested-by: kokoro Reviewed-by: Jason Lowe-Power Reviewed-by: Earl Ou --- src/SConscript | 21 +++++- src/python/m5/SimObject.py | 149 +++++++++++++++++++------------------ 2 files changed, 94 insertions(+), 76 deletions(-) diff --git a/src/SConscript b/src/SConscript index 5fe0ab2c25..31fce0c6bf 100644 --- a/src/SConscript +++ b/src/SConscript @@ -917,7 +917,7 @@ PySource('m5', 'python/m5/info.py') # Create all of the SimObject param headers and enum headers # -def createSimObjectParamStruct(target, source, env): +def createSimObjectParamDecl(target, source, env): assert len(target) == 1 and len(source) == 1 name = source[0].get_text_contents() @@ -927,6 +927,16 @@ def createSimObjectParamStruct(target, source, env): obj.cxx_param_decl(code) code.write(target[0].abspath) +def createSimObjectParamDef(target, source, env): + assert len(target) == 1 and len(source) == 1 + + name = source[0].get_text_contents() + obj = sim_objects[name] + + code = code_formatter() + obj.cxx_param_def(code) + code.write(target[0].abspath) + def createSimObjectCxxConfig(is_header): def body(target, source, env): assert len(target) == 1 and len(source) == 1 @@ -987,9 +997,16 @@ for name,simobj in sorted(sim_objects.items()): hh_file = File('params/%s.hh' % name) params_hh_files.append(hh_file) env.Command(hh_file, Value(name), - MakeAction(createSimObjectParamStruct, Transform("SO PARAM"))) + MakeAction(createSimObjectParamDecl, Transform("SOPARMHH"))) env.Depends(hh_file, depends + extra_deps) + if not getattr(simobj, 'abstract', False) and hasattr(simobj, 'type'): + cc_file = File('params/%s.cc' % name) + env.Command(cc_file, Value(name), + MakeAction(createSimObjectParamDef, Transform("SOPARMCC"))) + env.Depends(cc_file, depends + extra_deps) + Source(cc_file) + # C++ parameter description files if GetOption('with_cxx_config'): for name,simobj in sorted(sim_objects.items()): diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py index e604a20a99..bdce718172 100644 --- a/src/python/m5/SimObject.py +++ b/src/python/m5/SimObject.py @@ -368,7 +368,7 @@ def createCxxConfigDirectoryEntryFile(code, name, simobj, is_header): if not is_header: code('{') - if hasattr(simobj, 'abstract') and simobj.abstract: + if getattr(simobj, 'abstract', False): code(' return NULL;') else: code(' return this->create();') @@ -700,6 +700,80 @@ class MetaSimObject(type): def pybind_predecls(cls, code): code('#include "${{cls.cxx_header}}"') + def cxx_param_def(cls, code): + code(''' +#include + +#include "base/compiler.hh" + +#include "${{cls.cxx_header}}" +#include "params/${cls}.hh" + +''') + code() + code('namespace') + code('{') + code() + # If we can't define a default create() method for this params struct + # because the SimObject doesn't have the right constructor, use + # template magic to make it so we're actually defining a create method + # for this class instead. + code('class Dummy${cls}ParamsClass') + code('{') + code(' public:') + code(' ${{cls.cxx_class}} *create() const;') + code('};') + code() + code('template ') + code('class Dummy${cls}Shunt;') + code() + # This version directs to the real Params struct and the default + # behavior of create if there's an appropriate constructor. 
+ code('template ') + code('class Dummy${cls}Shunt::value>>') + code('{') + code(' public:') + code(' using Params = ${cls}Params;') + code(' static ${{cls.cxx_class}} *') + code(' create(const Params &p)') + code(' {') + code(' return new CxxClass(p);') + code(' }') + code('};') + code() + # This version diverts to the DummyParamsClass and a dummy + # implementation of create if the appropriate constructor does not + # exist. + code('template ') + code('class Dummy${cls}Shunt::value>>') + code('{') + code(' public:') + code(' using Params = Dummy${cls}ParamsClass;') + code(' static ${{cls.cxx_class}} *') + code(' create(const Params &p)') + code(' {') + code(' return nullptr;') + code(' }') + code('};') + code() + code('} // anonymous namespace') + code() + # An implementation of either the real Params struct's create + # method, or the Dummy one. Either an implementation is + # mandantory since this was shunted off to the dummy class, or + # one is optional which will override this weak version. + code('M5_VAR_USED ${{cls.cxx_class}} *') + code('Dummy${cls}Shunt<${{cls.cxx_class}}>::Params::create() const') + code('{') + code(' return Dummy${cls}Shunt<${{cls.cxx_class}}>::') + code(' create(*this);') + code('}') + + def pybind_decl(cls, code): py_class_name = cls.pybind_class @@ -713,9 +787,6 @@ class MetaSimObject(type): code('''#include "pybind11/pybind11.h" #include "pybind11/stl.h" -#include - -#include "base/compiler.hh" #include "params/$cls.hh" #include "python/pybind11/core.hh" #include "sim/init.hh" @@ -797,76 +868,6 @@ module_init(py::module_ &m_internal) code() code('static EmbeddedPyBind embed_obj("${0}", module_init, "${1}");', cls, cls._base.type if cls._base else "") - if not hasattr(cls, 'abstract') or not cls.abstract: - if 'type' in cls.__dict__: - code() - # This namespace can't *actually* be anonymous, or the compiler - # gets upset about having a weak symbol init. - code('namespace anonymous_params') - code('{') - code() - # If we can't define a default create() method for this params - # struct because the SimObject doesn't have the right - # constructor, use template magic to make it so we're actually - # defining a create method for this class instead. - code('class Dummy${cls}ParamsClass') - code('{') - code(' public:') - code(' ${{cls.cxx_class}} *create() const;') - code('};') - code() - code('template ') - code('class DummyShunt;') - code() - # This version directs to the real Params struct and the - # default behavior of create if there's an appropriate - # constructor. - code('template ') - code('class DummyShunt::value>>') - code('{') - code(' public:') - code(' using Params = ${cls}Params;') - code(' static ${{cls.cxx_class}} *') - code(' create(const Params &p)') - code(' {') - code(' return new CxxClass(p);') - code(' }') - code('};') - code() - # This version diverts to the DummyParamsClass and a dummy - # implementation of create if the appropriate constructor does - # not exist. - code('template ') - code('class DummyShunt::value>>') - code('{') - code(' public:') - code(' using Params = Dummy${cls}ParamsClass;') - code(' static ${{cls.cxx_class}} *') - code(' create(const Params &p)') - code(' {') - code(' return nullptr;') - code(' }') - code('};') - code() - code('} // namespace anonymous_params') - code() - code('using namespace anonymous_params;') - code() - # A weak implementation of either the real Params struct's - # create method, or the Dummy one if we don't want to have - # any default implementation. 
Either an implementation is - # mandantory since this was shunted off to the dummy class, or - # one is optional which will override this weak version. - code('M5_WEAK ${{cls.cxx_class}} *') - code('DummyShunt<${{cls.cxx_class}}>::Params::create() const') - code('{') - code(' return DummyShunt<${{cls.cxx_class}}>::') - code(' create(*this);') - code('}') _warned_about_nested_templates = False From 975fcf1355f6d3e0bb35a4477b032a1721fc7e9d Mon Sep 17 00:00:00 2001 From: Peter Yuen Date: Fri, 26 Feb 2021 15:43:50 +0700 Subject: [PATCH 13/25] arch-riscv: Fixed CPU switching and PLIC issue with MinorCPU Added takeover methods for PMA Checker and RiscvTLB to ensure that checkpoint restoration works. Also added logic in PLIC to prevent posting interrupts to a CPU that has yet to complete the current interrupt. PLIC's behaviour when a CPU claims another interrupt before completion is also changed. Now PLIC will return the uncompleted interrupt ID instead of returning 0. This behaviour is not documented in the specs but is designed this way to avoid issues from the CPU side (especially MinorCPU). Change-Id: I68eaaf56d2c4d76cc1e0a1e2160f5abe184c2cd5 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/41933 Maintainer: Bobby R. Bruce Tested-by: kokoro Reviewed-by: Ayaz Akram --- src/arch/generic/mmu.hh | 2 +- src/arch/riscv/RiscvMMU.py | 4 +++- src/arch/riscv/RiscvTLB.py | 4 ++-- src/arch/riscv/mmu.hh | 13 ++++++++++++- src/arch/riscv/pma_checker.cc | 6 ++++++ src/arch/riscv/pma_checker.hh | 2 ++ src/arch/riscv/tlb.cc | 6 ++++++ src/arch/riscv/tlb.hh | 14 +++++++++++++- src/dev/riscv/plic.cc | 29 ++++++++++++++++++++++------- 9 files changed, 67 insertions(+), 13 deletions(-) diff --git a/src/arch/generic/mmu.hh b/src/arch/generic/mmu.hh index 976527397a..79e53dcf45 100644 --- a/src/arch/generic/mmu.hh +++ b/src/arch/generic/mmu.hh @@ -103,7 +103,7 @@ class BaseMMU : public SimObject return getTlb(mode)->finalizePhysical(req, tc, mode); } - void takeOverFrom(BaseMMU *old_mmu); + virtual void takeOverFrom(BaseMMU *old_mmu); public: BaseTLB* dtb; diff --git a/src/arch/riscv/RiscvMMU.py b/src/arch/riscv/RiscvMMU.py index 4ff477ec61..38f1da924a 100644 --- a/src/arch/riscv/RiscvMMU.py +++ b/src/arch/riscv/RiscvMMU.py @@ -35,6 +35,8 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+from m5.params import * + from m5.objects.BaseMMU import BaseMMU from m5.objects.RiscvTLB import RiscvTLB from m5.objects.PMAChecker import PMAChecker @@ -45,7 +47,7 @@ class RiscvMMU(BaseMMU): cxx_header = 'arch/riscv/mmu.hh' itb = RiscvTLB() dtb = RiscvTLB() - pma_checker = PMAChecker() + pma_checker = Param.PMAChecker(PMAChecker(), "PMA Checker") @classmethod def walkerPorts(cls): diff --git a/src/arch/riscv/RiscvTLB.py b/src/arch/riscv/RiscvTLB.py index b419262390..05ff521dff 100644 --- a/src/arch/riscv/RiscvTLB.py +++ b/src/arch/riscv/RiscvTLB.py @@ -43,7 +43,7 @@ class RiscvPagetableWalker(ClockedObject): num_squash_per_cycle = Param.Unsigned(4, "Number of outstanding walks that can be squashed per cycle") # Grab the pma_checker from the MMU - pma_checker = Param.PMAChecker(Parent.any, "PMA Chekcer") + pma_checker = Param.PMAChecker(Parent.any, "PMA Checker") class RiscvTLB(BaseTLB): type = 'RiscvTLB' @@ -53,4 +53,4 @@ class RiscvTLB(BaseTLB): walker = Param.RiscvPagetableWalker(\ RiscvPagetableWalker(), "page table walker") # Grab the pma_checker from the MMU - pma_checker = Param.PMAChecker(Parent.any, "PMA Chekcer") + pma_checker = Param.PMAChecker(Parent.any, "PMA Checker") diff --git a/src/arch/riscv/mmu.hh b/src/arch/riscv/mmu.hh index 322f0afb87..ce3ce307ac 100644 --- a/src/arch/riscv/mmu.hh +++ b/src/arch/riscv/mmu.hh @@ -40,6 +40,7 @@ #include "arch/generic/mmu.hh" #include "arch/riscv/isa.hh" +#include "arch/riscv/pma_checker.hh" #include "arch/riscv/tlb.hh" #include "params/RiscvMMU.hh" @@ -49,8 +50,10 @@ namespace RiscvISA { class MMU : public BaseMMU { public: + PMAChecker *pma; + MMU(const RiscvMMUParams &p) - : BaseMMU(p) + : BaseMMU(p), pma(p.pma_checker) {} PrivilegeMode @@ -64,6 +67,14 @@ class MMU : public BaseMMU { return static_cast(dtb)->getWalker(); } + + void + takeOverFrom(BaseMMU *old_mmu) override + { + MMU *ommu = dynamic_cast(old_mmu); + BaseMMU::takeOverFrom(ommu); + pma->takeOverFrom(ommu->pma); + } }; } // namespace RiscvISA diff --git a/src/arch/riscv/pma_checker.cc b/src/arch/riscv/pma_checker.cc index 32cb66d70a..d36dc1dcb8 100644 --- a/src/arch/riscv/pma_checker.cc +++ b/src/arch/riscv/pma_checker.cc @@ -81,3 +81,9 @@ PMAChecker::isUncacheable(PacketPtr pkt) { return isUncacheable(pkt->getAddrRange()); } + +void +PMAChecker::takeOverFrom(PMAChecker *old) +{ + uncacheable = old->uncacheable; +} diff --git a/src/arch/riscv/pma_checker.hh b/src/arch/riscv/pma_checker.hh index 5833dbe610..298d4a0a72 100644 --- a/src/arch/riscv/pma_checker.hh +++ b/src/arch/riscv/pma_checker.hh @@ -74,6 +74,8 @@ class PMAChecker : public SimObject bool isUncacheable(const AddrRange &range); bool isUncacheable(const Addr &addr, const unsigned size); bool isUncacheable(PacketPtr pkt); + + void takeOverFrom(PMAChecker *old); }; #endif // __ARCH_RISCV_PMA_CHECKER_HH__ diff --git a/src/arch/riscv/tlb.cc b/src/arch/riscv/tlb.cc index 5109d2adfc..b7b09849dc 100644 --- a/src/arch/riscv/tlb.cc +++ b/src/arch/riscv/tlb.cc @@ -519,3 +519,9 @@ TLB::TlbStats::TlbStats(Stats::Group *parent) readAccesses + writeAccesses) { } + +Port * +TLB::getTableWalkerPort() +{ + return &walker->getPort("port"); +} \ No newline at end of file diff --git a/src/arch/riscv/tlb.hh b/src/arch/riscv/tlb.hh index 8dcf0fcf21..9c7172a26c 100644 --- a/src/arch/riscv/tlb.hh +++ b/src/arch/riscv/tlb.hh @@ -92,7 +92,7 @@ class TLB : public BaseTLB Walker *getWalker(); - void takeOverFrom(BaseTLB *otlb) override {} + void takeOverFrom(BaseTLB *old) override {} TlbEntry *insert(Addr vpn, const TlbEntry &entry); 
void flushAll() override; @@ -108,6 +108,18 @@ class TLB : public BaseTLB void serialize(CheckpointOut &cp) const override; void unserialize(CheckpointIn &cp) override; + /** + * Get the table walker port. This is used for + * migrating port connections during a CPU takeOverFrom() + * call. For architectures that do not have a table walker, + * NULL is returned, hence the use of a pointer rather than a + * reference. For RISC-V this method will always return a valid + * port pointer. + * + * @return A pointer to the walker port + */ + Port *getTableWalkerPort() override; + Addr translateWithTLB(Addr vaddr, uint16_t asid, Mode mode); Fault translateAtomic(const RequestPtr &req, diff --git a/src/dev/riscv/plic.cc b/src/dev/riscv/plic.cc index 60ac192201..2183183b87 100644 --- a/src/dev/riscv/plic.cc +++ b/src/dev/riscv/plic.cc @@ -354,17 +354,18 @@ Plic::readClaim(Register32& reg, const int context_id) context_id, max_int_id); clear(max_int_id); reg.update(max_int_id); + return reg.get(); } else { DPRINTF(Plic, "Claim already cleared - context: %d, interrupt ID: %d\n", context_id, max_int_id); - reg.update(0); + return 0; } } else { - warn("PLIC claim failed (not completed) - context: %d", context_id); - reg.update(0); + warn("PLIC claim repeated (not completed) - context: %d, last: %d", + context_id, lastID[context_id]); + return lastID[context_id]; } - return reg.get(); } void @@ -381,6 +382,7 @@ Plic::writeClaim(Register32& reg, const uint32_t& data, const int context_id) DPRINTF(Plic, "Complete - context: %d, interrupt ID: %d\n", context_id, reg.get()); + updateInt(); } void @@ -445,11 +447,11 @@ Plic::updateInt() uint32_t max_id = output.maxID[i]; uint32_t priority = output.maxPriority[i]; uint32_t threshold = registers.threshold[i].get(); - if (priority > threshold && max_id > 0) { + if (priority > threshold && max_id > 0 && lastID[i] == 0) { DPRINTF(Plic, "Int posted - thread: %d, int id: %d, ", thread_id, int_id); - DPRINTF(Plic, + DPRINTFR(Plic, "pri: %d, thres: %d\n", priority, threshold); intrctrl->post(thread_id, int_id, 0); } else { @@ -457,7 +459,7 @@ Plic::updateInt() DPRINTF(Plic, "Int filtered - thread: %d, int id: %d, ", thread_id, int_id); - DPRINTF(Plic, + DPRINTFR(Plic, "pri: %d, thres: %d\n", priority, threshold); } intrctrl->clear(thread_id, int_id, 0); @@ -499,6 +501,12 @@ Plic::serialize(CheckpointOut &cp) const SERIALIZE_SCALAR(n_outputs); SERIALIZE_CONTAINER(output.maxID); SERIALIZE_CONTAINER(output.maxPriority); + SERIALIZE_CONTAINER(pendingPriority); + for (int i=0; i < effPriority.size(); i++) { + arrayParamOut(cp, std::string("effPriority") + + std::to_string(i), effPriority[i]); + } + SERIALIZE_CONTAINER(lastID); } void @@ -541,4 +549,11 @@ Plic::unserialize(CheckpointIn &cp) } UNSERIALIZE_CONTAINER(output.maxID); UNSERIALIZE_CONTAINER(output.maxPriority); + UNSERIALIZE_CONTAINER(pendingPriority); + for (int i=0; i < effPriority.size(); i++) { + arrayParamIn(cp, std::string("effPriority") + + std::to_string(i), effPriority[i]); + } + UNSERIALIZE_CONTAINER(lastID); + updateInt(); } From fb1e29a17b16de12f31030dbc368144a9ee8196e Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Sat, 13 Mar 2021 22:29:48 +0000 Subject: [PATCH 14/25] configs: Remove icache from HTMSequencer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit icache has been removed from the Sequencer object by: https://gem5-review.googlesource.com/c/public/gem5/+/31267 Signed-off-by: Giacomo Travaglini Change-Id: Id2110a804ebbd942d02fef63b8becebbbb57683e 
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42923 Reviewed-by: Tiago Mück Maintainer: Bobby R. Bruce Tested-by: kokoro --- configs/ruby/MESI_Three_Level_HTM.py | 1 - 1 file changed, 1 deletion(-) diff --git a/configs/ruby/MESI_Three_Level_HTM.py b/configs/ruby/MESI_Three_Level_HTM.py index 89ca93c61d..b51108dbb0 100644 --- a/configs/ruby/MESI_Three_Level_HTM.py +++ b/configs/ruby/MESI_Three_Level_HTM.py @@ -141,7 +141,6 @@ def create_system(options, full_system, system, dma_ports, bootmem, ruby_system = ruby_system) cpu_seq = RubyHTMSequencer(version = i * num_cpus_per_cluster + j, - icache = l0i_cache, clk_domain = clk_domain, dcache = l0d_cache, ruby_system = ruby_system) From 18065258336cdea65895ed726bddfa01a96035c1 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Sun, 14 Mar 2021 16:12:05 +0000 Subject: [PATCH 15/25] dev-arm: Fix SMMUv3BaseCache Stats After [1] the SMMUv3BaseCache stats are indistinguishable from each other. With this patch we are adding a string to their constructor so that we can distinguish between the IPA, Config, etc. cache stats. [1]: https://gem5-review.googlesource.com/c/public/gem5/+/36415 Signed-off-by: Giacomo Travaglini Change-Id: Iaa84ed948cf2a4c36ea4fcda589676b9bbeed6fd Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42983 Reviewed-by: Jason Lowe-Power Reviewed-by: Hoa Nguyen Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/dev/arm/smmu_v3_caches.cc | 28 +++++++++++++++------------- src/dev/arm/smmu_v3_caches.hh | 9 +++++---- src/dev/arm/smmu_v3_deviceifc.cc | 4 ++-- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/dev/arm/smmu_v3_caches.cc b/src/dev/arm/smmu_v3_caches.cc index 8bea3eacba..59f5acad03 100644 --- a/src/dev/arm/smmu_v3_caches.cc +++ b/src/dev/arm/smmu_v3_caches.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2018-2019 ARM Limited + * Copyright (c) 2014, 2018-2019, 2021 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -59,12 +59,12 @@ */ SMMUv3BaseCache::SMMUv3BaseCache(const std::string &policy_name, uint32_t seed, - Stats::Group *parent) : - replacementPolicy(decodePolicyName(policy_name)), + Stats::Group *parent, const std::string &name) + : replacementPolicy(decodePolicyName(policy_name)), nextToReplace(0), random(seed), useStamp(0), - baseCacheStats(parent) + baseCacheStats(parent, name) {} int @@ -82,8 +82,9 @@ SMMUv3BaseCache::decodePolicyName(const std::string &policy_name) } SMMUv3BaseCache:: -SMMUv3BaseCacheStats::SMMUv3BaseCacheStats(Stats::Group *parent) - : Stats::Group(parent), +SMMUv3BaseCacheStats::SMMUv3BaseCacheStats( + Stats::Group *parent, const std::string &name) + : Stats::Group(parent, name.c_str()), ADD_STAT(averageLookups, UNIT_RATE(Stats::Units::Count, Stats::Units::Second), "Average number lookups per second"), @@ -144,9 +145,10 @@ SMMUv3BaseCacheStats::SMMUv3BaseCacheStats(Stats::Group *parent) */ SMMUTLB::SMMUTLB(unsigned numEntries, unsigned _associativity, - const std::string &policy, Stats::Group *parent) + const std::string &policy, Stats::Group *parent, + const std::string &name) : - SMMUv3BaseCache(policy, SMMUTLB_SEED, parent), + SMMUv3BaseCache(policy, SMMUTLB_SEED, parent, name), associativity(_associativity) { if (associativity == 0) @@ -426,7 +428,7 @@ SMMUTLB::pickEntryIdxToReplace(const Set &set, AllocPolicy alloc) ARMArchTLB::ARMArchTLB(unsigned numEntries, unsigned _associativity, const std::string &policy, Stats::Group *parent) : - SMMUv3BaseCache(policy, ARMARCHTLB_SEED, 
parent), + SMMUv3BaseCache(policy, ARMARCHTLB_SEED, parent, "tlb"), associativity(_associativity) { if (associativity == 0) @@ -625,7 +627,7 @@ ARMArchTLB::pickEntryIdxToReplace(const Set &set) IPACache::IPACache(unsigned numEntries, unsigned _associativity, const std::string &policy, Stats::Group *parent) : - SMMUv3BaseCache(policy, IPACACHE_SEED, parent), + SMMUv3BaseCache(policy, IPACACHE_SEED, parent, "ipa"), associativity(_associativity) { if (associativity == 0) @@ -805,7 +807,7 @@ IPACache::pickEntryIdxToReplace(const Set &set) ConfigCache::ConfigCache(unsigned numEntries, unsigned _associativity, const std::string &policy, Stats::Group *parent) : - SMMUv3BaseCache(policy, CONFIGCACHE_SEED, parent), + SMMUv3BaseCache(policy, CONFIGCACHE_SEED, parent, "cfg"), associativity(_associativity) { if (associativity == 0) @@ -969,7 +971,7 @@ ConfigCache::pickEntryIdxToReplace(const Set &set) WalkCache::WalkCache(const std::array &_sizes, unsigned _associativity, const std::string &policy, Stats::Group *parent) : - SMMUv3BaseCache(policy, WALKCACHE_SEED, parent), + SMMUv3BaseCache(policy, WALKCACHE_SEED, parent, "walk"), walkCacheStats(&(SMMUv3BaseCache::baseCacheStats)), associativity(_associativity), sizes() @@ -1226,7 +1228,7 @@ WalkCache::pickEntryIdxToReplace(const Set &set, WalkCache:: WalkCacheStats::WalkCacheStats(Stats::Group *parent) - : Stats::Group(parent, "WalkCache") + : Stats::Group(parent) { using namespace Stats; diff --git a/src/dev/arm/smmu_v3_caches.hh b/src/dev/arm/smmu_v3_caches.hh index 640710f345..dee09f2314 100644 --- a/src/dev/arm/smmu_v3_caches.hh +++ b/src/dev/arm/smmu_v3_caches.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2018-2019 ARM Limited + * Copyright (c) 2014, 2018-2019, 2021 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -67,7 +67,7 @@ class SMMUv3BaseCache struct SMMUv3BaseCacheStats : public Stats::Group { - SMMUv3BaseCacheStats(Stats::Group *parent); + SMMUv3BaseCacheStats(Stats::Group *parent, const std::string &name); Stats::Formula averageLookups; Stats::Scalar totalLookups; @@ -87,7 +87,7 @@ class SMMUv3BaseCache public: SMMUv3BaseCache(const std::string &policy_name, uint32_t seed, - Stats::Group *parent); + Stats::Group *parent, const std::string &name); virtual ~SMMUv3BaseCache() {} }; @@ -122,7 +122,8 @@ class SMMUTLB : public SMMUv3BaseCache }; SMMUTLB(unsigned numEntries, unsigned _associativity, - const std::string &policy, Stats::Group *parent); + const std::string &policy, Stats::Group *parent, + const std::string &name); SMMUTLB(const SMMUTLB& tlb) = delete; virtual ~SMMUTLB() {} diff --git a/src/dev/arm/smmu_v3_deviceifc.cc b/src/dev/arm/smmu_v3_deviceifc.cc index 182c6d213e..6683e76968 100644 --- a/src/dev/arm/smmu_v3_deviceifc.cc +++ b/src/dev/arm/smmu_v3_deviceifc.cc @@ -49,11 +49,11 @@ SMMUv3DeviceInterface::SMMUv3DeviceInterface( microTLB(new SMMUTLB(p.utlb_entries, p.utlb_assoc, p.utlb_policy, - this)), + this, "utlb")), mainTLB(new SMMUTLB(p.tlb_entries, p.tlb_assoc, p.tlb_policy, - this)), + this, "maintlb")), microTLBEnable(p.utlb_enable), mainTLBEnable(p.tlb_enable), devicePortSem(1), From d8b172917a99fc9867f54351b6a23f12034a6ef3 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Sun, 14 Mar 2021 17:03:04 +0000 Subject: [PATCH 16/25] dev-arm: Fix WalkCache stats The WalkCache stats are wrongly using the legacy framework. With this patch we are registering those to the hierarchical structure. 
As we need to pass the Stats::Group parent at construction time, we are replacing 2d arrays with Vector2d for count stats and using a flat vector of pointers for the Formula stats Signed-off-by: Giacomo Travaglini Change-Id: I8992bc262a376e4e81a4d608c11dff6902e0a01d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42984 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/dev/arm/smmu_v3_caches.cc | 121 ++++++++++++++++++++++------------ src/dev/arm/smmu_v3_caches.hh | 17 ++--- 2 files changed, 88 insertions(+), 50 deletions(-) diff --git a/src/dev/arm/smmu_v3_caches.cc b/src/dev/arm/smmu_v3_caches.cc index 59f5acad03..b09fb31e72 100644 --- a/src/dev/arm/smmu_v3_caches.cc +++ b/src/dev/arm/smmu_v3_caches.cc @@ -1228,68 +1228,105 @@ WalkCache::pickEntryIdxToReplace(const Set &set, WalkCache:: WalkCacheStats::WalkCacheStats(Stats::Group *parent) - : Stats::Group(parent) + : Stats::Group(parent), + ADD_STAT(totalLookupsByStageLevel, UNIT_COUNT, + "Total number of lookups"), + ADD_STAT(totalMissesByStageLevel, UNIT_COUNT, + "Total number of misses"), + ADD_STAT(totalUpdatesByStageLevel, UNIT_COUNT, + "Total number of updates"), + ADD_STAT(insertionsByStageLevel, UNIT_COUNT, + "Number of insertions (not replacements)") { using namespace Stats; + totalLookupsByStageLevel + .init(2, WALK_CACHE_LEVELS) + .flags(pdf); + totalMissesByStageLevel + .init(2, WALK_CACHE_LEVELS) + .flags(pdf); + totalUpdatesByStageLevel + .init(2, WALK_CACHE_LEVELS) + .flags(pdf); + insertionsByStageLevel + .init(2, WALK_CACHE_LEVELS) + .flags(pdf); + for (int s = 0; s < 2; s++) { + totalLookupsByStageLevel.subname(s, csprintf("S%d", s + 1)); + totalMissesByStageLevel.subname(s, csprintf("S%d", s + 1)); + totalUpdatesByStageLevel.subname(s, csprintf("S%d", s + 1)); + insertionsByStageLevel.subname(s, csprintf("S%d", s + 1)); + for (int l = 0; l < WALK_CACHE_LEVELS; l++) { - averageLookupsByStageLevel[s][l] - .name(csprintf("averageLookupsS%dL%d", s+1, l)) - .desc("Average number lookups per second") - .flags(pdf); + totalLookupsByStageLevel.ysubname(l, csprintf("L%d", l)); + totalMissesByStageLevel.ysubname(l, csprintf("L%d", l)); + totalUpdatesByStageLevel.ysubname(l, csprintf("L%d", l)); + insertionsByStageLevel.ysubname(l, csprintf("L%d", l)); - totalLookupsByStageLevel[s][l] - .name(csprintf("totalLookupsS%dL%d", s+1, l)) - .desc("Total number of lookups") - .flags(pdf); + auto avg_lookup = new Stats::Formula( + this, + csprintf("averageLookups_S%dL%d", s+1, l).c_str(), + UNIT_RATE(Stats::Units::Count, Stats::Units::Second), + "Average number lookups per second"); + avg_lookup->flags(pdf); + averageLookupsByStageLevel.push_back(avg_lookup); - averageLookupsByStageLevel[s][l] = + *avg_lookup = totalLookupsByStageLevel[s][l] / simSeconds; + auto avg_misses = new Stats::Formula( + this, + csprintf("averageMisses_S%dL%d", s+1, l).c_str(), + UNIT_RATE(Stats::Units::Count, Stats::Units::Second), + "Average number misses per second"); + avg_misses->flags(pdf); + averageMissesByStageLevel.push_back(avg_misses); - averageMissesByStageLevel[s][l] - .name(csprintf("averageMissesS%dL%d", s+1, l)) - .desc("Average number misses per second") - .flags(pdf); - - totalMissesByStageLevel[s][l] - .name(csprintf("totalMissesS%dL%d", s+1, l)) - .desc("Total number of misses") - .flags(pdf); - - averageMissesByStageLevel[s][l] = + *avg_misses = totalMissesByStageLevel[s][l] / simSeconds; + auto avg_updates = new Stats::Formula( + this, + csprintf("averageUpdates_S%dL%d", s+1, l).c_str(), + 
UNIT_RATE(Stats::Units::Count, Stats::Units::Second), + "Average number updates per second"); + avg_updates->flags(pdf); + averageUpdatesByStageLevel.push_back(avg_updates); - averageUpdatesByStageLevel[s][l] - .name(csprintf("averageUpdatesS%dL%d", s+1, l)) - .desc("Average number updates per second") - .flags(pdf); - - totalUpdatesByStageLevel[s][l] - .name(csprintf("totalUpdatesS%dL%d", s+1, l)) - .desc("Total number of updates") - .flags(pdf); - - averageUpdatesByStageLevel[s][l] = + *avg_updates = totalUpdatesByStageLevel[s][l] / simSeconds; + auto avg_hitrate = new Stats::Formula( + this, + csprintf("averageHitRate_S%dL%d", s+1, l).c_str(), + UNIT_RATIO, + "Average hit rate"); + avg_hitrate->flags(pdf); + averageHitRateByStageLevel.push_back(avg_hitrate); - averageHitRateByStageLevel[s][l] - .name(csprintf("averageHitRateS%dL%d", s+1, l)) - .desc("Average hit rate") - .flags(pdf); - - averageHitRateByStageLevel[s][l] = + *avg_hitrate = (totalLookupsByStageLevel[s][l] - totalMissesByStageLevel[s][l]) / totalLookupsByStageLevel[s][l]; - insertionsByStageLevel[s][l] - .name(csprintf("insertionsS%dL%d", s+1, l)) - .desc("Number of insertions (not replacements)") - .flags(pdf); } } } + +WalkCache:: +WalkCacheStats::~WalkCacheStats() +{ + for (auto avg_lookup : averageLookupsByStageLevel) + delete avg_lookup; + + for (auto avg_miss : averageMissesByStageLevel) + delete avg_miss; + + for (auto avg_update : averageUpdatesByStageLevel) + delete avg_update; + + for (auto avg_hitrate : averageHitRateByStageLevel) + delete avg_hitrate; +} diff --git a/src/dev/arm/smmu_v3_caches.hh b/src/dev/arm/smmu_v3_caches.hh index dee09f2314..07c6242d75 100644 --- a/src/dev/arm/smmu_v3_caches.hh +++ b/src/dev/arm/smmu_v3_caches.hh @@ -325,22 +325,23 @@ class WalkCache : public SMMUv3BaseCache struct WalkCacheStats : public Stats::Group { WalkCacheStats(Stats::Group *parent); + ~WalkCacheStats(); unsigned int lookupsByStageLevel[2][WALK_CACHE_LEVELS]; - Stats::Formula averageLookupsByStageLevel[2][WALK_CACHE_LEVELS]; - Stats::Scalar totalLookupsByStageLevel[2][WALK_CACHE_LEVELS]; + std::vector averageLookupsByStageLevel; + Stats::Vector2d totalLookupsByStageLevel; unsigned int missesByStageLevel[2][WALK_CACHE_LEVELS]; - Stats::Formula averageMissesByStageLevel[2][WALK_CACHE_LEVELS]; - Stats::Scalar totalMissesByStageLevel[2][WALK_CACHE_LEVELS]; + std::vector averageMissesByStageLevel; + Stats::Vector2d totalMissesByStageLevel; unsigned int updatesByStageLevel[2][WALK_CACHE_LEVELS]; - Stats::Formula averageUpdatesByStageLevel[2][WALK_CACHE_LEVELS]; - Stats::Scalar totalUpdatesByStageLevel[2][WALK_CACHE_LEVELS]; + std::vector averageUpdatesByStageLevel; + Stats::Vector2d totalUpdatesByStageLevel; - Stats::Formula averageHitRateByStageLevel[2][WALK_CACHE_LEVELS]; + std::vector averageHitRateByStageLevel; - Stats::Scalar insertionsByStageLevel[2][WALK_CACHE_LEVELS]; + Stats::Vector2d insertionsByStageLevel; } walkCacheStats; private: typedef std::vector Set; From 9ffcf154714c6c8594bd0eca7234ba78e414e95a Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Sun, 14 Mar 2021 21:59:19 +0000 Subject: [PATCH 17/25] dev-arm: Remove unused SMMUv3 WalkCache variables Those were grouped within the stats data structures but were not actually stats Signed-off-by: Giacomo Travaglini Change-Id: I01bbbada423825ded04a033c0709108e2980ec70 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42985 Reviewed-by: Jason Lowe-Power Reviewed-by: Hoa Nguyen Maintainer: Jason Lowe-Power Tested-by: kokoro --- 
src/dev/arm/smmu_v3_caches.cc | 3 --- src/dev/arm/smmu_v3_caches.hh | 3 --- 2 files changed, 6 deletions(-) diff --git a/src/dev/arm/smmu_v3_caches.cc b/src/dev/arm/smmu_v3_caches.cc index b09fb31e72..069e0702a2 100644 --- a/src/dev/arm/smmu_v3_caches.cc +++ b/src/dev/arm/smmu_v3_caches.cc @@ -1043,10 +1043,8 @@ WalkCache::lookup(Addr va, Addr vaMask, if (result == NULL) baseCacheStats.totalMisses++; - walkCacheStats.lookupsByStageLevel[stage-1][level]++; walkCacheStats.totalLookupsByStageLevel[stage-1][level]++; if (result == NULL) { - walkCacheStats.missesByStageLevel[stage-1][level]++; walkCacheStats.totalMissesByStageLevel[stage-1][level]++; } } @@ -1079,7 +1077,6 @@ WalkCache::store(const Entry &incoming) } baseCacheStats.totalUpdates++; - walkCacheStats.updatesByStageLevel[incoming.stage-1][incoming.level]++; walkCacheStats .totalUpdatesByStageLevel[incoming.stage-1][incoming.level]++; } diff --git a/src/dev/arm/smmu_v3_caches.hh b/src/dev/arm/smmu_v3_caches.hh index 07c6242d75..a6faf6a89d 100644 --- a/src/dev/arm/smmu_v3_caches.hh +++ b/src/dev/arm/smmu_v3_caches.hh @@ -327,15 +327,12 @@ class WalkCache : public SMMUv3BaseCache WalkCacheStats(Stats::Group *parent); ~WalkCacheStats(); - unsigned int lookupsByStageLevel[2][WALK_CACHE_LEVELS]; std::vector averageLookupsByStageLevel; Stats::Vector2d totalLookupsByStageLevel; - unsigned int missesByStageLevel[2][WALK_CACHE_LEVELS]; std::vector averageMissesByStageLevel; Stats::Vector2d totalMissesByStageLevel; - unsigned int updatesByStageLevel[2][WALK_CACHE_LEVELS]; std::vector averageUpdatesByStageLevel; Stats::Vector2d totalUpdatesByStageLevel; std::vector averageHitRateByStageLevel; Stats::Vector2d insertionsByStageLevel; } walkCacheStats; private: typedef std::vector Set; From d8b172917a99fc9867f54351b6a23f12034a6ef3 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Fri, 12 Mar 2021 12:50:49 +0000 Subject: [PATCH 18/25] configs: Use integer division in MESI_Three_Level_HTM.py num_cpus_per_cluster and num_l2caches_per_cluster need to be integers, as we are iterating over those variables Signed-off-by: Giacomo Travaglini Change-Id: Iaad1ce9b4058421ff83ba9d2419eb5c36c772c35 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42883 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- configs/ruby/MESI_Three_Level_HTM.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/ruby/MESI_Three_Level_HTM.py b/configs/ruby/MESI_Three_Level_HTM.py index b51108dbb0..f39e457af3 100644 --- a/configs/ruby/MESI_Three_Level_HTM.py +++ b/configs/ruby/MESI_Three_Level_HTM.py @@ -78,10 +78,10 @@ def create_system(options, full_system, system, dma_ports, bootmem, dma_cntrl_nodes = [] assert (options.num_cpus % options.num_clusters == 0) - num_cpus_per_cluster = options.num_cpus / options.num_clusters + num_cpus_per_cluster = options.num_cpus // options.num_clusters assert (options.num_l2caches % options.num_clusters == 0) - num_l2caches_per_cluster = options.num_l2caches / options.num_clusters + num_l2caches_per_cluster = options.num_l2caches // options.num_clusters l2_bits = int(math.log(num_l2caches_per_cluster, 2)) block_size_bits = int(math.log(options.cacheline_size, 2)) From dcc2f4caaf912e5a4125ac5bfab16ad938a32c9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tiago=20M=C3=BCck?= Date: Mon, 15 Mar 2021 17:39:34 -0500 Subject: [PATCH 19/25] scons,mem-ruby: export need_partial_func_reads in SConstruct need_partial_func_reads should now be modified from protocol-specific files (e.g. 
src/learning_gem5/part3/SConsopts)

Change-Id: I38039aab6178a019d063d6124200050f2ed7b446
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/43043
Reviewed-by: Jason Lowe-Power
Maintainer: Jason Lowe-Power
Tested-by: kokoro
---
 SConstruct                     | 3 +++
 src/mem/ruby/system/SConscript | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/SConstruct b/SConstruct
index beaf9aedb5..fb3421ca6f 100755
--- a/SConstruct
+++ b/SConstruct
@@ -764,6 +764,9 @@ protocol_dirs = []
 Export('protocol_dirs')
 slicc_includes = []
 Export('slicc_includes')
+# list of protocols that require the partial functional read interface
+need_partial_func_reads = []
+Export('need_partial_func_reads')
 
 # Walk the tree and execute all SConsopts scripts that wil add to the
 # above variables
diff --git a/src/mem/ruby/system/SConscript b/src/mem/ruby/system/SConscript
index a2708ae9db..e87cd249c9 100644
--- a/src/mem/ruby/system/SConscript
+++ b/src/mem/ruby/system/SConscript
@@ -45,9 +45,6 @@ if env['PROTOCOL'] == 'None':
 
 env.Append(CPPDEFINES=['PROTOCOL_' + env['PROTOCOL']])
 
-# list of protocols that require the partial functional read interface
-need_partial_func_reads = []
-
 if env['PROTOCOL'] in need_partial_func_reads:
     env.Append(CPPDEFINES=['PARTIAL_FUNC_READS'])

From b13b4850951b4507cabee27a8c2a748c93a20daf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tiago=20M=C3=BCck?=
Date: Mon, 22 Jun 2020 14:09:50 -0500
Subject: [PATCH 20/25] configs,mem-ruby: CHI-based Ruby protocol
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds a new Ruby cache coherence protocol based on Arm's
AMBA5 CHI specification.

The CHI protocol defines and implements two state machine types:

- Cache_Controller: generic cache controller that can be configured as:
    - Top-level L1 I/D cache
    - An intermediate level (L2, L3, ...) private or shared cache
    - A CHI home node (i.e. the point of coherence of the system,
      holding the global directory)
    - A DMA requester
- Memory_Controller: implements a CHI slave node and interfaces with
  the gem5 memory controller. This controller has the functionality of
  a Directory_Controller in the other Ruby protocols, except it doesn't
  have a directory.

The Cache_Controller has multiple cache allocation/deallocation
parameters to control the clusivity with respect to upstream caches.
Allocation can be completely disabled to use Cache_Controller as a DMA
requester or as a home node without a shared LLC.

The standard configuration file configs/ruby/CHI.py provides a
'create_system' function compatible with configs/example/fs.py and
configs/example/se.py and creates a system with private L1/L2 caches
per core and a shared LLC at the home nodes. Different cache topologies
can be defined by modifying 'create_system' or by creating custom
scripts using the structures defined in configs/ruby/CHI.py (a minimal
sketch follows below).

This patch also includes the 'CustomMesh' topology script to be used
with CHI. CustomMesh generates a 2D mesh topology with the placement of
components manually defined in a separate configuration file using the
--noc-config parameter. The example in
configs/example/noc_config/2x4.yaml creates a simple 2x4 mesh.
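As an illustration of the custom-script route, a minimal sketch could
look as follows. This is illustrative only: it assumes the CHI.py
helpers introduced by this patch, with L1ICache/L1DCache/L2Cache/
HNFCache standing for user-defined RubyCache subclasses and num_hnfs
for the desired number of home nodes:

    # One RNF (private L1s plus a private L2) per core
    ruby_system.rnf = [CHI_RNF([cpu], ruby_system, L1ICache, L1DCache,
                               system.cache_line_size.value)
                       for cpu in system.cpu]
    for rnf in ruby_system.rnf:
        rnf.addPrivL2Cache(L2Cache)

    # Interleave the memory ranges across the home nodes (HNFs)
    CHI_HNF.createAddrRanges(system.mem_ranges,
                             system.cache_line_size.value, num_hnfs)
    ruby_system.hnf = [CHI_HNF(i, ruby_system, HNFCache, None)
                       for i in range(num_hnfs)]

    # Requesters forward their misses to the home nodes
    hnf_dests = [c for h in ruby_system.hnf
                 for c in h.getAllControllers()]
    for rnf in ruby_system.rnf:
        rnf.setDownstream(hnf_dests)

This is essentially the structure that 'create_system' itself builds
below.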
For example, to run an SE mode simulation with 4 cores, 4 mem ctrls,
and 4 home nodes (L3 caches):

build/ARM/gem5.opt configs/example/se.py \
  --cmd 'tests/test-progs/hello/bin/arm/linux/hello' \
  --ruby --num-cpus=4 --num-dirs=4 --num-l3caches=4 \
  --topology=CustomMesh --noc-config=configs/example/noc_config/2x4.yaml

If one doesn't care about the component placement on the interconnect,
the 'Crossbar' and 'Pt2Pt' topologies may be used; they do not require
the --noc-config option.

Additional authors:
  Joshua Randall
  Pedro Benedicte
  Tuan Ta

JIRA: https://gem5.atlassian.net/browse/GEM5-908
Change-Id: I856524b0afd30842194190f5bd69e7e6ded906b0
Signed-off-by: Tiago Mück
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42563
Reviewed-by: Jason Lowe-Power
Maintainer: Jason Lowe-Power
Tested-by: kokoro
---
 configs/example/noc_config/2x4.yaml           |   70 +
 configs/ruby/CHI.py                           |  840 +++++
 configs/topologies/CustomMesh.py              |  444 +++
 src/mem/ruby/SConscript                       |    2 +
 src/mem/ruby/common/ExpectedMap.hh            |  232 ++
 src/mem/ruby/common/TriggerQueue.hh           |  125 +
 src/mem/ruby/protocol/RubySlicc_Exports.sm    |    4 +-
 .../ruby/protocol/chi/CHI-cache-actions.sm    | 3057 +++++++++++++++++
 src/mem/ruby/protocol/chi/CHI-cache-funcs.sm  | 1255 +++++++
 src/mem/ruby/protocol/chi/CHI-cache-ports.sm  |  398 +++
 .../protocol/chi/CHI-cache-transitions.sm     | 1218 +++++++
 src/mem/ruby/protocol/chi/CHI-cache.sm        |  775 +++++
 src/mem/ruby/protocol/chi/CHI-mem.sm          |  792 +++++
 src/mem/ruby/protocol/chi/CHI-msg.sm          |  234 ++
 src/mem/ruby/protocol/chi/CHI.slicc           |    6 +
 src/mem/ruby/protocol/chi/SConsopts           |   47 +
 16 files changed, 9498 insertions(+), 1 deletion(-)
 create mode 100644 configs/example/noc_config/2x4.yaml
 create mode 100644 configs/ruby/CHI.py
 create mode 100644 configs/topologies/CustomMesh.py
 create mode 100644 src/mem/ruby/common/ExpectedMap.hh
 create mode 100644 src/mem/ruby/common/TriggerQueue.hh
 create mode 100644 src/mem/ruby/protocol/chi/CHI-cache-actions.sm
 create mode 100644 src/mem/ruby/protocol/chi/CHI-cache-funcs.sm
 create mode 100644 src/mem/ruby/protocol/chi/CHI-cache-ports.sm
 create mode 100644 src/mem/ruby/protocol/chi/CHI-cache-transitions.sm
 create mode 100644 src/mem/ruby/protocol/chi/CHI-cache.sm
 create mode 100644 src/mem/ruby/protocol/chi/CHI-mem.sm
 create mode 100644 src/mem/ruby/protocol/chi/CHI-msg.sm
 create mode 100644 src/mem/ruby/protocol/chi/CHI.slicc
 create mode 100644 src/mem/ruby/protocol/chi/SConsopts

diff --git a/configs/example/noc_config/2x4.yaml b/configs/example/noc_config/2x4.yaml
new file mode 100644
index 0000000000..84ec476290
--- /dev/null
+++ b/configs/example/noc_config/2x4.yaml
@@ -0,0 +1,70 @@
+# Copyright (c) 2021 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# 2x4 mesh definition +# +# 0 --- 1 --- 2 --- 3 +# | | | | +# 4 --- 5 --- 6 --- 7 +# +mesh: + num_rows : 2 + num_cols : 4 + router_latency : 1 + link_latency : 1 + +# Bindings for each CHI node type. + +CHI_RNF: + # Uncomment to map num_nodes_per_router RNFs in each provided router, + # assuming num. created CHI_RNFs == len(router_list)*num_nodes_per_router + # num_nodes_per_router: 1 + router_list: [1, 2, 5, 6] + +CHI_HNF: + # num_nodes_per_router: 1 + router_list: [1, 2, 5, 6] + +CHI_SNF_MainMem: + # num_nodes_per_router: 1 + router_list: [0, 4] + +# Applies to CHI_SNF_BootMem and possibly other non-main memories +CHI_SNF_IO: + router_list: [3] + +# Applies to CHI_RNI_DMA and CHI_RNI_IO +CHI_RNI_IO: + router_list: [7] diff --git a/configs/ruby/CHI.py b/configs/ruby/CHI.py new file mode 100644 index 0000000000..0a49371d84 --- /dev/null +++ b/configs/ruby/CHI.py @@ -0,0 +1,840 @@ +# Copyright (c) 2021 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import math +import yaml +import m5 +from m5.objects import * +from m5.defines import buildEnv +from .Ruby import create_topology, setup_memory_controllers + +def define_options(parser): + parser.add_option("--noc-config", action="store", type="string", + default=None, + help="YAML NoC config. parameters and bindings. " + "required for CustomMesh topology") + +class Versions: + ''' + Helper class to obtain unique ids for a given controller class. + These are passed as the 'version' parameter when creating the controller. + ''' + _seqs = 0 + @classmethod + def getSeqId(cls): + val = cls._seqs + cls._seqs += 1 + return val + + _version = {} + @classmethod + def getVersion(cls, tp): + if tp not in cls._version: + cls._version[tp] = 0 + val = cls._version[tp] + cls._version[tp] = val + 1 + return val + + +class CHI_Node(SubSystem): + ''' + Base class with common functions for setting up Cache or Memory + controllers that are part of a CHI RNF, RNFI, HNF, or SNF nodes. + Notice getNetworkSideControllers and getAllControllers must be implemented + in the derived classes. 
+ ''' + + def __init__(self, ruby_system): + super(CHI_Node, self).__init__() + self._ruby_system = ruby_system + self._network = ruby_system.network + + def getNetworkSideControllers(self): + ''' + Returns all ruby controllers that need to be connected to the + network + ''' + raise NotImplementedError() + + def getAllControllers(self): + ''' + Returns all ruby controllers associated with this node + ''' + raise NotImplementedError() + + def setDownstream(self, cntrls): + ''' + Sets cntrls as the downstream list of all controllers in this node + ''' + for c in self.getNetworkSideControllers(): + c.downstream_destinations = cntrls + + def connectController(self, cntrl): + ''' + Creates and configures the messages buffers for the CHI input/output + ports that connect to the network + ''' + cntrl.reqOut = MessageBuffer() + cntrl.rspOut = MessageBuffer() + cntrl.snpOut = MessageBuffer() + cntrl.datOut = MessageBuffer() + cntrl.reqIn = MessageBuffer() + cntrl.rspIn = MessageBuffer() + cntrl.snpIn = MessageBuffer() + cntrl.datIn = MessageBuffer() + + # All CHI ports are always connected to the network. + # Controllers that are not part of the getNetworkSideControllers list + # still communicate using internal routers, thus we need to wire-up the + # ports + cntrl.reqOut.out_port = self._network.in_port + cntrl.rspOut.out_port = self._network.in_port + cntrl.snpOut.out_port = self._network.in_port + cntrl.datOut.out_port = self._network.in_port + cntrl.reqIn.in_port = self._network.out_port + cntrl.rspIn.in_port = self._network.out_port + cntrl.snpIn.in_port = self._network.out_port + cntrl.datIn.in_port = self._network.out_port + +class TriggerMessageBuffer(MessageBuffer): + ''' + MessageBuffer for triggering internal controller events. + These buffers should not be affected by the Ruby tester randomization + and allow poping messages enqueued in the same cycle. + ''' + randomization = 'disabled' + allow_zero_latency = True + +class OrderedTriggerMessageBuffer(TriggerMessageBuffer): + ordered = True + +class CHI_Cache_Controller(Cache_Controller): + ''' + Default parameters for a Cache controller + The Cache_Controller can also be used as a DMA requester or as + a pure directory if all cache allocation policies are disabled. + ''' + + def __init__(self, ruby_system): + super(CHI_Cache_Controller, self).__init__( + version = Versions.getVersion(Cache_Controller), + ruby_system = ruby_system, + mandatoryQueue = MessageBuffer(), + prefetchQueue = MessageBuffer(), + triggerQueue = TriggerMessageBuffer(), + retryTriggerQueue = OrderedTriggerMessageBuffer(), + replTriggerQueue = OrderedTriggerMessageBuffer(), + reqRdy = TriggerMessageBuffer(), + snpRdy = TriggerMessageBuffer()) + # Set somewhat large number since we really a lot on internal + # triggers. 
To limit the controller performance, tweak other + # params such as: input port buffer size, cache banks, and output + # port latency + self.transitions_per_cycle = 128 + # This should be set to true in the data cache controller to enable + # timeouts on unique lines when a store conditional fails + self.sc_lock_enabled = False + +class CHI_L1Controller(CHI_Cache_Controller): + ''' + Default parameters for a L1 Cache controller + ''' + + def __init__(self, ruby_system, sequencer, cache, prefetcher): + super(CHI_L1Controller, self).__init__(ruby_system) + self.sequencer = sequencer + self.cache = cache + self.use_prefetcher = False + self.send_evictions = True + self.is_HN = False + self.enable_DMT = False + self.enable_DCT = False + # Strict inclusive MOESI + self.allow_SD = True + self.alloc_on_seq_acc = True + self.alloc_on_seq_line_write = False + self.alloc_on_readshared = True + self.alloc_on_readunique = True + self.alloc_on_readonce = True + self.alloc_on_writeback = True + self.dealloc_on_unique = False + self.dealloc_on_shared = False + self.dealloc_backinv_unique = True + self.dealloc_backinv_shared = True + # Some reasonable default TBE params + self.number_of_TBEs = 16 + self.number_of_repl_TBEs = 16 + self.number_of_snoop_TBEs = 4 + self.unify_repl_TBEs = False + +class CHI_L2Controller(CHI_Cache_Controller): + ''' + Default parameters for a L2 Cache controller + ''' + + def __init__(self, ruby_system, cache, prefetcher): + super(CHI_L2Controller, self).__init__(ruby_system) + self.sequencer = NULL + self.cache = cache + self.use_prefetcher = False + self.allow_SD = True + self.is_HN = False + self.enable_DMT = False + self.enable_DCT = False + self.send_evictions = False + # Strict inclusive MOESI + self.alloc_on_seq_acc = False + self.alloc_on_seq_line_write = False + self.alloc_on_readshared = True + self.alloc_on_readunique = True + self.alloc_on_readonce = True + self.alloc_on_writeback = True + self.dealloc_on_unique = False + self.dealloc_on_shared = False + self.dealloc_backinv_unique = True + self.dealloc_backinv_shared = True + # Some reasonable default TBE params + self.number_of_TBEs = 32 + self.number_of_repl_TBEs = 32 + self.number_of_snoop_TBEs = 16 + self.unify_repl_TBEs = False + +class CHI_HNFController(CHI_Cache_Controller): + ''' + Default parameters for a coherent home node (HNF) cache controller + ''' + + def __init__(self, ruby_system, cache, prefetcher, addr_ranges): + super(CHI_HNFController, self).__init__(ruby_system) + self.sequencer = NULL + self.cache = cache + self.use_prefetcher = False + self.addr_ranges = addr_ranges + self.allow_SD = True + self.is_HN = True + self.enable_DMT = True + self.enable_DCT = True + self.send_evictions = False + # MOESI / Mostly inclusive for shared / Exclusive for unique + self.alloc_on_seq_acc = False + self.alloc_on_seq_line_write = False + self.alloc_on_readshared = True + self.alloc_on_readunique = False + self.alloc_on_readonce = True + self.alloc_on_writeback = True + self.dealloc_on_unique = True + self.dealloc_on_shared = False + self.dealloc_backinv_unique = False + self.dealloc_backinv_shared = False + # Some reasonable default TBE params + self.number_of_TBEs = 32 + self.number_of_repl_TBEs = 32 + self.number_of_snoop_TBEs = 1 # should not receive any snoop + self.unify_repl_TBEs = False + +class CHI_DMAController(CHI_Cache_Controller): + ''' + Default parameters for a DMA controller + ''' + + def __init__(self, ruby_system, sequencer): + super(CHI_DMAController, self).__init__(ruby_system) + 
self.sequencer = sequencer + class DummyCache(RubyCache): + dataAccessLatency = 0 + tagAccessLatency = 1 + size = "128" + assoc = 1 + self.use_prefetcher = False + self.cache = DummyCache() + self.sequencer.dcache = NULL + # All allocations are false + # Deallocations are true (don't really matter) + self.allow_SD = False + self.is_HN = False + self.enable_DMT = False + self.enable_DCT = False + self.alloc_on_seq_acc = False + self.alloc_on_seq_line_write = False + self.alloc_on_readshared = False + self.alloc_on_readunique = False + self.alloc_on_readonce = False + self.alloc_on_writeback = False + self.dealloc_on_unique = False + self.dealloc_on_shared = False + self.dealloc_backinv_unique = False + self.dealloc_backinv_shared = False + self.send_evictions = False + self.number_of_TBEs = 16 + self.number_of_repl_TBEs = 1 + self.number_of_snoop_TBEs = 1 # should not receive any snoop + self.unify_repl_TBEs = False + +class CPUSequencerWrapper: + ''' + Other generic configuration scripts assume a matching number of sequencers + and cpus. This wraps the instruction and data sequencer so they are + compatible with the other scripts. This assumes all scripts are using + connectCpuPorts/connectIOPorts to bind ports + ''' + + def __init__(self, iseq, dseq): + # use this style due to __setattr__ override below + self.__dict__['inst_seq'] = iseq + self.__dict__['data_seq'] = dseq + self.__dict__['support_data_reqs'] = True + self.__dict__['support_inst_reqs'] = True + # Compatibility with certain scripts that wire up ports + # without connectCpuPorts + self.__dict__['slave'] = dseq.in_ports + self.__dict__['in_ports'] = dseq.in_ports + + def connectCpuPorts(self, cpu): + assert(isinstance(cpu, BaseCPU)) + cpu.icache_port = self.inst_seq.in_ports + for p in cpu._cached_ports: + if str(p) != 'icache_port': + exec('cpu.%s = self.data_seq.in_ports' % p) + cpu.connectUncachedPorts(self.data_seq) + + def connectIOPorts(self, piobus): + self.data_seq.connectIOPorts(piobus) + + def __setattr__(self, name, value): + setattr(self.inst_seq, name, value) + setattr(self.data_seq, name, value) + +class CHI_RNF(CHI_Node): + ''' + Defines a CHI request node. 
+ Notice all contollers and sequencers are set as children of the cpus, so + this object acts more like a proxy for seting things up and has no topology + significance unless the cpus are set as its children at the top level + ''' + def __init__(self, cpus, ruby_system, + l1Icache_type, l1Dcache_type, + cache_line_size, + l1Iprefetcher_type=None, l1Dprefetcher_type=None): + super(CHI_RNF, self).__init__(ruby_system) + + self._block_size_bits = int(math.log(cache_line_size, 2)) + + # All sequencers and controllers + self._seqs = [] + self._cntrls = [] + + # Last level controllers in this node, i.e., the ones that will send + # requests to the home nodes + self._ll_cntrls = [] + + self._cpus = cpus + + # First creates L1 caches and sequencers + for cpu in self._cpus: + cpu.inst_sequencer = RubySequencer(version = Versions.getSeqId(), + ruby_system = ruby_system) + cpu.data_sequencer = RubySequencer(version = Versions.getSeqId(), + ruby_system = ruby_system) + + self._seqs.append(CPUSequencerWrapper(cpu.inst_sequencer, + cpu.data_sequencer)) + + # caches + l1i_cache = l1Icache_type(start_index_bit = self._block_size_bits, + is_icache = True) + + l1d_cache = l1Dcache_type(start_index_bit = self._block_size_bits, + is_icache = False) + + # Placeholders for future prefetcher support + if l1Iprefetcher_type != None or l1Dprefetcher_type != None: + m5.fatal('Prefetching not supported yet') + l1i_pf = NULL + l1d_pf = NULL + + # cache controllers + cpu.l1i = CHI_L1Controller(ruby_system, cpu.inst_sequencer, + l1i_cache, l1i_pf) + + cpu.l1d = CHI_L1Controller(ruby_system, cpu.data_sequencer, + l1d_cache, l1d_pf) + + cpu.inst_sequencer.dcache = NULL + cpu.data_sequencer.dcache = cpu.l1d.cache + + cpu.l1d.sc_lock_enabled = True + + cpu._ll_cntrls = [cpu.l1i, cpu.l1d] + for c in cpu._ll_cntrls: + self._cntrls.append(c) + self.connectController(c) + self._ll_cntrls.append(c) + + def getSequencers(self): + return self._seqs + + def getAllControllers(self): + return self._cntrls + + def getNetworkSideControllers(self): + return self._cntrls + + def setDownstream(self, cntrls): + for c in self._ll_cntrls: + c.downstream_destinations = cntrls + + def getCpus(self): + return self._cpus + + # Adds a private L2 for each cpu + def addPrivL2Cache(self, cache_type, pf_type=None): + self._ll_cntrls = [] + for cpu in self._cpus: + l2_cache = cache_type(start_index_bit = self._block_size_bits, + is_icache = False) + if pf_type != None: + m5.fatal('Prefetching not supported yet') + l2_pf = NULL + + cpu.l2 = CHI_L2Controller(self._ruby_system, l2_cache, l2_pf) + + self._cntrls.append(cpu.l2) + self.connectController(cpu.l2) + + self._ll_cntrls.append(cpu.l2) + + for c in cpu._ll_cntrls: + c.downstream_destinations = [cpu.l2] + cpu._ll_cntrls = [cpu.l2] + + +class CHI_HNF(CHI_Node): + ''' + Encapsulates an HNF cache/directory controller. 
+ Before the first controller is created, the class method + CHI_HNF.createAddrRanges must be called before creating any CHI_HNF object + to set-up the interleaved address ranges used by the HNFs + ''' + + _addr_ranges = [] + @classmethod + def createAddrRanges(cls, sys_mem_ranges, cache_line_size, num_hnfs): + # Create the HNFs interleaved addr ranges + block_size_bits = int(math.log(cache_line_size, 2)) + cls._addr_ranges = [] + llc_bits = int(math.log(num_hnfs, 2)) + numa_bit = block_size_bits + llc_bits - 1 + for i in range(num_hnfs): + ranges = [] + for r in sys_mem_ranges: + addr_range = AddrRange(r.start, size = r.size(), + intlvHighBit = numa_bit, + intlvBits = llc_bits, + intlvMatch = i) + ranges.append(addr_range) + cls._addr_ranges.append((ranges, numa_bit, i)) + + @classmethod + def getAddrRanges(cls, hnf_idx): + assert(len(cls._addr_ranges) != 0) + return cls._addr_ranges[hnf_idx] + + # The CHI controller can be a child of this object or another if + # 'parent' if specified + def __init__(self, hnf_idx, ruby_system, llcache_type, parent): + super(CHI_HNF, self).__init__(ruby_system) + + addr_ranges,intlvHighBit,intlvMatch = CHI_HNF.getAddrRanges(hnf_idx) + # All ranges should have the same interleaving + assert(len(addr_ranges) >= 1) + assert(intlvMatch == hnf_idx) + + ll_cache = llcache_type(start_index_bit = intlvHighBit + 1) + self._cntrl = CHI_HNFController(ruby_system, ll_cache, NULL, + addr_ranges) + + if parent == None: + self.cntrl = self._cntrl + else: + parent.cntrl = self._cntrl + + self.connectController(self._cntrl) + + def getAllControllers(self): + return [self._cntrl] + + def getNetworkSideControllers(self): + return [self._cntrl] + + +class CHI_SNF_Base(CHI_Node): + ''' + Creates CHI node controllers for the memory controllers + ''' + + # The CHI controller can be a child of this object or another if + # 'parent' if specified + def __init__(self, ruby_system, parent): + super(CHI_SNF_Base, self).__init__(ruby_system) + + self._cntrl = Memory_Controller( + version = Versions.getVersion(Memory_Controller), + ruby_system = ruby_system, + triggerQueue = TriggerMessageBuffer(), + responseFromMemory = MessageBuffer(), + requestToMemory = MessageBuffer(ordered = True), + reqRdy = TriggerMessageBuffer()) + + self.connectController(self._cntrl) + + if parent: + parent.cntrl = self._cntrl + else: + self.cntrl = self._cntrl + + def getAllControllers(self): + return [self._cntrl] + + def getNetworkSideControllers(self): + return [self._cntrl] + + def getMemRange(self, mem_ctrl): + # TODO need some kind of transparent API for + # MemCtrl+DRAM vs SimpleMemory + if hasattr(mem_ctrl, 'range'): + return mem_ctrl.range + else: + return mem_ctrl.dram.range + +class CHI_SNF_BootMem(CHI_SNF_Base): + ''' + Create the SNF for the boot memory + ''' + def __init__(self, ruby_system, parent, bootmem): + super(CHI_SNF_BootMem, self).__init__(ruby_system, parent) + self._cntrl.memory_out_port = bootmem.port + self._cntrl.addr_ranges = self.getMemRange(bootmem) + +class CHI_SNF_MainMem(CHI_SNF_Base): + ''' + Create the SNF for a list main memory controllers + ''' + def __init__(self, ruby_system, parent, mem_ctrl = None): + super(CHI_SNF_MainMem, self).__init__(ruby_system, parent) + if mem_ctrl: + self._cntrl.memory_out_port = mem_ctrl.port + self._cntrl.addr_ranges = self.getMemRange(mem_ctrl) + # else bind ports and range later + +class CHI_RNI_Base(CHI_Node): + ''' + Request node without cache / DMA + ''' + + # The CHI controller can be a child of this object or another if + # 
'parent' if specified + def __init__(self, ruby_system, parent): + super(CHI_RNI_Base, self).__init__(ruby_system) + + self._sequencer = RubySequencer(version = Versions.getSeqId(), + ruby_system = ruby_system, + clk_domain = ruby_system.clk_domain) + self._cntrl = CHI_DMAController(ruby_system, self._sequencer) + + if parent: + parent.cntrl = self._cntrl + else: + self.cntrl = self._cntrl + + self.connectController(self._cntrl) + + def getAllControllers(self): + return [self._cntrl] + + def getNetworkSideControllers(self): + return [self._cntrl] + +class CHI_RNI_DMA(CHI_RNI_Base): + ''' + DMA controller wiredup to a given dma port + ''' + def __init__(self, ruby_system, dma_port, parent): + super(CHI_RNI_DMA, self).__init__(ruby_system, parent) + assert(dma_port != None) + self._sequencer.in_ports = dma_port + +class CHI_RNI_IO(CHI_RNI_Base): + ''' + DMA controller wiredup to ruby_system IO port + ''' + def __init__(self, ruby_system, parent): + super(CHI_RNI_IO, self).__init__(ruby_system, parent) + ruby_system._io_port = self._sequencer + +def noc_params_from_config(config, noc_params): + # mesh options + noc_params.num_rows = config['mesh']['num_rows'] + noc_params.num_cols = config['mesh']['num_cols'] + if 'router_latency' in config['mesh']: + noc_params.router_latency = config['mesh']['router_latency'] + if 'link_latency' in config['mesh']: + noc_params.router_link_latency = config['mesh']['link_latency'] + noc_params.node_link_latency = config['mesh']['link_latency'] + if 'router_link_latency' in config['mesh']: + noc_params.router_link_latency = config['mesh']['router_link_latency'] + if 'node_link_latency' in config['mesh']: + noc_params.node_link_latency = config['mesh']['node_link_latency'] + if 'cross_links' in config['mesh']: + noc_params.cross_link_latency = \ + config['mesh']['cross_link_latency'] + noc_params.cross_links = [] + for x, y in config['mesh']['cross_links']: + noc_params.cross_links.append((x, y)) + noc_params.cross_links.append((y, x)) + else: + noc_params.cross_links = [] + noc_params.cross_link_latency = 0 + + # CHI_RNF options + noc_params.CHI_RNF = config['CHI_RNF'] + + # CHI_RNI_IO + noc_params.CHI_RNI_IO = config['CHI_RNI_IO'] + + # CHI_HNF options + noc_params.CHI_HNF = config['CHI_HNF'] + if 'pairing' in config['CHI_HNF']: + noc_params.pairing = config['CHI_HNF']['pairing'] + + # CHI_SNF_MainMem + noc_params.CHI_SNF_MainMem = config['CHI_SNF_MainMem'] + + # CHI_SNF_IO (applies to CHI_SNF_Bootmem) + noc_params.CHI_SNF_IO = config['CHI_SNF_IO'] + + +def create_system(options, full_system, system, dma_ports, bootmem, + ruby_system): + + if buildEnv['PROTOCOL'] != 'CHI': + m5.panic("This script requires the CHI build") + + if options.num_dirs < 1: + m5.fatal('--num-dirs must be at least 1') + + if options.num_l3caches < 1: + m5.fatal('--num-l3caches must be at least 1') + + # Default parameters for the network + class NoC_Params(object): + def __init__(self): + self.topology = options.topology + self.network = options.network + self.router_link_latency = 1 + self.node_link_latency = 1 + self.router_latency = 1 + self.router_buffer_size = 4 + self.cntrl_msg_size = 8 + self.data_width = 32 + params = NoC_Params() + + # read additional configurations from yaml file if provided + if options.noc_config: + with open(options.noc_config, 'r') as file: + noc_params_from_config(yaml.load(file), params) + elif params.topology == 'CustomMesh': + m5.fatal('--noc-config must be provided if topology is CustomMesh') + + # Declare caches and controller types used by the 
protocol + # Notice tag and data accesses are not concurrent, so the a cache hit + # latency = tag + data + response latencies. + # Default response latencies are 1 cy for all controllers. + # For L1 controllers the mandatoryQueue enqueue latency is always 1 cy and + # this is deducted from the initial tag read latency for sequencer requests + # dataAccessLatency may be set to 0 if one wants to consider parallel + # data and tag lookups + class L1ICache(RubyCache): + dataAccessLatency = 1 + tagAccessLatency = 1 + size = options.l1i_size + assoc = options.l1i_assoc + + class L1DCache(RubyCache): + dataAccessLatency = 2 + tagAccessLatency = 1 + size = options.l1d_size + assoc = options.l1d_assoc + + class L2Cache(RubyCache): + dataAccessLatency = 6 + tagAccessLatency = 2 + size = options.l2_size + assoc = options.l2_assoc + + class HNFCache(RubyCache): + dataAccessLatency = 10 + tagAccessLatency = 2 + size = options.l3_size + assoc = options.l3_assoc + + # other functions use system.cache_line_size assuming it has been set + assert(system.cache_line_size.value == options.cacheline_size) + + cpu_sequencers = [] + mem_cntrls = [] + mem_dests = [] + network_nodes = [] + network_cntrls = [] + hnf_dests = [] + all_cntrls = [] + + # Creates on RNF per cpu with priv l2 caches + assert(len(system.cpu) == options.num_cpus) + ruby_system.rnf = [ CHI_RNF([cpu], ruby_system, L1ICache, L1DCache, + system.cache_line_size.value) + for cpu in system.cpu ] + for rnf in ruby_system.rnf: + rnf.addPrivL2Cache(L2Cache) + cpu_sequencers.extend(rnf.getSequencers()) + all_cntrls.extend(rnf.getAllControllers()) + network_nodes.append(rnf) + network_cntrls.extend(rnf.getNetworkSideControllers()) + + # Look for other memories + other_memories = [] + if bootmem: + other_memories.append(bootmem) + if getattr(system, 'sram', None): + other_memories.append(getattr(system, 'sram', None)) + on_chip_mem_ports = getattr(system, '_on_chip_mem_ports', None) + if on_chip_mem_ports: + other_memories.extend([p.simobj for p in on_chip_mem_ports]) + + # Create the LLCs cntrls + sysranges = [] + system.mem_ranges + + for m in other_memories: + sysranges.append(m.range) + + CHI_HNF.createAddrRanges(sysranges, system.cache_line_size.value, + options.num_l3caches) + ruby_system.hnf = [ CHI_HNF(i, ruby_system, HNFCache, None) + for i in range(options.num_l3caches) ] + + for hnf in ruby_system.hnf: + network_nodes.append(hnf) + network_cntrls.extend(hnf.getNetworkSideControllers()) + assert(hnf.getAllControllers() == hnf.getNetworkSideControllers()) + all_cntrls.extend(hnf.getAllControllers()) + hnf_dests.extend(hnf.getAllControllers()) + + # Create the memory controllers + # Notice we don't define a Directory_Controller type so we don't use + # create_directories shared by other protocols. 
+ + ruby_system.snf = [ CHI_SNF_MainMem(ruby_system, None, None) + for i in range(options.num_dirs) ] + for snf in ruby_system.snf: + network_nodes.append(snf) + network_cntrls.extend(snf.getNetworkSideControllers()) + assert(snf.getAllControllers() == snf.getNetworkSideControllers()) + mem_cntrls.extend(snf.getAllControllers()) + all_cntrls.extend(snf.getAllControllers()) + mem_dests.extend(snf.getAllControllers()) + + if len(other_memories) > 0: + ruby_system.rom_snf = [ CHI_SNF_BootMem(ruby_system, None, m) + for m in other_memories ] + for snf in ruby_system.rom_snf: + network_nodes.append(snf) + network_cntrls.extend(snf.getNetworkSideControllers()) + all_cntrls.extend(snf.getAllControllers()) + mem_dests.extend(snf.getAllControllers()) + + + # Creates the controller for dma ports and io + + if len(dma_ports) > 0: + ruby_system.dma_rni = [ CHI_RNI_DMA(ruby_system, dma_port, None) + for dma_port in dma_ports ] + for rni in ruby_system.dma_rni: + network_nodes.append(rni) + network_cntrls.extend(rni.getNetworkSideControllers()) + all_cntrls.extend(rni.getAllControllers()) + + if full_system: + ruby_system.io_rni = CHI_RNI_IO(ruby_system, None) + network_nodes.append(ruby_system.io_rni) + network_cntrls.extend(ruby_system.io_rni.getNetworkSideControllers()) + all_cntrls.extend(ruby_system.io_rni.getAllControllers()) + + + # Assign downstream destinations + for rnf in ruby_system.rnf: + rnf.setDownstream(hnf_dests) + if len(dma_ports) > 0: + for rni in ruby_system.dma_rni: + rni.setDownstream(hnf_dests) + if full_system: + ruby_system.io_rni.setDownstream(hnf_dests) + for hnf in ruby_system.hnf: + hnf.setDownstream(mem_dests) + + # Setup data message size for all controllers + for cntrl in all_cntrls: + cntrl.data_channel_size = params.data_width + + # Network configurations + # virtual networks: 0=request, 1=snoop, 2=response, 3=data + ruby_system.network.number_of_virtual_networks = 4 + + ruby_system.network.control_msg_size = params.cntrl_msg_size + ruby_system.network.data_msg_size = params.data_width + ruby_system.network.buffer_size = params.router_buffer_size + + if params.topology == 'CustomMesh': + topology = create_topology(network_nodes, params) + elif params.topology in ['Crossbar', 'Pt2Pt']: + topology = create_topology(network_cntrls, params) + else: + m5.fatal("%s not supported!" % params.topology) + + # Incorporate the params into options so it's propagated to + # makeTopology by the parent script + for k in dir(params): + if not k.startswith('__'): + setattr(options, k, getattr(params, k)) + + return (cpu_sequencers, mem_cntrls, topology) diff --git a/configs/topologies/CustomMesh.py b/configs/topologies/CustomMesh.py new file mode 100644 index 0000000000..73793e48ed --- /dev/null +++ b/configs/topologies/CustomMesh.py @@ -0,0 +1,444 @@ +# Copyright (c) 2021 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +import math + +from m5.util import fatal +from m5.params import * +from m5.objects import * + +from m5.defines import buildEnv +if buildEnv['PROTOCOL'] == 'CHI': + import ruby.CHI as CHI + +from topologies.BaseTopology import SimpleTopology + +class CustomMesh(SimpleTopology): + description = 'CustomMesh' + + def __init__(self, controllers): + self.nodes = controllers + + #-------------------------------------------------------------------------- + # _makeMesh + #-------------------------------------------------------------------------- + + def _makeMesh(self, IntLink, link_latency, num_rows, num_columns, + cross_links, cross_link_latency): + + # East->West, West->East, North->South, South->North + # XY routing weights + link_weights = [1, 1, 2, 2] + + # East output to West input links + for row in range(num_rows): + for col in range(num_columns): + if (col + 1 < num_columns): + east_out = col + (row * num_columns) + west_in = (col + 1) + (row * num_columns) + llat = cross_link_latency \ + if (east_out, west_in) in cross_links \ + else link_latency + self._int_links.append(\ + IntLink(link_id=self._link_count, + src_node=self._routers[east_out], + dst_node=self._routers[west_in], + dst_inport="West", + latency = llat, + weight=link_weights[0])) + self._link_count += 1 + + # West output to East input links + for row in range(num_rows): + for col in range(num_columns): + if (col + 1 < num_columns): + east_in = col + (row * num_columns) + west_out = (col + 1) + (row * num_columns) + llat = cross_link_latency \ + if (west_out, east_in) in cross_links \ + else link_latency + self._int_links.append(\ + IntLink(link_id=self._link_count, + src_node=self._routers[west_out], + dst_node=self._routers[east_in], + dst_inport="East", + latency = llat, + weight=link_weights[1])) + self._link_count += 1 + + # North output to South input links + for col in range(num_columns): + for row in range(num_rows): + if (row + 1 < num_rows): + north_out = col + (row * num_columns) + south_in = col + ((row + 1) * num_columns) + llat = cross_link_latency \ 
+ if (north_out, south_in) in cross_links \ + else link_latency + self._int_links.append(\ + IntLink(link_id=self._link_count, + src_node=self._routers[north_out], + dst_node=self._routers[south_in], + dst_inport="South", + latency = llat, + weight=link_weights[2])) + self._link_count += 1 + + # South output to North input links + for col in range(num_columns): + for row in range(num_rows): + if (row + 1 < num_rows): + north_in = col + (row * num_columns) + south_out = col + ((row + 1) * num_columns) + llat = cross_link_latency \ + if (south_out, north_in) in cross_links \ + else link_latency + self._int_links.append(\ + IntLink(link_id=self._link_count, + src_node=self._routers[south_out], + dst_node=self._routers[north_in], + dst_inport="North", + latency = llat, + weight=link_weights[3])) + self._link_count += 1 + + #-------------------------------------------------------------------------- + # distributeNodes + #-------------------------------------------------------------------------- + + def _createRNFRouter(self, mesh_router): + # Create a zero-latency router bridging node controllers + # and the mesh router + node_router = self._Router(router_id = len(self._routers), + latency = 0) + self._routers.append(node_router) + + # connect node_router <-> mesh router + self._int_links.append(self._IntLink( \ + link_id = self._link_count, + src_node = node_router, + dst_node = mesh_router, + latency = self._router_link_latency)) + self._link_count += 1 + + self._int_links.append(self._IntLink( \ + link_id = self._link_count, + src_node = mesh_router, + dst_node = node_router, + latency = self._router_link_latency)) + self._link_count += 1 + + return node_router + + def distributeNodes(self, num_nodes_per_router, router_idx_list, + node_list): + + if num_nodes_per_router: + # evenly distribute nodes to all listed routers + assert(len(router_idx_list)*num_nodes_per_router == len(node_list)) + + for idx, node in enumerate(node_list): + mesh_router_idx = router_idx_list[idx // num_nodes_per_router] + router = self._routers[mesh_router_idx] + + # Create another router bridging RNF node controllers + # and the mesh router + # for non-RNF nodes, node router is mesh router + if isinstance(node, CHI.CHI_RNF): + router = self._createRNFRouter(router) + + # connect all ctrls in the node to node_router + ctrls = node.getNetworkSideControllers() + for c in ctrls: + self._ext_links.append(self._ExtLink( + link_id = self._link_count, + ext_node = c, + int_node = router, + latency = self._node_link_latency)) + self._link_count += 1 + else: + # try to circulate all nodes to all routers, some routers may be + # connected to zero or more than one node. 
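+            # For example (hypothetical): circulating three nodes over
+            # router_idx_list [1, 5] places them on routers 1, 5 and 1
+            # again, so router 1 ends up with two nodes and router 5
+            # with one.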
+ idx = 0 + for node in node_list: + ridx = router_idx_list[idx] + router = self._routers[ridx] + + if isinstance(node, CHI.CHI_RNF): + router = self._createRNFRouter(router) + ctrls = node.getNetworkSideControllers() + for c in ctrls: + self._ext_links.append(self._ExtLink( \ + link_id = self._link_count, + ext_node = c, + int_node = router, + latency = self._node_link_latency)) + self._link_count += 1 + idx = (idx + 1) % len(router_idx_list) + + #-------------------------------------------------------------------------- + # makeTopology + #-------------------------------------------------------------------------- + + def makeTopology(self, options, network, IntLink, ExtLink, Router): + assert(buildEnv['PROTOCOL'] == 'CHI') + + num_rows = options.num_rows + num_cols = options.num_cols + num_mesh_routers = num_rows * num_cols + + self._IntLink = IntLink + self._ExtLink = ExtLink + self._Router = Router + + if hasattr(options, 'router_link_latency'): + self._router_link_latency = options.router_link_latency + self._node_link_latency = options.node_link_latency + else: + print("WARNING: router/node link latencies not provided") + self._router_link_latency = options.link_latency + self._node_link_latency = options.link_latency + + # classify nodes into different types + rnf_list = [] + hnf_list = [] + mem_ctrls = [] + io_mem_ctrls = [] + io_rni_ctrls = [] + + for n in self.nodes: + if isinstance(n, CHI.CHI_RNF): + rnf_list.append(n) + elif isinstance(n, CHI.CHI_HNF): + hnf_list.append(n) + elif isinstance(n, CHI.CHI_SNF_MainMem): + mem_ctrls.append(n) + elif isinstance(n, CHI.CHI_SNF_BootMem): + io_mem_ctrls.append(n) + elif isinstance(n, CHI.CHI_RNI_DMA): + io_rni_ctrls.append(n) + elif isinstance(n, CHI.CHI_RNI_IO): + io_rni_ctrls.append(n) + else: + fatal('topologies.CustomMesh: {} not supported' + .format(n.__class__.__name__)) + + # Create all mesh routers + self._routers = [Router(router_id=i, latency = options.router_latency)\ + for i in range(num_mesh_routers)] + + self._link_count = 0 + self._int_links = [] + self._ext_links = [] + + # Create all the mesh internal links. 
+ self._makeMesh(IntLink, self._router_link_latency, num_rows, num_cols, + options.cross_links, options.cross_link_latency) + + # Place CHI_RNF on the mesh + num_nodes_per_router = options.CHI_RNF['num_nodes_per_router'] \ + if 'num_nodes_per_router' in options.CHI_RNF else None + self.distributeNodes(num_nodes_per_router, + options.CHI_RNF['router_list'], + rnf_list) + + # Place CHI_HNF on the mesh + num_nodes_per_router = options.CHI_HNF['num_nodes_per_router'] \ + if 'num_nodes_per_router' in options.CHI_HNF else None + self.distributeNodes(num_nodes_per_router, + options.CHI_HNF['router_list'], + hnf_list) + + # Place CHI_SNF_MainMem on the mesh + num_nodes_per_router = options.CHI_SNF_MainMem['num_nodes_per_router']\ + if 'num_nodes_per_router' in options.CHI_SNF_MainMem else None + self.distributeNodes(num_nodes_per_router, + options.CHI_SNF_MainMem['router_list'], + mem_ctrls) + + # Place all IO mem nodes on the mesh + num_nodes_per_router = options.CHI_SNF_IO['num_nodes_per_router'] \ + if 'num_nodes_per_router' in options.CHI_SNF_IO else None + self.distributeNodes(num_nodes_per_router, + options.CHI_SNF_IO['router_list'], + io_mem_ctrls) + + # Place all IO request nodes on the mesh + num_nodes_per_router = options.CHI_RNI_IO['num_nodes_per_router'] \ + if 'num_nodes_per_router' in options.CHI_RNI_IO else None + self.distributeNodes(num_nodes_per_router, + options.CHI_RNI_IO['router_list'], + io_rni_ctrls) + + # Set up + network.int_links = self._int_links + network.ext_links = self._ext_links + network.routers = self._routers + + pairing = getattr(options, 'pairing', None) + if pairing != None: + self._autoPairHNFandSNF(hnf_list, mem_ctrls, pairing) + + #-------------------------------------------------------------------------- + # _autoPair + #-------------------------------------------------------------------------- + def _autoPairHNFandSNF(self, cache_ctrls, mem_ctrls, pairing): + # Use the pairing defined by the configuration to reassign the + # memory ranges + pair_debug = False + + print("Pairing HNFs to SNFs") + print(pairing) + + all_cache = [] + for c in cache_ctrls: all_cache.extend(c.getNetworkSideControllers()) + all_mem = [] + for c in mem_ctrls: all_mem.extend(c.getNetworkSideControllers()) + + # checks and maps index from pairing map to component + assert(len(pairing) == len(all_cache)) + + def _tolist(val): return val if isinstance(val, list) else [val] + + for m in all_mem: m._pairing = [] + + pairing_check = max(1, len(all_mem) / len(all_cache)) + for cidx,c in enumerate(all_cache): + c._pairing = [] + for midx in _tolist(pairing[cidx]): + c._pairing.append(all_mem[midx]) + if c not in all_mem[midx]._pairing: + all_mem[midx]._pairing.append(c) + assert(len(c._pairing) == pairing_check) + if pair_debug: + print(c.path()) + for r in c.addr_ranges: + print("%s" % r) + for p in c._pairing: + print("\t"+p.path()) + for r in p.addr_ranges: + print("\t%s" % r) + + # all must be paired + for c in all_cache: assert(len(c._pairing) > 0) + for m in all_mem: assert(len(m._pairing) > 0) + + # only support a single range for the main memory controllers + tgt_range_start = all_mem[0].addr_ranges[0].start.value + for mem in all_mem: + for r in mem.addr_ranges: + if r.start.value != tgt_range_start: + fatal('topologies.CustomMesh: not supporting pairing of '\ + 'main memory with multiple ranges') + + # reassign ranges for a 1 -> N paring + def _rerange(src_cntrls, tgt_cntrls, fix_tgt_peer): + assert(len(tgt_cntrls) >= len(src_cntrls)) + + def _rangeToBit(addr_ranges): + bit = 
None + for r in addr_ranges: + if bit == None: + bit = r.intlvMatch + else: + assert(bit == r.intlvMatch) + return bit + + def _getPeer(cntrl): + return cntrl.memory_out_port.peer.simobj + + sorted_src = list(src_cntrls) + sorted_src.sort(key = lambda x: _rangeToBit(x.addr_ranges)) + + # paired controllers need to have seq. interleaving match values + intlvMatch = 0 + for src in sorted_src: + for tgt in src._pairing: + for r in tgt.addr_ranges: + r.intlvMatch = intlvMatch + if fix_tgt_peer: + _getPeer(tgt).range.intlvMatch = intlvMatch + intlvMatch = intlvMatch + 1 + + # recreate masks + for src in sorted_src: + for src_range in src.addr_ranges: + if src_range.start.value != tgt_range_start: + continue + new_src_mask = [] + for m in src_range.masks: + # TODO should mask all the way to the max range size + new_src_mask.append(m | (m*2) | (m*4) | + (m*8) | (m*16)) + for tgt in src._pairing: + paired = False + for tgt_range in tgt.addr_ranges: + if tgt_range.start.value == \ + src_range.start.value: + src_range.masks = new_src_mask + new_tgt_mask = [] + lsbs = len(tgt_range.masks) - \ + len(new_src_mask) + for i in range(lsbs): + new_tgt_mask.append(tgt_range.masks[i]) + for m in new_src_mask: + new_tgt_mask.append(m) + tgt_range.masks = new_tgt_mask + if fix_tgt_peer: + _getPeer(tgt).range.masks = new_tgt_mask + paired = True + if not paired: + fatal('topologies.CustomMesh: could not ' \ + 'reassign ranges {} {}'.format( + src.path(), tgt.path())) + if len(all_mem) >= len(all_cache): + _rerange(all_cache, all_mem, True) + else: + _rerange(all_mem, all_cache, False) + + if pair_debug: + print("") + for cidx,c in enumerate(all_cache): + assert(len(c._pairing) == pairing_check) + print(c.path()) + for r in c.addr_ranges: + print("%s" % r) + for p in c._pairing: + print("\t"+p.path()) + for r in p.addr_ranges: + print("\t%s" % r) + + diff --git a/src/mem/ruby/SConscript b/src/mem/ruby/SConscript index bde71c0176..c3f8365594 100644 --- a/src/mem/ruby/SConscript +++ b/src/mem/ruby/SConscript @@ -114,9 +114,11 @@ MakeInclude('slicc_interface/RubyRequest.hh') MakeInclude('common/Address.hh') MakeInclude('common/BoolVec.hh') MakeInclude('common/DataBlock.hh') +MakeInclude('common/ExpectedMap.hh') MakeInclude('common/IntVec.hh') MakeInclude('common/MachineID.hh') MakeInclude('common/NetDest.hh') +MakeInclude('common/TriggerQueue.hh') MakeInclude('common/Set.hh') MakeInclude('common/WriteMask.hh') MakeInclude('network/MessageBuffer.hh') diff --git a/src/mem/ruby/common/ExpectedMap.hh b/src/mem/ruby/common/ExpectedMap.hh new file mode 100644 index 0000000000..a1889b7c8a --- /dev/null +++ b/src/mem/ruby/common/ExpectedMap.hh @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2021 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __MEM_RUBY_COMMON_EXPECTEDMAP_HH__
+#define __MEM_RUBY_COMMON_EXPECTEDMAP_HH__
+
+#include <cassert>
+#include <iostream>
+#include <unordered_map>
+
+// ExpectedMap helper class is used to facilitate tracking of pending
+// response and data messages in the CHI protocol. It offers additional
+// functionality when compared to plain counters:
+// - tracks the expected type for received messages
+// - tracks segmented data messages (i.e. when a line transfer is split in
+//   multiple messages)
+
+template<typename RespType, typename DataType>
+class ExpectedMap
+{
+  private:
+
+    template<typename Type>
+    struct ExpectedState
+    {
+        struct EnumClassHash
+        {
+            std::size_t operator()(Type t) const
+            {
+                return static_cast<std::size_t>(t);
+            }
+        };
+
+      private:
+        // chunks is the number of segmented messages we expect to receive
+        // before incrementing numReceived. This is typically always 1
+        // for all non-data messages
+        int chunks;
+        int currChunk;
+        int numReceived;
+        std::unordered_map<Type, bool, EnumClassHash> expectedTypes;
+
+      public:
+        ExpectedState()
+          :chunks(1), currChunk(0), numReceived(0)
+        {}
+
+        void
+        clear(int msg_chunks)
+        {
+            chunks = msg_chunks;
+            currChunk = 0;
+            numReceived = 0;
+            expectedTypes.clear();
+        }
+
+        void
+        addExpectedType(const Type &val)
+        {
+            expectedTypes[val] = false;
+        }
+
+        int received() const { return numReceived; }
+
+        bool
+        increaseReceived(const Type &val)
+        {
+            if (expectedTypes.find(val) == expectedTypes.end())
+                return false;
+
+            expectedTypes[val] = true;
+            ++currChunk;
+            if (currChunk == chunks) {
+                ++numReceived;
+                currChunk = 0;
+            }
+
+            return true;
+        }
+
+        bool
+        receivedType(const Type &val) const
+        {
+            auto i = expectedTypes.find(val);
+            if (i != expectedTypes.end())
+                return i->second;
+            else
+                return false;
+        }
+    };
+
+    ExpectedState<DataType> expectedData;
+    ExpectedState<RespType> expectedResp;
+    int totalExpected;
+
+  public:
+    ExpectedMap()
+      :expectedData(), expectedResp(), totalExpected(0)
+    {}
+
+    // Clear the tracking state and specify the number of chunks required
+    // to receive a complete data message
+    void
+    clear(int dataChunks)
+    {
+        expectedData.clear(dataChunks);
+        expectedResp.clear(1);
+        totalExpected = 0;
+    }
+
+    // Register an expected response message type
+    void
+    addExpectedRespType(const RespType &val)
+    {
+        expectedResp.addExpectedType(val);
+    }
+
+    // Register an expected data message type
+    void
+    addExpectedDataType(const DataType &val)
+    {
+        expectedData.addExpectedType(val);
+    }
+
+    // Set the number of expected messages
+    void setExpectedCount(int val) { totalExpected = val; }
+
+    void addExpectedCount(int val) { totalExpected += val; }
+
+    // Returns the number of messages received.
+    // Notice that a data message counts as received only after all of
+    // its chunks are received.
+    int
+    received() const
+    {
+        return expectedData.received() + expectedResp.received();
+    }
+
+    // Returns the remaining number of expected messages
+    int expected() const { return totalExpected - received(); }
+
+    // Has any expected message ?
+    bool hasExpected() const { return expected() != 0; }
+
+    // Has received any data ?
+    bool hasReceivedData() const { return expectedData.received() != 0; }
+
+    // Has received any response ?
+    bool hasReceivedResp() const { return expectedResp.received() != 0; }
+
+
+    // Notifies that a response message was received
+    bool
+    receiveResp(const RespType &val)
+    {
+        assert(received() < totalExpected);
+        return expectedResp.increaseReceived(val);
+    }
+
+    // Notifies that a data message chunk was received
+    bool
+    receiveData(const DataType &val)
+    {
+        assert(received() <= totalExpected);
+        return expectedData.increaseReceived(val);
+    }
+
+    // Has received any data of the given type ?
+    bool
+    receivedDataType(const DataType &val) const
+    {
+        return expectedData.receivedType(val);
+    }
+
+    // Has received any response of the given type ?
diff --git a/src/mem/ruby/common/TriggerQueue.hh b/src/mem/ruby/common/TriggerQueue.hh
new file mode 100644
index 0000000000..2775d7a06e
--- /dev/null
+++ b/src/mem/ruby/common/TriggerQueue.hh
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2021 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __MEM_RUBY_COMMON_QUEUE_HH__
+#define __MEM_RUBY_COMMON_QUEUE_HH__
+
+#include <deque>
+#include <iostream>
+
+// The TriggerQueue helper class is used to keep a list of events that
+// trigger the actions that need to be executed before an outstanding
+// transaction completes in the CHI protocol. When a transaction no longer
+// has pending response or data messages, this queue is checked and the
+// event at the head of the queue is triggered. If the queue is empty, the
+// transaction is finalized. Events can be marked as NB (non-blocking).
+// NB events are triggered by the protocol even if the transaction has
+// pending data/responses.
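As a rough illustration of how the class defined below is meant to be driven, here is a hedged sketch; the event names and the dispatch loop are hypothetical simplifications of what the CHI controller does:

```cpp
enum class Event { SendCompData, CheckCacheFill, TagArrayWrite };

int main()
{
    TriggerQueue<Event> actions;
    actions.push(Event::SendCompData);     // blocking: waits for pending msgs
    actions.pushNB(Event::TagArrayWrite);  // non-blocking: may fire regardless

    // Simplified dispatch: NB events fire even with messages outstanding;
    // blocking events only fire once nothing more is expected.
    bool pendingMsgs = true; // e.g. ExpectedMap::hasExpected()
    while (!actions.empty() && (actions.frontNB() || !pendingMsgs)) {
        Event ev = actions.front();
        actions.pop();
        // ... trigger the transition for ev ...
    }
}
```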
+
+template<typename T>
+class TriggerQueue
+{
+  private:
+    struct ValType
+    {
+        T val;
+        bool non_blocking;
+    };
+    std::deque<ValType> queue;
+
+  public:
+    // Returns the head of the queue
+    const T& front() const { return queue.front().val; }
+
+    // Returns the head of the queue
+    // NOTE: SLICC won't allow us to reuse front() for different values of
+    // the template parameter, so we use an additional definition to work
+    // around that
+    const T& next() const { return queue.front().val; }
+
+    // Returns the end of the queue
+    const T& back() const { return queue.back().val; }
+
+    // Is the head event non-blocking ?
+    bool frontNB() const { return queue.front().non_blocking; }
+
+    // Is the last event non-blocking ?
+    bool backNB() const { return queue.back().non_blocking; }
+
+    // Is the queue empty ?
+    bool empty() const { return queue.empty(); }
+
+    // put an event at the end of the queue
+    void push(const T &elem) { queue.push_back({elem, false}); }
+
+    // emplace an event at the end of the queue
+    template<typename... Ts>
+    void
+    emplace(Ts&&... args)
+    {
+        queue.push_back({T(std::forward<Ts>(args)...), false});
+    }
+
+    // put an event at the head of the queue
+    void pushFront(const T &elem) { queue.push_front({elem, false}); }
+
+    // put a non-blocking event at the end of the queue
+    void pushNB(const T &elem) { queue.push_back({elem, true}); }
+
+    // put a non-blocking event at the head of the queue
+    void pushFrontNB(const T &elem) { queue.push_front({elem, true}); }
+
+    // pop the head of the queue
+    void pop() { queue.pop_front(); }
+
+    void print(std::ostream& out) const;
+};
+
+template<typename T>
+inline std::ostream&
+operator<<(std::ostream& out, const TriggerQueue<T>& obj)
+{
+    obj.print(out);
+    out << std::flush;
+    return out;
+}
+
+template<typename T>
+inline void
+TriggerQueue<T>::print(std::ostream& out) const
+{
+}
+
+#endif // __MEM_RUBY_COMMON_QUEUE_HH__
diff --git a/src/mem/ruby/protocol/RubySlicc_Exports.sm b/src/mem/ruby/protocol/RubySlicc_Exports.sm
index c2f2c9df48..7706f572da 100644
--- a/src/mem/ruby/protocol/RubySlicc_Exports.sm
+++ b/src/mem/ruby/protocol/RubySlicc_Exports.sm
@@ -262,7 +262,9 @@ enumeration(MachineType, desc="...", default="MachineType_NULL") {
   TCCdir, desc="Directory at the GPU L2 Cache (TCC)";
   SQC, desc="GPU L1 Instr Cache (Sequencer Cache)";
   RegionDir, desc="Region-granular directory";
-  RegionBuffer,desc="Region buffer for CPU and GPU";
+  RegionBuffer, desc="Region buffer for CPU and GPU";
+  Cache, desc="Generic coherent cache controller";
+  Memory, desc="Memory controller interface";
   NULL, desc="null mach type";
 }
diff --git a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm
new file mode 100644
index 0000000000..ea5eaff4ec
--- /dev/null
+++ b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm
@@ -0,0 +1,3057 @@
+/*
+ * Copyright (c) 2021 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +//////////////////////////////////////////////////////////////////////////// +// CHI-cache actions definitions +//////////////////////////////////////////////////////////////////////////// + +action(AllocateTBE_Request, desc="") { + if (storTBEs.areNSlotsAvailable(1)) { + // reserve a slot for this request + storTBEs.incrementReserved(); + + // Move request to rdy queue + peek(reqInPort, CHIRequestMsg) { + enqueue(reqRdyOutPort, CHIRequestMsg, allocation_latency) { + assert(in_msg.addr == address); + assert(in_msg.is_local_pf == false); + out_msg := in_msg; + } + } + + } else { + // we don't have resources to track this request; enqueue a retry + peek(reqInPort, CHIRequestMsg) { + assert(in_msg.allowRetry); + enqueue(retryTriggerOutPort, RetryTriggerMsg, 0) { + out_msg.addr := in_msg.addr; + out_msg.event := Event:SendRetryAck; + out_msg.retryDest := in_msg.requestor; + retryQueue.emplace(in_msg.addr,in_msg.requestor); + } + } + } + + reqInPort.dequeue(clockEdge()); +} + +action(AllocateTBE_Request_WithCredit, desc="") { + // TBE slot already reserved + // Move request to rdy queue + peek(reqInPort, CHIRequestMsg) { + assert(in_msg.allowRetry == false); + enqueue(reqRdyOutPort, CHIRequestMsg, allocation_latency) { + assert(in_msg.addr == address); + out_msg := in_msg; + } + } + reqInPort.dequeue(clockEdge()); +} + +action(AllocateTBE_Snoop, desc="") { + // No retry for snoop requests; just create resource stall + check_allocate(storSnpTBEs); + + storSnpTBEs.incrementReserved(); + + // Move request to rdy queue + peek(snpInPort, CHIRequestMsg) { + enqueue(snpRdyOutPort, CHIRequestMsg, allocation_latency) { + assert(in_msg.addr == address); + out_msg := in_msg; + } + + // also remove snoop source from waiting retry destinations to prevent + // deadlocks in which this snoop is blocked by a transaction that needs to + // send a request to the snoop destination before going to BUSY_INTR, + // but the destination needs the snoop to complete before sending retry + // credit + 
destsWaitingRetry.remove(in_msg.requestor); + } + snpInPort.dequeue(clockEdge()); +} + +action(AllocateTBE_SeqRequest, desc="") { + // No retry for sequencer requests; just create resource stall + check_allocate(storTBEs); + + // reserve a slot for this request + storTBEs.incrementReserved(); + + // Move request to rdy queue + peek(seqInPort, RubyRequest) { + enqueue(reqRdyOutPort, CHIRequestMsg, allocation_latency) { + out_msg.addr := in_msg.LineAddress; + assert((in_msg.Size > 0) && (in_msg.Size <= blockSize)); + out_msg.accAddr := in_msg.PhysicalAddress; + out_msg.accSize := in_msg.Size; + out_msg.requestor := machineID; + out_msg.fwdRequestor := machineID; + out_msg.seqReq := in_msg.getRequestPtr(); + out_msg.isSeqReqValid := true; + assert(in_msg.Prefetch == PrefetchBit:No); + out_msg.is_local_pf := false; + out_msg.is_remote_pf := false; + + if ((in_msg.Type == RubyRequestType:LD) || + (in_msg.Type == RubyRequestType:IFETCH)) { + out_msg.type := CHIRequestType:Load; + } else if (in_msg.Type == RubyRequestType:ST) { + if (in_msg.Size == blockSize) { + out_msg.type := CHIRequestType:StoreLine; + } else { + out_msg.type := CHIRequestType:Store; + } + } else { + error("Invalid RubyRequestType"); + } + } + } + seqInPort.dequeue(clockEdge()); +} + +action(AllocateTBE_PfRequest, desc="Allocate TBE for prefetch request") { + // No retry for prefetch requests; just create resource stall + check_allocate(storTBEs); + + // reserve a slot for this request + storTBEs.incrementReserved(); + + // Move request to rdy queue + peek(pfInPort, RubyRequest) { + enqueue(reqRdyOutPort, CHIRequestMsg, 0) { + out_msg.addr := in_msg.LineAddress; + assert((in_msg.Size > 0) && (in_msg.Size <= blockSize)); + out_msg.accAddr := in_msg.PhysicalAddress; + out_msg.accSize := in_msg.Size; + out_msg.requestor := machineID; + out_msg.fwdRequestor := machineID; + out_msg.seqReq := in_msg.getRequestPtr(); + out_msg.isSeqReqValid := true; + assert(in_msg.Prefetch != PrefetchBit:No); + out_msg.is_local_pf := true; + out_msg.is_remote_pf := false; + + if (in_msg.Type == RubyRequestType:LD) { + out_msg.type := CHIRequestType:Load; + } else if (in_msg.Type == RubyRequestType:ST) { + error("CHI is not supporting prefetch store requests"); + } else { + error("Invalid RubyRequestType"); + } + } + } + pfInPort.dequeue(clockEdge()); +} + +action(Initiate_Request, desc="") { + State initial := getState(tbe, cache_entry, address); + bool was_retried := false; + peek(reqRdyPort, CHIRequestMsg) { + set_tbe(allocateRequestTBE(address, in_msg)); + // only a msg that was already retried doesn't allow a retry + was_retried := in_msg.allowRetry == false; + } + DirEntry dir_entry := getDirEntry(address); + copyCacheAndDir(cache_entry, dir_entry, tbe, initial); + + tbe.use_DMT := is_HN && enable_DMT; + tbe.use_DCT := enable_DCT; + + bool alloc_entry := needCacheEntry(tbe.reqType, + cache_entry, dir_entry, + tbe.is_local_pf); + bool dealloc_entry := needDeallocCacheEntry(tbe.reqType); + assert((alloc_entry && dealloc_entry) == false); + + // always drops any data when not caching it or when this transaction + // requires deallocation + tbe.dataToBeInvalid := dealloc_entry || + (is_invalid(cache_entry) && (alloc_entry == false)); + tbe.doCacheFill := alloc_entry || is_valid(cache_entry); + + // model the initial tag array read + tbe.actions.pushNB(Event:TagArrayRead); + + incomingTransactionStart(address, curTransitionEvent(), initial, was_retried); +} + +action(Initiate_Request_Stale, desc="") { + State initial := getState(tbe, cache_entry, 
address);
+    bool was_retried := false;
+    peek(reqRdyPort, CHIRequestMsg) {
+        set_tbe(allocateRequestTBE(address, in_msg));
+        was_retried := in_msg.allowRetry == false;
+    }
+    copyCacheAndDir(cache_entry, getDirEntry(address), tbe, initial);
+    incomingTransactionStart(address, curTransitionEvent(), initial, was_retried);
+}
+
+action(Initiate_Snoop, desc="") {
+    State initial := getState(tbe, cache_entry, address);
+    peek(snpRdyPort, CHIRequestMsg) {
+        set_tbe(allocateSnoopTBE(address, in_msg));
+    }
+    copyCacheAndDir(cache_entry, getDirEntry(address), tbe, initial);
+
+    // if we end up with valid data drop it if no entry allocated
+    tbe.dataToBeInvalid := is_invalid(cache_entry);
+
+    // model the initial tag array read
+    tbe.actions.pushNB(Event:TagArrayRead);
+
+    incomingTransactionStart(address, curTransitionEvent(), initial, false);
+}
+
+action(Initiate_Snoop_Hazard, desc="") {
+    assert(is_valid(tbe));
+    assert(tbe.is_req_tbe || tbe.is_repl_tbe);
+
+    // Switch to the new snoop TBE
+    TBE prev_tbe := tbe;
+    peek(snpRdyPort, CHIRequestMsg) {
+        set_tbe(allocateSnoopTBE(address, in_msg));
+    }
+    assert(tbe.is_snp_tbe);
+    if (prev_tbe.is_req_tbe) {
+        assert(prev_tbe.is_repl_tbe == false);
+        tbe.is_req_hazard := true;
+    } else {
+        assert(prev_tbe.is_repl_tbe);
+        tbe.is_repl_hazard := true;
+    }
+
+    // Use state from prev TBE
+    tbe.pendReqType := prev_tbe.pendReqType;
+    copyCacheAndDirTBEs(prev_tbe, tbe);
+    tbe.wakeup_pending_req := prev_tbe.wakeup_pending_req;
+    tbe.wakeup_pending_snp := prev_tbe.wakeup_pending_snp;
+    tbe.wakeup_pending_tgr := prev_tbe.wakeup_pending_tgr;
+}
+
+action(RestoreFromHazard, desc="") {
+    TBE hazard_tbe := getHazardTBE(tbe);
+
+    // update
+    setDataToBeStates(tbe);
+
+    copyCacheAndDirTBEs(tbe, hazard_tbe);
+    hazard_tbe.wakeup_pending_req := tbe.wakeup_pending_req;
+    hazard_tbe.wakeup_pending_snp := tbe.wakeup_pending_snp;
+    hazard_tbe.wakeup_pending_tgr := tbe.wakeup_pending_tgr;
+
+    deallocateSnpTBE(tbe);
+    set_tbe(hazard_tbe);
+
+    // if the pending request is a WB or Evict then it becomes a stale request
+    // if the data is no longer in the expected state
+    if (tbe.pendReqType == CHIRequestType:WriteBackFull) {
+        tbe.is_stale := (tbe.dataValid && tbe.dataDirty) == false;
+    } else if (tbe.pendReqType == CHIRequestType:WriteCleanFull) {
+        tbe.is_stale := (tbe.dataValid && tbe.dataDirty) == false;
+    } else if (hazard_tbe.pendReqType == CHIRequestType:WriteEvictFull) {
+        tbe.is_stale := (tbe.dataValid && tbe.dataUnique) == false;
+    } else if (hazard_tbe.pendReqType == CHIRequestType:Evict) {
+        tbe.is_stale := tbe.dataValid == false;
+    }
+
+    // a pending action from the original request may have been stalled during
+    // the hazard and needs to wake up now
+    wakeupPendingTgrs(tbe);
+}
+
+action(Initiate_Replacement, desc="") {
+    assert(is_invalid(tbe));
+    State initial := getState(tbe, cache_entry, address);
+    if (unify_repl_TBEs) {
+        peek(replTriggerInPort, ReplacementMsg) {
+            set_tbe(allocateReplacementTBEOnSlot(address, in_msg.slot));
+            DPRINTF(RubySlicc, "Allocated replacement TBE on slot %d\n",
+                    tbe.storSlot);
+        }
+    } else {
+        set_tbe(allocateReplacementTBE(address));
+        DPRINTF(RubySlicc, "Allocated replacement TBE on new slot %d\n",
+                tbe.storSlot);
+    }
+    copyCacheAndDir(cache_entry, getDirEntry(address), tbe, initial);
+
+    // model the initial tag array read
+    tbe.actions.pushNB(Event:TagArrayRead);
+
+    incomingTransactionStart(address, curTransitionEvent(), initial, false);
+}
+
+
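The two hazard actions above are essentially a save/swap/restore of the transaction state. A hedged C++-style paraphrase of the pattern follows; the TBE fields and the copy helper mirror the SLICC names, everything else is illustrative:

```cpp
struct CacheDirState { /* dataValid, dataDirty, dir_sharers, ... */ };
struct TBE { CacheDirState state; bool is_req_hazard = false; };

// Mirrors the SLICC helper: copies only the cache/directory view.
void copyCacheAndDirTBEs(const TBE &from, TBE &to) { to.state = from.state; }

// Initiate_Snoop_Hazard: park the in-flight request and let a fresh
// snoop TBE work on the same cache/directory state.
TBE *beginSnoopHazard(TBE *req_tbe, TBE *snp_tbe)
{
    copyCacheAndDirTBEs(*req_tbe, *snp_tbe);
    snp_tbe->is_req_hazard = true;
    return snp_tbe;               // snoop TBE becomes the active one
}

// RestoreFromHazard: fold the (possibly downgraded) state back so the
// original request resumes against whatever the snoop left behind.
TBE *endSnoopHazard(TBE *snp_tbe, TBE *req_tbe)
{
    copyCacheAndDirTBEs(*snp_tbe, *req_tbe);
    return req_tbe;
}
```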
+action(StallRequest, desc="") {
+    // was stalled because of an existing request
+    assert(is_valid(tbe));
+    assert(tbe.addr == address);
+    // tracks pending
+    tbe.wakeup_pending_req := true;
+    stall_and_wait(reqRdyPort, address);
+}
+
+action(StallSnoop, desc="") {
+    // was stalled because of an existing request
+    assert(is_valid(tbe));
+    assert(tbe.addr == address);
+    // tracks pending
+    tbe.wakeup_pending_snp := true;
+    stall_and_wait(snpRdyPort, address);
+}
+
+action(StallLocalEviction, desc="") {
+    // was stalled because of an existing request
+    assert(is_valid(tbe));
+    assert(tbe.addr == address);
+
+    // Just pop the queue; when this transaction finishes, wake up the
+    // original msgs that caused this eviction
+    tbe.wakeup_pending_tgr := true;
+    replTriggerInPort.dequeue(clockEdge());
+}
+
+action(StallSnoop_NoTBE, desc="") {
+    stall_and_wait(snpRdyPort, address);
+}
+
+action(StallActionOnHazard, desc="") {
+    assert(is_valid(tbe));
+    assert(tbe.is_req_hazard || tbe.is_repl_hazard);
+    tbe.wakeup_pending_tgr := true;
+    stall_and_wait(triggerInPort, address);
+}
+
+action(Initiate_ReadShared_Miss, desc="") {
+    tbe.actions.push(Event:ReadMissPipe);
+    if (is_HN && tbe.use_DMT) {
+        tbe.requestorToBeExclusiveOwner := true;
+        tbe.dataMaybeDirtyUpstream := true; // SNF always replies with CompData_UC
+        if (enable_DMT_early_dealloc) {
+            tbe.actions.push(Event:SendRespSepData);
+        }
+        tbe.actions.push(Event:WaitCompAck);
+        tbe.actions.pushNB(Event:SendReadNoSnpDMT);
+    } else if (is_HN) {
+        tbe.actions.push(Event:SendReadNoSnp);
+        tbe.actions.push(Event:WaitCompAck);
+        tbe.actions.pushNB(Event:SendCompData);
+    } else {
+        tbe.actions.push(Event:SendReadShared);
+        tbe.actions.push(Event:WaitCompAck);
+        tbe.actions.pushNB(Event:SendCompData);
+    }
+    tbe.actions.push(Event:CheckCacheFill);
+    tbe.actions.push(Event:TagArrayWrite);
+}
+
+action(Initiate_ReadShared_Hit, desc="") {
+    tbe.actions.push(Event:ReadHitPipe);
+    tbe.actions.push(Event:DataArrayRead);
+    tbe.actions.push(Event:WaitCompAck);
+    tbe.actions.pushNB(Event:SendCompData);
+    tbe.actions.pushNB(Event:TagArrayWrite);
+}
+
+action(Initiate_ReadShared_HitUpstream, desc="") {
+    tbe.actions.push(Event:ReadMissPipe);
+    if (tbe.use_DCT) {
+        tbe.actions.push(Event:SendSnpSharedFwdToOwner);
+        tbe.actions.pushNB(Event:WaitCompAck);
+        tbe.updateDirOnCompAck := false;
+    } else {
+        tbe.actions.push(Event:SendSnpShared);
+        tbe.actions.push(Event:WaitCompAck);
+        tbe.actions.pushNB(Event:SendCompData);
+    }
+    tbe.actions.push(Event:MaintainCoherence);
+}
+
+action(Initiate_ReadShared_HitUpstream_NoOwner, desc="") {
+    tbe.actions.push(Event:ReadMissPipe);
+    if (tbe.use_DCT) {
+        tbe.actions.push(Event:SendSnpSharedFwdToSharer);
+        tbe.actions.pushNB(Event:WaitCompAck);
+        tbe.updateDirOnCompAck := false;
+    } else {
+        tbe.actions.push(Event:SendSnpOnce);
+        tbe.actions.push(Event:WaitCompAck);
+        tbe.actions.pushNB(Event:SendCompData);
+    }
+    tbe.actions.push(Event:MaintainCoherence);
+}
+
+
+action(Initiate_ReadOnce_Miss, desc="") {
+    // drop at the end if not doing a fill
+    tbe.dataToBeInvalid := tbe.doCacheFill == false;
+
+    tbe.actions.push(Event:ReadMissPipe);
+    if (is_HN && tbe.use_DMT) {
+        assert(is_invalid(cache_entry));
+        tbe.requestorToBeExclusiveOwner := true;
+        tbe.dataMaybeDirtyUpstream := true; // SNF always replies with CompData_UC
+        if (enable_DMT_early_dealloc) {
+            tbe.actions.push(Event:SendRespSepData);
+        }
+        tbe.actions.push(Event:WaitCompAck);
+        tbe.actions.pushNB(Event:SendReadNoSnpDMT);
+    } else if (is_HN) {
+        tbe.actions.push(Event:SendReadNoSnp);
+        tbe.actions.push(Event:WaitCompAck);
tbe.actions.pushNB(Event:SendCompData); + } else { + // if not allocating an entry send a ReadOnce + if (tbe.dataToBeInvalid) { + tbe.actions.push(Event:SendReadOnce); + } else { + tbe.actions.push(Event:SendReadShared); + } + tbe.actions.push(Event:WaitCompAck); + tbe.actions.pushNB(Event:SendCompData); + } + + tbe.updateDirOnCompAck := false; + + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:TagArrayWrite); +} + +action(Initiate_ReadOnce_Hit, desc="") { + tbe.actions.push(Event:ReadHitPipe); + tbe.actions.push(Event:DataArrayRead); + tbe.actions.push(Event:WaitCompAck); + tbe.actions.pushNB(Event:SendCompData); + tbe.updateDirOnCompAck := false; +} + +action(Initiate_ReadOnce_HitUpstream, desc="") { + tbe.actions.push(Event:ReadMissPipe); + if (tbe.use_DCT) { + tbe.actions.push(Event:SendSnpOnceFwd); + tbe.actions.pushNB(Event:WaitCompAck); + } else { + tbe.actions.push(Event:SendSnpOnce); + tbe.actions.push(Event:WaitCompAck); + tbe.actions.pushNB(Event:SendCompData); + } + tbe.updateDirOnCompAck := false; + // no need to update or access tags/data on ReadOnce served from upstream +} + + + +action(Initiate_ReadUnique_Miss, desc="") { + tbe.actions.push(Event:ReadMissPipe); + if (is_HN && tbe.use_DMT) { + tbe.requestorToBeExclusiveOwner := true; + tbe.dataMaybeDirtyUpstream := true; // SNF always replies with CompData_UC + if (enable_DMT_early_dealloc) { + tbe.actions.push(Event:SendRespSepData); + } + tbe.actions.push(Event:WaitCompAck); + tbe.actions.pushNB(Event:SendReadNoSnpDMT); + } else if (is_HN) { + tbe.actions.push(Event:SendReadNoSnp); + tbe.actions.push(Event:WaitCompAck); + tbe.actions.pushNB(Event:SendCompData); + } else { + tbe.actions.push(Event:SendReadUnique); + tbe.actions.push(Event:WaitCompAck); + tbe.actions.pushNB(Event:SendCompData); + } + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:TagArrayWrite); +} + +action(Initiate_ReadUnique_AutoUpgrade, desc="") { + assert(is_HN); + tbe.dataUnique := true; +} + +action(Initiate_ReadUnique_Upgrade, desc="") { + // must use the transitions with auto upgrade otherwise + assert(is_HN == false); + assert(tbe.use_DCT == false); + assert((tbe.dataValid && tbe.dataUnique) == false); + assert((tbe.dir_ownerExists && tbe.dir_ownerIsExcl) == false); + + tbe.actions.push(Event:ReadMissPipe); + if (tbe.dataMaybeDirtyUpstream) { + tbe.actions.push(Event:SendSnpUnique); + } else if (tbe.dir_sharers.count() > 0) { + // no one will send us data unless we explicitly ask + tbe.actions.push(Event:SendSnpUniqueRetToSrc); + } else { + assert(tbe.dataValid); + } + // then attempt to upgrade our data + tbe.actions.push(Event:SendCleanUnique); + tbe.actions.push(Event:CheckUpgrade_FromRU); + + // send up the upgraded data or fresh data if we failed, see CheckUpgrade_FromRU + tbe.actions.push(Event:WaitCompAck); + tbe.actions.pushNB(Event:SendCompData); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:TagArrayWrite); +} + +action(Initiate_ReadUnique_Hit, desc="") { + tbe.actions.push(Event:ReadHitPipe); + tbe.actions.push(Event:DataArrayRead); + tbe.actions.push(Event:WaitCompAck); + tbe.actions.pushNB(Event:SendCompData); + tbe.actions.pushNB(Event:TagArrayWrite); +} + +action(Initiate_ReadUnique_HitUpstream, desc="") { + tbe.actions.push(Event:ReadMissPipe); + // SnpUniqueFwd can be used only if the line is cached at a single + // requester; so force it off if that's the case + tbe.use_DCT := tbe.use_DCT && (tbe.dir_sharers.count() == 1) && + (tbe.dir_sharers.isElement(tbe.requestor) == 
false);
+    if (tbe.use_DCT) {
+        tbe.actions.push(Event:SendSnpUniqueFwd);
+        tbe.actions.pushNB(Event:WaitCompAck);
+        tbe.updateDirOnCompAck := false;
+    } else if (tbe.dataMaybeDirtyUpstream) {
+        tbe.actions.push(Event:SendSnpUnique);
+        tbe.actions.push(Event:WaitCompAck);
+        tbe.actions.pushNB(Event:SendCompData);
+    } else {
+        // no one will send us data unless we explicitly ask
+        tbe.actions.push(Event:SendSnpUniqueRetToSrc);
+        tbe.actions.push(Event:WaitCompAck);
+        tbe.actions.pushNB(Event:SendCompData);
+    }
+    // just a tag update, since any data would become stale
+    tbe.actions.pushNB(Event:TagArrayWrite);
+}
+
+action(Initiate_ReadUnique_Hit_InvUpstream, desc="") {
+    tbe.actions.push(Event:ReadHitPipe);
+    tbe.actions.push(Event:SendSnpCleanInvalid);
+    tbe.actions.pushNB(Event:DataArrayRead);
+    tbe.actions.push(Event:WaitCompAck);
+    tbe.actions.pushNB(Event:SendCompData);
+    tbe.actions.pushNB(Event:TagArrayWrite);
+}
+
+action(Initiate_CleanUnique, desc="") {
+    tbe.actions.push(Event:ReadMissPipe); // TODO: need another latency pipe?
+
+    // the requestor doesn't have the line anymore; send a response but don't
+    // update the directory on CompAck. The requestor knows we are not
+    // tracking it and will send a ReadUnique later
+    if (tbe.dir_sharers.isElement(tbe.requestor) == false) {
+        tbe.actions.push(Event:SendCompUCResp);
+        tbe.actions.push(Event:WaitCompAck);
+        tbe.updateDirOnCompAck := false;
+    } else {
+        // invalidate everyone except the requestor
+        if (tbe.dir_sharers.count() > 1) {
+            tbe.actions.push(Event:SendSnpCleanInvalidNoReq);
+        }
+        // auto upgrade if HN
+        tbe.dataUnique := tbe.dataUnique || is_HN;
+        // get unique permission
+        if (tbe.dataUnique == false) {
+            tbe.actions.push(Event:SendCleanUnique);
+            tbe.actions.push(Event:CheckUpgrade_FromCU);
+        }
+        // next actions will depend on the data state after snoops+CleanUnique
+        tbe.actions.push(Event:FinishCleanUnique);
+    }
+}
+
+action(Finish_CleanUnique, desc="") {
+    // This should be executed at the end of the transaction
+    assert(tbe.actions.empty());
+    tbe.actions.push(Event:SendCompUCResp);
+    tbe.actions.push(Event:WaitCompAck);
+
+    // everyone may have been hit by an invalidation so check again
+    if (tbe.dir_sharers.isElement(tbe.requestor) == false) {
+        tbe.updateDirOnCompAck := false;
+        assert(tbe.dataValid == false);
+    } else {
+        // must be the only one in the sharers map
+        assert(tbe.dir_sharers.count() == 1);
+        assert(tbe.dataUnique);
+
+        // similar to Initiate_MaitainCoherence; write back if we hold the
+        // data dirty and cannot keep it, while the requestor's copy is clean
+        bool fill_pipeline := tbe.dataValid && tbe.dataDirty;
+        bool req_has_dirty := tbe.dir_ownerExists && (tbe.dir_owner == tbe.requestor);
+        if (tbe.dataValid && tbe.dataDirty && tbe.dataToBeInvalid &&
+            (req_has_dirty == false)) {
+            fill_pipeline := false;
+            if (is_HN) {
+                tbe.actions.push(Event:SendWriteNoSnp);
+            } else {
+                tbe.actions.push(Event:SendWriteClean);
+            }
+            tbe.actions.push(Event:WriteBEPipe);
+            tbe.actions.push(Event:SendWBData);
+        }
+
+        // needed by UpdateDirState_FromReqResp triggered by the expected CompAck
+        tbe.dataMaybeDirtyUpstream := true;
+        tbe.requestorToBeExclusiveOwner := true;
+        tbe.dir_ownerExists := false;
+
+        if (fill_pipeline) {
+            tbe.actions.push(Event:CheckCacheFill);
+        }
+    }
+    tbe.actions.push(Event:TagArrayWrite);
+}
+
+
+action(Initiate_LoadHit, desc="") {
+    // Local prefetch requests do not read the data array
+    if (tbe.is_local_pf == false) {
+        tbe.actions.push(Event:DataArrayRead);
+    }
+    tbe.actions.push(Event:LoadHit);
+}
+
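The writeback decision buried in Finish_CleanUnique above is easy to misread, so here is a hedged C++ restatement of just that predicate; the field names mirror the TBE fields, while the helper itself is illustrative:

```cpp
struct UpgradeState {
    bool dataValid, dataDirty, dataToBeInvalid;
    bool reqHasDirty; // requestor is tracked as owner of a dirty copy
};

// True when we hold the only dirty copy, cannot keep it, and the
// requestor's copy is clean: queue SendWriteNoSnp/SendWriteClean first.
bool needsWriteBack(const UpgradeState &s)
{
    return s.dataValid && s.dataDirty && s.dataToBeInvalid && !s.reqHasDirty;
}

// Otherwise, valid data that stays dirty locally goes through the fill
// pipeline (CheckCacheFill) instead.
```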
+action(Initiate_LoadMiss, desc="") { + if (tbe.doCacheFill) { + tbe.actions.push(Event:SendReadShared); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:TagArrayWrite); + } else { + tbe.actions.push(Event:SendReadOnce); + tbe.dataToBeInvalid := true; + } +} + + + +action(Initiate_StoreHit, desc="") { + tbe.actions.push(Event:DataArrayRead); + tbe.actions.push(Event:StoreHit); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:TagArrayWrite); +} + +action(Initiate_StoreMiss, desc="") { + if (tbe.doCacheFill) { + tbe.actions.push(Event:SendReadUnique); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:TagArrayWrite); + } else { + tbe.actions.push(Event:SendWriteUnique); + tbe.actions.push(Event:SendWUDataCB); + tbe.dataToBeInvalid := true; + } +} + +action(Initiate_StoreUpgrade, desc="") { + assert(tbe.dataValid); + assert(is_valid(cache_entry)); + tbe.actions.push(Event:SendCleanUnique); + tbe.actions.push(Event:CheckUpgrade_FromStore); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:TagArrayWrite); +} + +action(Initiate_WriteUnique_LocalWrite, desc="") { + // auto-upgrade if hn but state was not unique + assert(is_HN || tbe.dataUnique); + tbe.dataUnique := true; + if (tbe.dir_sharers.count() > 0) { + tbe.actions.push(Event:SendSnpCleanInvalid); + } + if (comp_wu) { + tbe.actions.push(Event:SendDBIDResp_WU); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.pushNB(Event:SendComp_WU); + } else { + tbe.actions.push(Event:SendCompDBIDResp_WU); + tbe.actions.pushNB(Event:WriteFEPipe); + } + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:TagArrayWrite); +} + +action(Initiate_WriteUnique_LocalWrite_AfterUpgrade, desc="") { + assert(is_HN == false); + assert((tbe.dataValid && tbe.dataUnique) == false); + tbe.actions.push(Event:SendReadUnique); + if (comp_wu) { + tbe.actions.push(Event:SendDBIDResp_WU); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.pushNB(Event:SendComp_WU); + } else { + tbe.actions.push(Event:SendCompDBIDResp_WU); + tbe.actions.pushNB(Event:WriteFEPipe); + } + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:TagArrayWrite); +} + +action(Initiate_WriteUnique_Writeback, desc="") { + assert(is_HN); + assert(tbe.dir_sharers.count() > 0); + tbe.actions.push(Event:SendSnpUnique); + if (comp_wu) { + tbe.actions.push(Event:SendDBIDResp_WU); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.pushNB(Event:SendWriteNoSnp); + tbe.actions.pushNB(Event:SendComp_WU); + } else { + tbe.actions.push(Event:SendCompDBIDResp_WU); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.pushNB(Event:SendWriteNoSnp); + } + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:SendWBData); + tbe.dataToBeInvalid := true; + tbe.actions.pushNB(Event:TagArrayWrite); +} + +action(Initiate_WriteUnique_PartialWrite, desc="") { + assert(is_HN); + if (tbe.dir_sharers.count() > 0) { + tbe.actions.push(Event:SendSnpCleanInvalid); + } + if (comp_wu) { + tbe.actions.push(Event:SendDBIDResp_WU); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.pushNB(Event:SendWriteNoSnpPartial); + tbe.actions.pushNB(Event:SendComp_WU); + } else { + tbe.actions.push(Event:SendCompDBIDResp_WU); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.pushNB(Event:SendWriteNoSnpPartial); + } + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:SendWUData); + tbe.dataToBeInvalid := true; + + tbe.actions.pushNB(Event:TagArrayWrite); +} + +action(Initiate_WriteUnique_Forward, desc="") { + if 
(comp_wu) {
+        tbe.actions.push(Event:SendDBIDResp_WU);
+        tbe.actions.pushNB(Event:WriteFEPipe);
+        tbe.actions.pushNB(Event:SendWriteUnique);
+        tbe.actions.pushNB(Event:SendComp_WU);
+    } else {
+        tbe.actions.push(Event:SendCompDBIDResp_WU);
+        tbe.actions.pushNB(Event:WriteFEPipe);
+        tbe.actions.pushNB(Event:SendWriteUnique);
+    }
+    tbe.actions.push(Event:WriteBEPipe);
+    tbe.actions.push(Event:SendWUData);
+    tbe.dataToBeInvalid := true;
+    tbe.actions.pushNB(Event:TagArrayWrite);
+}
+
+
+
+action(Initiate_CopyBack, desc="") {
+    // expect to receive this data after Send_CompDBIDResp
+    if (tbe.reqType == CHIRequestType:WriteBackFull) {
+        tbe.expected_req_resp.addExpectedDataType(CHIDataType:CBWrData_UD_PD);
+        tbe.expected_req_resp.addExpectedDataType(CHIDataType:CBWrData_SD_PD);
+    } else if (tbe.reqType == CHIRequestType:WriteEvictFull) {
+        assert(tbe.reqType == CHIRequestType:WriteEvictFull);
+        tbe.expected_req_resp.addExpectedDataType(CHIDataType:CBWrData_UC);
+        tbe.expected_req_resp.addExpectedDataType(CHIDataType:CBWrData_SC);
+    } else {
+        assert(tbe.reqType == CHIRequestType:WriteCleanFull);
+        tbe.expected_req_resp.addExpectedDataType(CHIDataType:CBWrData_UD_PD);
+        tbe.expected_req_resp.addExpectedDataType(CHIDataType:CBWrData_SD_PD);
+    }
+    tbe.expected_req_resp.setExpectedCount(1);
+
+    tbe.actions.pushNB(Event:SendCompDBIDResp);
+    tbe.actions.pushNB(Event:WriteFEPipe);
+    tbe.actions.push(Event:MaintainCoherence);
+    // MaintainCoherence queues the Tag/Data updates
+}
+
+action(Initiate_CopyBack_Stale, desc="") {
+    tbe.expected_req_resp.addExpectedDataType(CHIDataType:CBWrData_SC);
+    tbe.expected_req_resp.addExpectedDataType(CHIDataType:CBWrData_I);
+    tbe.expected_req_resp.setExpectedCount(1);
+
+    tbe.actions.pushNB(Event:SendCompDBIDRespStale);
+    tbe.actions.pushNB(Event:WriteFEPipe);
+
+    // if it was the last known sharer and we don't have the data, do the
+    // same as Initiate_Evict
+    if ((is_HN == false) && (tbe.dir_sharers.count() == 1) &&
+        tbe.dir_sharers.isElement(tbe.requestor) && (tbe.dataValid == false)) {
+        tbe.actions.push(Event:SendEvict);
+    }
+
+    tbe.dir_sharers.remove(tbe.requestor);
+    assert((tbe.dir_ownerExists == false) || (tbe.dir_owner != tbe.requestor));
+
+    // usually we consider data locally invalid on RU states even if we
+    // have a copy; consider it valid for this transition only so we can
+    // come back to UD_RU/UC_RU
+    if (is_valid(cache_entry) && (tbe.dataValid == false) &&
+        tbe.dir_ownerExists && tbe.dir_ownerIsExcl) {
+        tbe.dataValid := true;
+    }
+}
+
+action(Initiate_Evict, desc="") {
+    tbe.actions.push(Event:SendCompIResp);
+
+    assert(tbe.dir_sharers.isElement(tbe.requestor));
+    assert((tbe.dir_ownerExists == false) || (tbe.dir_owner != tbe.requestor));
+    tbe.dir_sharers.remove(tbe.requestor);
+
+    if ((is_HN == false) && (tbe.dir_sharers.count() == 0) &&
+        (tbe.dataValid == false)) {
+        tbe.actions.push(Event:SendEvict);
+    }
+
+    tbe.actions.pushNB(Event:TagArrayWrite);
+}
+
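Initiate_CopyBack above pairs each copy-back request type with the CBWrData opcodes it may legally carry; a hedged C++ summary of that mapping follows, where the enumerators mirror the SLICC names and the function itself is illustrative:

```cpp
#include <vector>

enum class CHIDataType { CBWrData_UD_PD, CBWrData_SD_PD,
                         CBWrData_UC, CBWrData_SC };
enum class CHIRequestType { WriteBackFull, WriteEvictFull, WriteCleanFull };

// WriteBackFull/WriteCleanFull carry dirty (*_PD, pass-dirty) data;
// WriteEvictFull only ever carries clean data.
std::vector<CHIDataType> expectedCBWrData(CHIRequestType req)
{
    if (req == CHIRequestType::WriteEvictFull) {
        return {CHIDataType::CBWrData_UC, CHIDataType::CBWrData_SC};
    }
    return {CHIDataType::CBWrData_UD_PD, CHIDataType::CBWrData_SD_PD};
}
```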
+action(Initiate_MaitainCoherence, desc="") {
+    // issue a copy back if necessary to maintain coherence for data we are
+    // dropping. This should be executed at the end of a transaction
+    assert(tbe.actions.empty());
+    // go through either the fill or the writeback pipeline
+    if (tbe.dataValid && tbe.dataToBeInvalid) {
+        if (is_HN) {
+            if (tbe.dataDirty && (tbe.dataMaybeDirtyUpstream == false)) {
+                tbe.actions.push(Event:SendWriteNoSnp);
+                tbe.actions.push(Event:WriteBEPipe);
+                tbe.actions.push(Event:SendWBData);
+            }
+        } else {
+            if (tbe.dir_sharers.isEmpty() && (tbe.dataDirty || tbe.dataUnique)) {
+                tbe.actions.push(Event:SendWriteBackOrWriteEvict);
+                tbe.actions.push(Event:WriteBEPipe);
+                tbe.actions.push(Event:SendWBData);
+            } else if ((tbe.dir_sharers.isEmpty() == false) && tbe.dataDirty &&
+                       (tbe.dataMaybeDirtyUpstream == false)) {
+                tbe.actions.push(Event:SendWriteClean);
+                tbe.actions.push(Event:WriteBEPipe);
+                tbe.actions.push(Event:SendWBData);
+            }
+        }
+    }
+    else if (tbe.dataValid) {
+        tbe.actions.push(Event:CheckCacheFill);
+    }
+    tbe.actions.push(Event:TagArrayWrite);
+}
+
+
+
+// There is a lot of common code between SnpUnique/SnpUniqueFwd/SnpCleanInvalid,
+// so do one action for all of them here
+action(Initiate_InvalidationSnoop, desc="") {
+    tbe.actions.push(Event:SnpInvPipe);
+    // Propagate a snoop upwards depending on the type
+    if (tbe.dir_sharers.count() > 0) {
+        if ((tbe.reqType == CHIRequestType:SnpUniqueFwd) ||
+            (tbe.reqType == CHIRequestType:SnpUnique)) {
+            if ((tbe.snpNeedsData && (tbe.dataMaybeDirtyUpstream == false)) ||
+                (tbe.dataValid == false)) {
+                tbe.actions.push(Event:SendSnpUniqueRetToSrc);
+            } else {
+                tbe.actions.push(Event:SendSnpUnique);
+            }
+        } else {
+            assert(tbe.reqType == CHIRequestType:SnpCleanInvalid);
+            tbe.actions.push(Event:SendSnpCleanInvalid);
+        }
+    }
+
+    if (tbe.reqType == CHIRequestType:SnpUniqueFwd) {
+        tbe.actions.push(Event:SendSnpUniqueFwdCompData);
+    } else {
+        tbe.actions.push(Event:SendInvSnpResp);
+    }
+
+    if (tbe.is_req_hazard || tbe.is_repl_hazard) {
+        tbe.actions.push(Event:RestoreFromHazard);
+    } else {
+        tbe.actions.pushNB(Event:TagArrayWrite);
+    }
+
+    tbe.dataToBeInvalid := true;
+}
+
+action(Initiate_SnpShared, desc="") {
+    // Handles SnpShared, SnpSharedFwd and SnpNotSharedDirtyFwd
+    tbe.actions.push(Event:SnpSharedPipe);
+    if (tbe.dir_ownerExists) {
+        assert(tbe.dataMaybeDirtyUpstream);
+        tbe.actions.push(Event:SendSnpShared);
+    } else if (tbe.dataValid == false) {
+        // must get a copy of the shared data from upstream
+        assert(tbe.dataMaybeDirtyUpstream == false);
+        assert(tbe.dir_sharers.count() > 0);
+        tbe.actions.push(Event:SendSnpOnce);
+    } else {
+        tbe.actions.push(Event:DataArrayRead);
+    }
+
+    if (tbe.reqType == CHIRequestType:SnpSharedFwd) {
+        tbe.actions.push(Event:SendSnpSharedFwdCompData);
+    } else if (tbe.reqType == CHIRequestType:SnpNotSharedDirtyFwd) {
+        tbe.actions.push(Event:SendSnpNotSharedDirtyFwdCompData);
+    } else {
+        assert(tbe.reqType == CHIRequestType:SnpShared);
+        tbe.actions.push(Event:SendSnpData);
+    }
+    if (tbe.is_req_hazard || tbe.is_repl_hazard) {
+        tbe.actions.push(Event:RestoreFromHazard);
+    } else {
+        tbe.actions.pushNB(Event:TagArrayWrite);
+    }
+    tbe.dataToBeSharedClean := true;
+}
+
+action(Initiate_SnpOnce, desc="") {
+    tbe.actions.push(Event:SnpOncePipe);
+    if (tbe.dataValid == false) {
+        assert(tbe.dir_sharers.count() > 0);
+        tbe.actions.push(Event:SendSnpOnce);
+    } else {
+        tbe.actions.push(Event:DataArrayRead);
+    }
+
+    if (tbe.reqType == CHIRequestType:SnpOnceFwd) {
+        tbe.actions.push(Event:SendSnpOnceFwdCompData);
+    } else {
+        assert(tbe.reqType == CHIRequestType:SnpOnce);
+        assert(tbe.snpNeedsData);
tbe.actions.push(Event:SendSnpData); + } + + if (tbe.is_req_hazard || tbe.is_repl_hazard) { + tbe.actions.push(Event:RestoreFromHazard); + } else { + tbe.actions.pushNB(Event:TagArrayWrite); + } +} + + + +action(Initiate_Replacement_Evict_BackInvalidte, desc="") { + assert(is_HN == false); + tbe.actions.push(Event:SendSnpCleanInvalid); + tbe.actions.push(Event:SendEvict); + tbe.dataToBeInvalid := true; + tbe.actions.pushNB(Event:TagArrayWrite); +} + +action(Initiate_Replacement_Evict, desc="") { + assert(is_HN == false); + assert(tbe.dir_sharers.isEmpty()); + tbe.actions.push(Event:SendEvict); + tbe.dataToBeInvalid := true; + tbe.actions.pushNB(Event:TagArrayWrite); +} + +action(Initiate_Replacement_JustDrop, desc="") { + tbe.dataToBeInvalid := true; + tbe.actions.pushNB(Event:TagArrayWrite); +} + +action(Initiate_Replacement_WB_BackInvalidate, desc="") { + assert(tbe.dataDirty || tbe.dataUnique || tbe.dataMaybeDirtyUpstream); + tbe.actions.push(Event:SendSnpCleanInvalid); + tbe.actions.push(Event:WriteFEPipe); + if (is_HN) { + if (tbe.dataDirty || tbe.dataMaybeDirtyUpstream) { + tbe.actions.push(Event:SendWriteNoSnp); + } + } else { + tbe.actions.push(Event:SendWriteBackOrWriteEvict); + } + tbe.actions.pushNB(Event:DataArrayRead); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:SendWBData); + tbe.dataToBeInvalid := true; + + tbe.actions.pushNB(Event:TagArrayWrite); +} + +action(Initiate_Replacement_WB, desc="") { + tbe.actions.push(Event:WriteFEPipe); + if (is_HN) { + assert(tbe.dataDirty); + tbe.actions.push(Event:SendWriteNoSnp); + } else if (tbe.dir_sharers.isEmpty()) { + assert(tbe.dataDirty || tbe.dataUnique); + tbe.actions.push(Event:SendWriteBackOrWriteEvict); + } else { + assert(tbe.dataDirty); + tbe.actions.push(Event:SendWriteClean); + } + tbe.actions.pushNB(Event:DataArrayRead); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:SendWBData); + tbe.dataToBeInvalid := true; + tbe.actions.pushNB(Event:TagArrayWrite); +} + + + +action(Send_ReadShared, desc="") { + assert(is_HN == false); + assert(tbe.dataValid == false); + + clearExpectedReqResp(tbe); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:DataSepResp_UC); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_UC); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_UD_PD); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_SC); + if (allow_SD) { + tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_SD_PD); + } + // NOTE: the first CompData received counts as RespSepData + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:RespSepData); + tbe.expected_req_resp.setExpectedCount(2); + tbe.dataBlkValid.clear(); + + enqueue(reqOutPort, CHIRequestMsg, request_latency) { + if (allow_SD) { + prepareRequest(tbe, CHIRequestType:ReadShared, out_msg); + } else { + prepareRequest(tbe, CHIRequestType:ReadNotSharedDirty, out_msg); + } + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + out_msg.dataToFwdRequestor := false; + allowRequestRetry(tbe, out_msg); + } +} + +action(Send_ReadNoSnp, desc="") { + assert(is_HN); + assert(tbe.use_DMT == false); + + clearExpectedReqResp(tbe); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_UC); + // NOTE: the first CompData received counts as RespSepData + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:RespSepData); + tbe.expected_req_resp.setExpectedCount(2); + tbe.dataBlkValid.clear(); + outgoingTransactionStart(address, curTransitionEvent()); + + 
enqueue(reqOutPort, CHIRequestMsg, request_latency) { + prepareRequest(tbe, CHIRequestType:ReadNoSnp, out_msg); + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + out_msg.dataToFwdRequestor := false; + allowRequestRetry(tbe, out_msg); + } +} + +action(Send_ReadNoSnpDMT, desc="") { + assert(is_HN); + assert(tbe.use_DMT); + + CHIRequestType req := CHIRequestType:ReadNoSnp; + if (enable_DMT_early_dealloc) { + req := CHIRequestType:ReadNoSnpSep; + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:ReadReceipt); + tbe.expected_req_resp.addExpectedCount(1); + } + + enqueue(reqOutPort, CHIRequestMsg, request_latency) { + prepareRequest(tbe, req, out_msg); + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + out_msg.dataToFwdRequestor := true; + allowRequestRetry(tbe, out_msg); + } +} + +action(Send_ReadOnce, desc="") { + assert(is_HN == false); + assert(tbe.dataValid == false); + + clearExpectedReqResp(tbe); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:DataSepResp_UC); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_UC); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_I); + // NOTE: the first CompData received counts as RespSepData + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:RespSepData); + tbe.expected_req_resp.setExpectedCount(2); + tbe.dataBlkValid.clear(); + + enqueue(reqOutPort, CHIRequestMsg, request_latency) { + prepareRequest(tbe, CHIRequestType:ReadOnce, out_msg); + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + out_msg.dataToFwdRequestor := false; + allowRequestRetry(tbe, out_msg); + } +} + +action(Send_ReadUnique, desc="") { + assert((tbe.dataValid && tbe.dataUnique) == false); + + assert(tbe.expected_req_resp.hasExpected() == false); + clearExpectedReqResp(tbe); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:DataSepResp_UC); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_UC); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_UD_PD); + // NOTE: the first CompData received counts as RespSepData + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:RespSepData); + tbe.expected_req_resp.setExpectedCount(2); + + enqueue(reqOutPort, CHIRequestMsg, request_latency) { + prepareRequest(tbe, CHIRequestType:ReadUnique, out_msg); + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + out_msg.dataToFwdRequestor := false; + allowRequestRetry(tbe, out_msg); + } +} + +action(Send_CleanUnique, desc="") { + assert(tbe.dataValid || (tbe.dir_sharers.count() > 0)); + assert(tbe.dataUnique == false); + + assert(tbe.expected_req_resp.hasExpected() == false); + clearExpectedReqResp(tbe); + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:Comp_UC); + tbe.expected_req_resp.setExpectedCount(1); + + enqueue(reqOutPort, CHIRequestMsg, request_latency) { + prepareRequest(tbe, CHIRequestType:CleanUnique, out_msg); + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + allowRequestRetry(tbe, out_msg); + } +} + +action(Send_Evict, desc="") { + assert(is_valid(tbe)); + assert(is_HN == false); + assert(tbe.expected_req_resp.hasExpected() == false); + clearExpectedReqResp(tbe); + enqueue(reqOutPort, CHIRequestMsg, request_latency) { + prepareRequest(tbe, CHIRequestType:Evict, out_msg); + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + allowRequestRetry(tbe, out_msg); + } + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:Comp_I); + tbe.expected_req_resp.setExpectedCount(1); +} + 
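The Send_Read* actions above all follow the same ExpectedMap recipe, including the subtlety that the first CompData beat also counts as the RespSepData. A hedged sketch with a trimmed-down type list (illustrative, not the full set registered above):

```cpp
enum class CHIDataType { CompData_UC, CompData_SC };
enum class CHIResponseType { RespSepData };

void expectReadResponses(ExpectedMap<CHIDataType, CHIResponseType> &e,
                         int dataChunks)
{
    e.clear(dataChunks); // the line may arrive as several chunks
    e.addExpectedDataType(CHIDataType::CompData_UC);
    e.addExpectedDataType(CHIDataType::CompData_SC);
    e.addExpectedRespType(CHIResponseType::RespSepData);
    e.setExpectedCount(2); // one data message + one RespSepData
}
```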
+action(Send_InvSnpResp, desc="") { + assert(is_valid(tbe)); + assert(tbe.dataMaybeDirtyUpstream == false); + if (tbe.dataDirty || tbe.snpNeedsData || + (tbe.dataUnique && (tbe.reqType == CHIRequestType:SnpUnique))) { + tbe.actions.pushFront(Event:SendSnpData); + } else { + tbe.actions.pushFront(Event:SendSnpIResp); + } +} + +action(Send_WriteBackOrWriteEvict, desc="") { + assert(is_valid(tbe)); + assert(tbe.dataBlkValid.isFull()); + assert(tbe.dataValid); + assert(is_HN == false); + + assert(tbe.dataUnique || tbe.dataDirty); + assert(tbe.dir_sharers.isEmpty()); + + enqueue(reqOutPort, CHIRequestMsg, request_latency) { + if (tbe.dataDirty) { + prepareRequest(tbe, CHIRequestType:WriteBackFull, out_msg); + } else { + prepareRequest(tbe, CHIRequestType:WriteEvictFull, out_msg); + } + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + allowRequestRetry(tbe, out_msg); + } + clearExpectedReqResp(tbe); + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompDBIDResp); + tbe.expected_req_resp.setExpectedCount(1); +} + +action(Send_WriteCleanFull, desc="") { + assert(is_valid(tbe)); + assert(tbe.dataBlkValid.isFull()); + assert(tbe.dataValid); + assert(is_HN == false); + assert(tbe.dataDirty); + assert(tbe.dataMaybeDirtyUpstream == false); + + enqueue(reqOutPort, CHIRequestMsg, request_latency) { + prepareRequest(tbe, CHIRequestType:WriteCleanFull, out_msg); + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + allowRequestRetry(tbe, out_msg); + } + clearExpectedReqResp(tbe); + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompDBIDResp); + tbe.expected_req_resp.setExpectedCount(1); +} + +action(Send_WriteNoSnp, desc="") { + assert(is_valid(tbe)); + + enqueue(reqOutPort, CHIRequestMsg, request_latency) { + prepareRequest(tbe, CHIRequestType:WriteNoSnp, out_msg); + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + allowRequestRetry(tbe, out_msg); + } + // allow to expect this on top of data coming from upstream; + // so addExpectedCount + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompDBIDResp); + tbe.expected_req_resp.addExpectedCount(1); +} + +action(Send_WriteNoSnp_Partial, desc="") { + assert(is_valid(tbe)); + + enqueue(reqOutPort, CHIRequestMsg, request_latency) { + prepareRequest(tbe, CHIRequestType:WriteNoSnpPtl, out_msg); + out_msg.accAddr := tbe.accAddr; + out_msg.accSize := tbe.accSize; + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + allowRequestRetry(tbe, out_msg); + } + // allow to expect this on top of data coming from upstream; + // so addExpectedCount + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompDBIDResp); + tbe.expected_req_resp.addExpectedCount(1); +} + +action(Send_WriteUnique, desc="") { + assert(is_valid(tbe)); + + enqueue(reqOutPort, CHIRequestMsg, request_latency) { + if (tbe.accSize == blockSize) { + prepareRequest(tbe, CHIRequestType:WriteUniqueFull, out_msg); + } else { + prepareRequest(tbe, CHIRequestType:WriteUniquePtl, out_msg); + out_msg.accAddr := tbe.accAddr; + out_msg.accSize := tbe.accSize; + } + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + allowRequestRetry(tbe, out_msg); + } + // allow to expect this on top of data coming from upstream; + // so addExpectedCount + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompDBIDResp); + // if receive only DBIDResp then will expect Comp later + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:DBIDResp); + tbe.expected_req_resp.addExpectedCount(1); +} + 
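Note the contrast in the write-path actions above: the reads program an absolute total with setExpectedCount, while the writes stack their CompDBIDResp on top of data that upstream snoops may still owe, hence addExpectedCount. In miniature (counts illustrative):

```cpp
// Read path: nothing else outstanding, set the total absolutely.
expected.setExpectedCount(2);   // data + RespSepData

// Write path: snoop data may still be in flight; add on top of it.
expected.addExpectedCount(1);   // ... plus the CompDBIDResp
```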
+action(Send_SnpCleanInvalid, desc="") {
+    assert(is_valid(tbe));
+    assert(tbe.expected_snp_resp.hasExpected() == false);
+    // at least one sharer or owner, otherwise we should not execute this
+    assert(tbe.dir_sharers.count() > 0);
+    enqueue(snpOutPort, CHIRequestMsg, snoop_latency) {
+        prepareRequest(tbe, CHIRequestType:SnpCleanInvalid, out_msg);
+        out_msg.Destination.addNetDest(tbe.dir_sharers);
+        out_msg.retToSrc := false;
+    }
+    setExpectedForInvSnoop(tbe, false);
+}
+
+action(Send_SnpCleanInvalid_NoReq, desc="") {
+    assert(is_valid(tbe));
+    assert(tbe.expected_snp_resp.hasExpected() == false);
+    enqueue(snpOutPort, CHIRequestMsg, snoop_latency) {
+        prepareRequest(tbe, CHIRequestType:SnpCleanInvalid, out_msg);
+        out_msg.Destination.addNetDest(tbe.dir_sharers);
+        out_msg.Destination.remove(tbe.requestor);
+        // at least one sharer other than the requestor
+        assert(out_msg.Destination.count() > 0);
+        out_msg.retToSrc := false;
+        setExpectedForInvSnoop(tbe, false);
+        tbe.expected_snp_resp.setExpectedCount(out_msg.Destination.count());
+    }
+}
+
+action(Send_SnpUnique, desc="") {
+    assert(is_valid(tbe));
+    // at least one sharer or owner, otherwise we should not execute this
+    assert(tbe.dir_sharers.count() > 0);
+
+    setExpectedForInvSnoop(tbe, true);
+
+    enqueue(snpOutPort, CHIRequestMsg, snoop_latency) {
+        prepareRequest(tbe, CHIRequestType:SnpUnique, out_msg);
+        out_msg.Destination.addNetDest(tbe.dir_sharers);
+        out_msg.retToSrc := false;
+    }
+}
+
+action(Send_SnpUnique_RetToSrc, desc="") {
+    assert(is_valid(tbe));
+    // at least one sharer or owner, otherwise we should not execute this
+    assert(tbe.dir_sharers.count() > 0);
+
+    setExpectedForInvSnoop(tbe, true);
+
+    MachineID dest;
+    if (tbe.dir_ownerExists) {
+        dest := tbe.dir_owner;
+    } else {
+        // TODO should be random or the closest one
+        dest := tbe.dir_sharers.smallestElement();
+    }
+    enqueue(snpOutPort, CHIRequestMsg, snoop_latency) {
+        prepareRequest(tbe, CHIRequestType:SnpUnique, out_msg);
+        out_msg.Destination.add(dest);
+        out_msg.retToSrc := true;
+    }
+    // if there are other sharers, send with retToSrc=false to them
+    if (tbe.dir_sharers.count() > 1) {
+        enqueue(snpOutPort, CHIRequestMsg, snoop_latency) {
+            prepareRequest(tbe, CHIRequestType:SnpUnique, out_msg);
+            out_msg.Destination.addNetDest(tbe.dir_sharers);
+            out_msg.Destination.remove(dest);
+            out_msg.retToSrc := false;
+        }
+    }
+}
+
+action(Send_SnpUniqueFwd, desc="") {
+    assert(is_valid(tbe));
+    // a single sharer or owner, otherwise we should not execute this
+    assert(tbe.dir_sharers.count() == 1);
+
+    assert(tbe.expected_snp_resp.expected() == 0);
+    clearExpectedSnpResp(tbe);
+    tbe.expected_snp_resp.addExpectedRespType(CHIResponseType:SnpResp_I_Fwded_UC);
+    tbe.expected_snp_resp.addExpectedRespType(CHIResponseType:SnpResp_I_Fwded_UD_PD);
+    tbe.expected_snp_resp.addExpectedCount(1);
+
+    enqueue(snpOutPort, CHIRequestMsg, snoop_latency) {
+        prepareRequest(tbe, CHIRequestType:SnpUniqueFwd, out_msg);
+        out_msg.Destination.addNetDest(tbe.dir_sharers);
+        out_msg.retToSrc := false;
+    }
+}
+
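For the invalidating snoops above, the number of expected SnpResp messages simply tracks the snoop fan-out, with the requestor excluded when Send_SnpCleanInvalid_NoReq trims it from the destination set. A hedged restatement of the counting rule (the helper is illustrative):

```cpp
#include <cassert>

// One snoop response per destination; excluding the requestor from the
// destination set also excludes it from the expected count.
int expectedSnoopResponses(int numSharers, bool excludeRequestor)
{
    int dests = excludeRequestor ? numSharers - 1 : numSharers;
    assert(dests > 0); // never snoop an empty destination set
    return dests;
}
```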
+action(Send_SnpShared, desc="") {
+    assert(is_valid(tbe));
+
+    // only sent to a dirty or exclusive snoopee
+    assert(tbe.dataMaybeDirtyUpstream);
+    assert(tbe.dir_ownerExists);
+    assert(tbe.dir_sharers.count() > 0);
+
+    assert(tbe.expected_snp_resp.expected() == 0);
+    clearExpectedSnpResp(tbe);
+    tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_SC);
+    tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_SC_PD);
+    tbe.expected_snp_resp.setExpectedCount(1);
+
+    enqueue(snpOutPort, CHIRequestMsg, snoop_latency) {
+        prepareRequest(tbe, CHIRequestType:SnpShared, out_msg);
+        out_msg.Destination.add(tbe.dir_owner);
+        out_msg.retToSrc := false;
+    }
+}
+
+action(Send_SnpSharedFwd_ToOwner, desc="") {
+    assert(is_valid(tbe));
+
+    // the dirty snoopee must go to SC and send data
+    assert(tbe.dataMaybeDirtyUpstream);
+    assert(tbe.dir_ownerExists);
+    assert(tbe.dir_sharers.count() > 0);
+
+    assert(tbe.expected_snp_resp.expected() == 0);
+    clearExpectedSnpResp(tbe);
+
+    bool allowFwdSD := tbe.reqType != CHIRequestType:ReadNotSharedDirty;
+
+    // get us a copy if we have allocated a cache entry for this block
+    bool retToSrc := tbe.doCacheFill && (tbe.dataToBeInvalid == false);
+
+    if (allowFwdSD) {
+        if (retToSrc) {
+            tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_SC_Fwded_SC);
+            tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_SC_Fwded_SD_PD);
+            tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_I_Fwded_SC);
+            tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_I_Fwded_SD_PD);
+        } else {
+            tbe.expected_snp_resp.addExpectedRespType(CHIResponseType:SnpResp_SC_Fwded_SC);
+            tbe.expected_snp_resp.addExpectedRespType(CHIResponseType:SnpResp_SC_Fwded_SD_PD);
+        }
+    } else {
+        if (retToSrc) {
+            tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_SC_Fwded_SC);
+            tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_I_Fwded_SC);
+        } else {
+            tbe.expected_snp_resp.addExpectedRespType(CHIResponseType:SnpResp_SC_Fwded_SC);
+        }
+        tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_SC_PD_Fwded_SC);
+        tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_I_PD_Fwded_SC);
+    }
+    tbe.expected_snp_resp.addExpectedCount(1);
+
+    enqueue(snpOutPort, CHIRequestMsg, snoop_latency) {
+        if (allowFwdSD) {
+            prepareRequest(tbe, CHIRequestType:SnpSharedFwd, out_msg);
+        } else {
+            prepareRequest(tbe, CHIRequestType:SnpNotSharedDirtyFwd, out_msg);
+        }
+        out_msg.Destination.add(tbe.dir_owner);
+        out_msg.retToSrc := retToSrc;
+    }
+}
+
+action(Send_SnpSharedFwd_ToSharer, desc="") {
+    assert(is_valid(tbe));
+    // send to one of the sharers holding shared clean data
+    assert(tbe.dataMaybeDirtyUpstream == false);
+    assert(tbe.dir_ownerExists == false);
+    assert(tbe.dir_sharers.count() > 0);
+
+    assert(tbe.expected_snp_resp.expected() == 0);
+    clearExpectedSnpResp(tbe);
+    // if we have a block allocated for this line, ask the snoopee to forward
+    // data to us as well
+    bool retToSrc := tbe.doCacheFill;
+    if (retToSrc) {
+        tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_SC_Fwded_SC);
+    } else {
+        tbe.expected_snp_resp.addExpectedRespType(CHIResponseType:SnpResp_SC_Fwded_SC);
+    }
+    tbe.expected_snp_resp.addExpectedCount(1);
+
+    enqueue(snpOutPort, CHIRequestMsg, snoop_latency) {
+        prepareRequest(tbe, CHIRequestType:SnpSharedFwd, out_msg);
+        // TODO should be random or the closest one to the fwd dest
+        out_msg.Destination.add(tbe.dir_sharers.smallestElement());
+        out_msg.retToSrc := retToSrc;
+    }
+}
+
+action(Send_SnpOnce, desc="") {
+    assert(is_valid(tbe));
+
+    // send to one of the sharers or the owner to get a copy of the line
+    assert(tbe.dir_sharers.count() > 0);
+
+    assert(tbe.expected_snp_resp.expected() == 0);
+    clearExpectedSnpResp(tbe);
+
+    if (tbe.dir_ownerExists) {
+        if (tbe.dir_ownerIsExcl) {
+            tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_UC);
+            tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_UD);
+        } else {
tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_SD); + } + } else { + tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_SC); + } + tbe.expected_snp_resp.addExpectedCount(1); + + enqueue(snpOutPort, CHIRequestMsg, snoop_latency) { + prepareRequest(tbe, CHIRequestType:SnpOnce, out_msg); + if (tbe.dir_ownerExists) { + out_msg.Destination.add(tbe.dir_owner); + } else { + // TODO should be random or the closest one + out_msg.Destination.add(tbe.dir_sharers.smallestElement()); + } + out_msg.retToSrc := true; + } +} + +action(Send_SnpOnceFwd, desc="") { + assert(is_valid(tbe)); + + // send to one of the sharers or owner to get a copy of the line + assert(tbe.dir_sharers.count() > 0); + + assert(tbe.expected_snp_resp.expected() == 0); + clearExpectedSnpResp(tbe); + + if (tbe.dir_ownerExists) { + if (tbe.dir_ownerIsExcl) { + tbe.expected_snp_resp.addExpectedRespType(CHIResponseType:SnpResp_UC_Fwded_I); + tbe.expected_snp_resp.addExpectedRespType(CHIResponseType:SnpResp_UD_Fwded_I); + } else { + tbe.expected_snp_resp.addExpectedRespType(CHIResponseType:SnpResp_SD_Fwded_I); + } + } else { + tbe.expected_snp_resp.addExpectedRespType(CHIResponseType:SnpResp_SC_Fwded_I); + } + tbe.expected_snp_resp.addExpectedCount(1); + + enqueue(snpOutPort, CHIRequestMsg, snoop_latency) { + prepareRequest(tbe, CHIRequestType:SnpOnceFwd, out_msg); + if (tbe.dir_ownerExists) { + out_msg.Destination.add(tbe.dir_owner); + } else { + // TODO should be random or the closest one + out_msg.Destination.add(tbe.dir_sharers.smallestElement()); + } + out_msg.retToSrc := false; + } +} + + +action(ExpectNCBWrData, desc="") { + // Expected data + int num_msgs := tbe.accSize / data_channel_size; + if ((tbe.accSize % data_channel_size) != 0) { + num_msgs := num_msgs + 1; + } + tbe.expected_req_resp.clear(num_msgs); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:NCBWrData); + tbe.expected_req_resp.setExpectedCount(1); + + // Clear the mask bits we expect to receive + tbe.dataBlkValid.setMask(addressOffset(tbe.accAddr, tbe.addr), tbe.accSize, false); +} + +action(ExpectCompAck, desc="") { + assert(is_valid(tbe)); + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompAck); + tbe.expected_req_resp.addExpectedCount(1); +} + +action(Receive_ReqDataResp, desc="") { + assert(is_valid(tbe)); + assert(tbe.expected_req_resp.hasExpected()); + peek(datInPort, CHIDataMsg) { + // Decrement pending + if (tbe.expected_req_resp.receiveData(in_msg.type) == false) { + error("Received unexpected message"); + } + // Copy data to tbe only if we didn't have valid data or the received + // data is dirty + if ((tbe.dataBlkValid.isFull() == false) || + (in_msg.type == CHIDataType:CompData_UD_PD) || + (in_msg.type == CHIDataType:CompData_SD_PD) || + (in_msg.type == CHIDataType:CBWrData_UD_PD) || + (in_msg.type == CHIDataType:CBWrData_SD_PD) || + (in_msg.type == CHIDataType:NCBWrData)) { + // clear mask if started to receive new data + if(tbe.dataBlkValid.isFull()){ + tbe.dataBlkValid.clear(); + } + tbe.dataBlk.copyPartial(in_msg.dataBlk, in_msg.bitMask); + assert(tbe.dataBlkValid.isOverlap(in_msg.bitMask) == false); + tbe.dataBlkValid.orMask(in_msg.bitMask); + } + } +} + +action(Receive_RespSepDataFromCompData, desc="") { + assert(is_valid(tbe)); + assert(tbe.expected_req_resp.hasExpected()); + // check if a previous CompData msg already counted as a RespSepData + if (tbe.expected_req_resp.receivedRespType(CHIResponseType:RespSepData) == false) { + if 
(tbe.expected_req_resp.receiveResp(CHIResponseType:RespSepData) == false) { + error("Received unexpected message"); + } + if (is_HN == false) { + // must now ack the responder + tbe.actions.pushFrontNB(Event:SendCompAck); + } + } +} + +action(Receive_RespSepData, desc="") { + assert(is_valid(tbe)); + assert(tbe.expected_req_resp.hasExpected()); + if (tbe.expected_req_resp.receiveResp(CHIResponseType:RespSepData) == false) { + error("Received unexpected message"); + } + if (is_HN == false) { + // must now ack the responder + tbe.actions.pushFrontNB(Event:SendCompAck); + } +} + +action(Receive_ReadReceipt, desc="") { + assert(is_valid(tbe)); + assert(tbe.expected_req_resp.hasExpected()); + if (tbe.expected_req_resp.receiveResp(CHIResponseType:ReadReceipt) == false) { + error("Received unexpected message"); + } +} + +action(Receive_SnpDataResp, desc="") { + assert(is_valid(tbe)); + assert(tbe.expected_snp_resp.hasExpected()); + peek(datInPort, CHIDataMsg) { + // Decrement pending + if (tbe.expected_snp_resp.receiveData(in_msg.type) == false) { + error("Received unexpected message"); + } + // Copy data to tbe only if we didn't have valid data or the received + // data is dirty + if ((tbe.dataBlkValid.isFull() == false) || + (in_msg.type == CHIDataType:SnpRespData_I_PD) || + (in_msg.type == CHIDataType:SnpRespData_SC_PD) || + (in_msg.type == CHIDataType:SnpRespData_SC_Fwded_SD_PD) || + (in_msg.type == CHIDataType:SnpRespData_SC_PD_Fwded_SC) || + (in_msg.type == CHIDataType:SnpRespData_I_Fwded_SD_PD) || + (in_msg.type == CHIDataType:SnpRespData_I_PD_Fwded_SC)) { + // clear mask if started to receive new data + if(tbe.dataBlkValid.isFull()){ + tbe.dataBlkValid.clear(); + } + tbe.dataBlk.copyPartial(in_msg.dataBlk, in_msg.bitMask); + assert(tbe.dataBlkValid.isOverlap(in_msg.bitMask) == false); + tbe.dataBlkValid.orMask(in_msg.bitMask); + } + } +} + +action(UpdateDirState_FromReqDataResp, desc="") { + assert(is_valid(tbe)); + // only perform the update once we received all chunks + if (tbe.expected_req_resp.hasReceivedData()) { + assert(tbe.dataBlkValid.isFull()); + peek(datInPort, CHIDataMsg) { + + if (in_msg.type == CHIDataType:CBWrData_UC) { + assert(tbe.dir_ownerExists && tbe.dir_ownerIsExcl && (tbe.dir_owner == in_msg.responder)); + assert(tbe.dir_sharers.isElement(in_msg.responder)); + tbe.dir_ownerExists := false; + tbe.dir_ownerIsExcl := false; + tbe.dir_sharers.remove(in_msg.responder); + + } else if (in_msg.type == CHIDataType:CBWrData_UD_PD) { + assert(tbe.dir_ownerExists && tbe.dir_ownerIsExcl && (tbe.dir_owner == in_msg.responder)); + assert(tbe.dir_sharers.isElement(in_msg.responder)); + if (tbe.pendReqType != CHIRequestType:WriteCleanFull) { + tbe.dir_ownerExists := false; + tbe.dir_ownerIsExcl := false; + tbe.dir_sharers.remove(in_msg.responder); + } + + } else if (in_msg.type == CHIDataType:CBWrData_SC) { + assert((tbe.dir_ownerExists == false) || (tbe.dir_owner != in_msg.responder)); + tbe.dir_sharers.remove(in_msg.responder); + + } else if (in_msg.type == CHIDataType:CBWrData_SD_PD) { + assert(tbe.dir_ownerExists && (tbe.dir_ownerIsExcl == false) && (tbe.dir_owner == in_msg.responder)); + assert(tbe.dir_sharers.isElement(in_msg.responder)); + tbe.dir_ownerExists := false; + tbe.dir_ownerIsExcl := false; + if (tbe.pendReqType != CHIRequestType:WriteCleanFull) { + tbe.dir_sharers.remove(in_msg.responder); + } + + } else if (in_msg.type == CHIDataType:CBWrData_I) { + // nothing to do here; just check + assert((tbe.dir_ownerExists == false) || (tbe.dir_owner != in_msg.responder)); + 
assert(tbe.dir_sharers.isElement(in_msg.responder) == false);
+
+ } else {
+ error("Unsupported data type");
+ }
+ }
+ }
+ printTBEState(tbe);
+}
+
+action(UpdateDirState_FromSnpDataResp, desc="") {
+ assert(is_valid(tbe));
+ // only perform the update once we received all chunks
+ if (tbe.expected_snp_resp.hasReceivedData()) {
+ assert(tbe.dataBlkValid.isFull());
+ peek(datInPort, CHIDataMsg) {
+
+ if (in_msg.type == CHIDataType:SnpRespData_I) {
+ assert(tbe.dir_sharers.isElement(in_msg.responder));
+ tbe.dir_ownerExists := false;
+ tbe.dir_ownerIsExcl := false;
+ tbe.dir_sharers.remove(in_msg.responder);
+
+ } else if (in_msg.type == CHIDataType:SnpRespData_I_PD) {
+ assert(tbe.dir_ownerExists && (tbe.dir_owner == in_msg.responder));
+ assert(tbe.dir_sharers.isElement(in_msg.responder));
+ tbe.dir_ownerExists := false;
+ tbe.dir_ownerIsExcl := false;
+ tbe.dir_sharers.remove(in_msg.responder);
+
+ } else if ((in_msg.type == CHIDataType:SnpRespData_SC_PD) ||
+ (in_msg.type == CHIDataType:SnpRespData_SC) ||
+ (in_msg.type == CHIDataType:SnpRespData_SC_Fwded_SC) ||
+ (in_msg.type == CHIDataType:SnpRespData_SC_Fwded_SD_PD) ||
+ (in_msg.type == CHIDataType:SnpRespData_SC_PD_Fwded_SC)) {
+ // the owner must have been the responder, if there was one
+ assert((tbe.dir_ownerExists == false) ||
+ (tbe.dir_ownerExists && (tbe.dir_owner == in_msg.responder)));
+ assert(tbe.dir_sharers.isElement(in_msg.responder));
+ tbe.dir_ownerExists := false;
+ tbe.dir_ownerIsExcl := false;
+ if ((in_msg.type == CHIDataType:SnpRespData_SC_Fwded_SC) ||
+ (in_msg.type == CHIDataType:SnpRespData_SC_PD_Fwded_SC) ||
+ (in_msg.type == CHIDataType:SnpRespData_SC_Fwded_SD_PD)) {
+ tbe.dir_sharers.add(tbe.requestor);
+ }
+ if (in_msg.type == CHIDataType:SnpRespData_SC_Fwded_SD_PD) {
+ tbe.dir_ownerExists := true;
+ tbe.dir_owner := tbe.requestor;
+ }
+
+ } else if ((in_msg.type == CHIDataType:SnpRespData_I_Fwded_SD_PD) ||
+ (in_msg.type == CHIDataType:SnpRespData_I_PD_Fwded_SC) ||
+ (in_msg.type == CHIDataType:SnpRespData_I_Fwded_SC)) {
+ // the owner must have been the responder, if there was one
+ assert((tbe.dir_ownerExists == false) ||
+ (tbe.dir_ownerExists && (tbe.dir_owner == in_msg.responder)));
+ assert(tbe.dir_sharers.isElement(in_msg.responder));
+ tbe.dir_ownerExists := false;
+ tbe.dir_ownerIsExcl := false;
+ tbe.dir_sharers.remove(in_msg.responder);
+ tbe.dir_sharers.add(tbe.requestor);
+ if (in_msg.type == CHIDataType:SnpRespData_I_Fwded_SD_PD) {
+ tbe.dir_ownerExists := true;
+ tbe.dir_owner := tbe.requestor;
+ }
+
+ } else if ((in_msg.type == CHIDataType:SnpRespData_SD) ||
+ (in_msg.type == CHIDataType:SnpRespData_UC) ||
+ (in_msg.type == CHIDataType:SnpRespData_UD)) {
+ // expected only in response to a SnpOnce; just do some checks
+ // also may get SnpRespData_SC, but handled properly above
+ assert(tbe.dir_ownerExists && (tbe.dir_owner == in_msg.responder));
+ assert(tbe.dir_sharers.isElement(in_msg.responder));
+
+ } else {
+ error("Unsupported data type");
+ }
+ }
+ }
+ printTBEState(tbe);
+}
+
+action(UpdateDataState_FromReqDataResp, desc="") {
+ assert(is_valid(tbe));
+ // only perform the update once we received all chunks
+ if (tbe.expected_req_resp.hasReceivedData()) {
+ assert(tbe.dataBlkValid.isFull());
+ peek(datInPort, CHIDataMsg) {
+
+ if ((in_msg.type == CHIDataType:CompData_UC) ||
+ (in_msg.type == CHIDataType:DataSepResp_UC)) {
+ assert(tbe.dataUnique == false);
+ assert((tbe.dataValid && tbe.dataDirty) == false);
+ tbe.dataDirty := false;
+ tbe.dataUnique := true;
+ tbe.dataValid := true;
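+ // note: a clean Unique copy was granted, so no upstream cache can
+ // hold dirty data here (checked by the assert below)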
+ assert(tbe.dataMaybeDirtyUpstream == false);
+
+ } else if (in_msg.type == CHIDataType:CompData_UD_PD) {
+ assert(tbe.dataUnique == false);
+ assert((tbe.dataValid && tbe.dataDirty) == false);
+ tbe.dataDirty := true;
+ tbe.dataUnique := true;
+ tbe.dataValid := true;
+ assert(tbe.dataMaybeDirtyUpstream == false);
+
+ } else if (in_msg.type == CHIDataType:CompData_SC) {
+ assert(tbe.dataUnique == false);
+ assert((tbe.dataValid && tbe.dataDirty) == false);
+ tbe.dataDirty := false;
+ tbe.dataUnique := false;
+ tbe.dataValid := true;
+ assert(tbe.dataMaybeDirtyUpstream == false);
+
+ } else if (in_msg.type == CHIDataType:CompData_SD_PD) {
+ assert(tbe.dataUnique == false);
+ assert((tbe.dataValid && tbe.dataDirty) == false);
+ tbe.dataDirty := true;
+ tbe.dataUnique := false;
+ tbe.dataValid := true;
+ assert(tbe.dataMaybeDirtyUpstream == false);
+
+ } else if (in_msg.type == CHIDataType:CompData_I) {
+ tbe.dataValid := true;
+ tbe.dataToBeInvalid := true;
+ assert(tbe.dataMaybeDirtyUpstream == false);
+
+ } else if (in_msg.type == CHIDataType:CBWrData_UC) {
+ assert(tbe.dataUnique);
+ tbe.dataMaybeDirtyUpstream := false;
+ tbe.dataValid := true;
+
+ } else if (in_msg.type == CHIDataType:CBWrData_SC) {
+ // stale WB, nothing to do ??
+
+ } else if (in_msg.type == CHIDataType:CBWrData_UD_PD) {
+ assert(tbe.dataUnique);
+ tbe.dataDirty := true;
+ tbe.dataValid := true;
+ tbe.dataMaybeDirtyUpstream := false;
+
+ } else if (in_msg.type == CHIDataType:CBWrData_SD_PD) {
+ tbe.dataDirty := true;
+ tbe.dataValid := true;
+ tbe.dataMaybeDirtyUpstream := false;
+
+ } else if (in_msg.type == CHIDataType:CBWrData_I) {
+ // stale WB, nothing to do ??
+
+ } else {
+ error("Unsupported data type");
+ }
+ }
+ }
+ printTBEState(tbe);
+}
+
+action(UpdateDataState_FromWUDataResp, desc="") {
+ assert(is_valid(tbe));
+ int offset := addressOffset(tbe.accAddr, tbe.addr);
+ if (tbe.expected_req_resp.hasReceivedData()) {
+ assert(tbe.dataBlkValid.test(offset));
+ assert(tbe.dataBlkValid.test(offset + tbe.accSize - 1));
+ peek(datInPort, CHIDataMsg) {
+ assert(in_msg.type == CHIDataType:NCBWrData);
+ tbe.dataDirty := true;
+ tbe.dataValid := tbe.accSize == blockSize;
+ }
+ }
+ printTBEState(tbe);
+}
+
+action(UpdateDataState_FromCUResp, desc="") {
+ assert(is_valid(tbe));
+ peek(rspInPort, CHIResponseMsg) {
+ assert(in_msg.type == CHIResponseType:Comp_UC);
+ assert(tbe.dataUnique == false);
+ tbe.dataUnique := tbe.dataValid || (tbe.dir_sharers.count() > 0);
+ // self and upstream may have been invalidated while waiting for this
+ // expect to follow up with a ReadUnique
+ }
+ printTBEState(tbe);
+}
+
+action(UpdateDataState_FromSnpDataResp, desc="") {
+ assert(is_valid(tbe));
+ // only perform the update once we received all chunks
+ if (tbe.expected_snp_resp.hasReceivedData()) {
+ assert(tbe.dataBlkValid.isFull());
+ peek(datInPort, CHIDataMsg) {
+
+ if ((in_msg.type == CHIDataType:SnpRespData_I_PD) ||
+ (in_msg.type == CHIDataType:SnpRespData_SC_PD) ||
+ (in_msg.type == CHIDataType:SnpRespData_SC_PD_Fwded_SC) ||
+ (in_msg.type == CHIDataType:SnpRespData_I_PD_Fwded_SC)) {
+ tbe.dataDirty := true;
+ tbe.dataValid := true;
+ tbe.dataMaybeDirtyUpstream := false;
+
+ } else if ((in_msg.type == CHIDataType:SnpRespData_SD) ||
+ (in_msg.type == CHIDataType:SnpRespData_SC_Fwded_SD_PD) ||
+ (in_msg.type == CHIDataType:SnpRespData_I_Fwded_SD_PD)) {
+ tbe.dataDirty := true;
+ tbe.dataValid := true;
+ tbe.dataMaybeDirtyUpstream := true;
+
+ } else if ((in_msg.type == CHIDataType:SnpRespData_I) ||
+ (in_msg.type == CHIDataType:SnpRespData_SC) ||
+ (in_msg.type == CHIDataType:SnpRespData_SC_Fwded_SC) ||
+ (in_msg.type == CHIDataType:SnpRespData_I_Fwded_SC)) {
+ tbe.dataValid := true;
+ tbe.dataMaybeDirtyUpstream := false;
+
+ } else if ((in_msg.type == CHIDataType:SnpRespData_UC) ||
+ (in_msg.type == CHIDataType:SnpRespData_UD)) {
+ tbe.dataValid := true;
+ tbe.dataUnique := true;
+ tbe.dataMaybeDirtyUpstream := true;
+ if (in_msg.type == CHIDataType:SnpRespData_UD){
+ tbe.dataDirty := true;
+ }
+
+ } else {
+ error("Unsupported data type");
+ }
+ }
+ }
+ printTBEState(tbe);
+}
+
+action(UpdateDirState_FromReqResp, desc="") {
+ peek(rspInPort, CHIResponseMsg) {
+ if ((in_msg.type == CHIResponseType:CompAck) && tbe.updateDirOnCompAck) {
+ assert(tbe.requestor == in_msg.responder);
+
+ tbe.dir_sharers.add(in_msg.responder);
+
+ if (tbe.requestorToBeOwner) {
+ assert(tbe.dataMaybeDirtyUpstream);
+ assert(tbe.dir_ownerExists == false);
+ assert(tbe.requestorToBeExclusiveOwner == false);
+ tbe.dir_owner := in_msg.responder;
+ tbe.dir_ownerExists := true;
+ tbe.dir_ownerIsExcl := false;
+
+ } else if (tbe.requestorToBeExclusiveOwner) {
+ assert(tbe.dataMaybeDirtyUpstream);
+ assert(tbe.dir_ownerExists == false);
+ assert(tbe.dir_sharers.count() == 1);
+ tbe.dir_owner := in_msg.responder;
+ tbe.dir_ownerExists := true;
+ tbe.dir_ownerIsExcl := true;
+ }
+ }
+ }
+ printTBEState(tbe);
+}
+
+action(UpdateDirState_FromSnpResp, desc="") {
+ peek(rspInPort, CHIResponseMsg) {
+
+ if (in_msg.type == CHIResponseType:SnpResp_I) {
+ // must have been a known sharer otherwise we would receive data
+ assert(tbe.dir_sharers.isElement(in_msg.responder));
+ tbe.dir_sharers.remove(in_msg.responder);
+ if (tbe.dir_ownerExists && (tbe.dir_owner == in_msg.responder)){
+ tbe.dir_ownerExists := false;
+ }
+
+ } else if (in_msg.type == CHIResponseType:SnpResp_SC) {
+ // expected from a sharer that already has it in shared state
+ assert(tbe.dir_sharers.isElement(in_msg.responder));
+ assert((tbe.dir_ownerExists == false) || (tbe.dir_owner != in_msg.responder));
+
+ } else if ((in_msg.type == CHIResponseType:SnpResp_SC_Fwded_SC) ||
+ (in_msg.type == CHIResponseType:SnpResp_SC_Fwded_SD_PD)) {
+ // the SnpSharedFwd must have been sent to the owner if there was one
+ assert((tbe.dir_ownerExists == false) ||
+ (tbe.dir_ownerExists && (tbe.dir_owner == in_msg.responder)));
+ assert(tbe.dir_sharers.isElement(in_msg.responder));
+ tbe.dir_ownerExists := false;
+ tbe.dir_ownerIsExcl := false;
+ tbe.dir_sharers.add(tbe.requestor);
+ if (in_msg.type == CHIResponseType:SnpResp_SC_Fwded_SD_PD) {
+ // Requestor is new owner
+ tbe.dir_ownerExists := true;
+ tbe.dir_owner := tbe.requestor;
+ }
+
+ } else if ((in_msg.type == CHIResponseType:SnpResp_I_Fwded_UC) ||
+ (in_msg.type == CHIResponseType:SnpResp_I_Fwded_UD_PD)) {
+ // must have been a single sharer that received SnpUniqueFwd
+ assert(tbe.dir_sharers.isElement(in_msg.responder));
+ assert(tbe.dir_sharers.count() == 1);
+ tbe.dir_sharers.remove(in_msg.responder);
+ // requestor is the new owner
+ tbe.dir_sharers.add(tbe.requestor);
+ tbe.dir_ownerExists := true;
+ tbe.dir_ownerIsExcl := true;
+ tbe.dir_owner := tbe.requestor;
+
+ } else if ((in_msg.type == CHIResponseType:SnpResp_UC_Fwded_I) ||
+ (in_msg.type == CHIResponseType:SnpResp_UD_Fwded_I) ||
+ (in_msg.type == CHIResponseType:SnpResp_SD_Fwded_I)) {
+ // SnpOnceFwd; just confirm
+ assert(tbe.dir_sharers.isElement(in_msg.responder));
+ assert(tbe.dir_ownerExists && (tbe.dir_owner == in_msg.responder));
+
+ } else if (in_msg.type == CHIResponseType:SnpResp_SC_Fwded_I) {
+ // SnpOnceFwd; just confirm
+ assert(tbe.dir_sharers.isElement(in_msg.responder));
+ assert((tbe.dir_ownerExists == false) || (tbe.dir_owner != in_msg.responder));
+ }
+
+ tbe.dataMaybeDirtyUpstream := tbe.dir_ownerExists;
+
+ }
+ printTBEState(tbe);
+}
+
+action(Receive_ReqResp, desc="") {
+ assert(tbe.expected_req_resp.hasExpected());
+ peek(rspInPort, CHIResponseMsg) {
+ // Decrement pending
+ if (tbe.expected_req_resp.receiveResp(in_msg.type) == false) {
+ error("Received unexpected message");
+ }
+ assert(in_msg.stale == tbe.is_stale);
+ }
+}
+
+action(Receive_ReqResp_WUNeedComp, desc="") {
+ tbe.defer_expected_comp := true;
+}
+
+action(Receive_ReqResp_WUComp, desc="") {
+ if (tbe.defer_expected_comp) {
+ tbe.defer_expected_comp := false;
+ } else if (tbe.expected_req_resp.receiveResp(CHIResponseType:Comp) == false) {
+ error("Received unexpected message");
+ }
+}
+
+action(Receive_SnpResp, desc="") {
+ assert(tbe.expected_snp_resp.hasExpected());
+ peek(rspInPort, CHIResponseMsg) {
+ // Decrement pending
+ if (tbe.expected_snp_resp.receiveResp(in_msg.type) == false) {
+ error("Received unexpected message");
+ }
+ assert(in_msg.stale == tbe.is_stale);
+ }
+}
+
+action(Receive_RetryAck, desc="") {
+ assert(is_valid(tbe));
+ assert(tbe.pendReqAllowRetry);
+ assert(tbe.rcvdRetryAck == false);
+ tbe.rcvdRetryAck := true;
+ destsWaitingRetry.addNetDest(tbe.pendReqDest);
+ enqueueDoRetry(tbe);
+}
+
+action(Receive_PCrdGrant, desc="") {
+ assert(tbe.pendReqAllowRetry);
+ assert(tbe.rcvdRetryCredit == false);
+ tbe.rcvdRetryCredit := true;
+ enqueueDoRetry(tbe);
+}
+
+action(Send_Retry, desc="") {
+ assert(tbe.pendReqAllowRetry);
+ assert(tbe.rcvdRetryCredit);
+ assert(tbe.rcvdRetryAck);
+ enqueue(reqOutPort, CHIRequestMsg, request_latency) {
+ prepareRequestRetry(tbe, out_msg);
+ }
+}
+
+action(Receive_RetryAck_Hazard, desc="") {
+ TBE hazard_tbe := getHazardTBE(tbe);
+ assert(hazard_tbe.pendReqAllowRetry);
+ assert(hazard_tbe.rcvdRetryAck == false);
+ hazard_tbe.rcvdRetryAck := true;
+ destsWaitingRetry.addNetDest(hazard_tbe.pendReqDest);
+ enqueueDoRetry(hazard_tbe);
+}
+
+action(Receive_PCrdGrant_Hazard, desc="") {
+ TBE hazard_tbe := getHazardTBE(tbe);
+ assert(hazard_tbe.pendReqAllowRetry);
+ assert(hazard_tbe.rcvdRetryCredit == false);
+ hazard_tbe.rcvdRetryCredit := true;
+ enqueueDoRetry(hazard_tbe);
+}
+
+action(Send_Retry_Hazard, desc="") {
+ TBE hazard_tbe := getHazardTBE(tbe);
+ assert(hazard_tbe.pendReqAllowRetry);
+ assert(hazard_tbe.rcvdRetryCredit);
+ assert(hazard_tbe.rcvdRetryAck);
+ enqueue(reqOutPort, CHIRequestMsg, request_latency) {
+ prepareRequestRetry(hazard_tbe, out_msg);
+ }
+}
+
+action(Send_CompData, desc="") {
+ assert(is_valid(tbe));
+ assert(tbe.dataValid);
+
+ bool is_rd_once := tbe.reqType == CHIRequestType:ReadOnce;
+ bool is_rd_shared := (tbe.reqType == CHIRequestType:ReadShared) ||
+ (tbe.reqType == CHIRequestType:ReadNotSharedDirty);
+ bool is_rd_nsd := tbe.reqType == CHIRequestType:ReadNotSharedDirty;
+ bool is_rd_unique := tbe.reqType == CHIRequestType:ReadUnique;
+
+ if (is_rd_once) {
+ tbe.snd_msgType := CHIDataType:CompData_I;
+ } else if (tbe.dataToBeInvalid) {
+ // We will drop the data so propagate its coherent state upstream
+ if (tbe.dataUnique && tbe.dataDirty) {
+ tbe.snd_msgType := CHIDataType:CompData_UD_PD;
+ } else if (tbe.dataUnique) {
+ tbe.snd_msgType := CHIDataType:CompData_UC;
+ } else if (tbe.dataDirty) {
+ if (is_rd_nsd) {
+ tbe.snd_msgType := CHIDataType:CompData_SC;
+ } else {
+
tbe.snd_msgType := CHIDataType:CompData_SD_PD; + } + } else { + tbe.snd_msgType := CHIDataType:CompData_SC; + } + } else if (is_rd_unique || + (is_rd_shared && tbe.dataUnique && + fwd_unique_on_readshared && (tbe.dir_ownerExists == false))) { + // propagates dirtyness + assert(tbe.dataUnique); + if (tbe.dataDirty) { + tbe.snd_msgType := CHIDataType:CompData_UD_PD; + } else { + tbe.snd_msgType := CHIDataType:CompData_UC; + } + } else if (is_rd_shared) { + // still keeping a copy so can send as SC + tbe.snd_msgType := CHIDataType:CompData_SC; + } else { + error("Invalid request type"); + } + + tbe.dataMaybeDirtyUpstream := tbe.dataMaybeDirtyUpstream || + (tbe.snd_msgType == CHIDataType:CompData_UD_PD) || + (tbe.snd_msgType == CHIDataType:CompData_SD_PD) || + (tbe.snd_msgType == CHIDataType:CompData_UC); + tbe.requestorToBeExclusiveOwner := tbe.requestorToBeExclusiveOwner || + (tbe.snd_msgType == CHIDataType:CompData_UD_PD) || + (tbe.snd_msgType == CHIDataType:CompData_UC); + tbe.requestorToBeOwner := tbe.requestorToBeOwner || + (tbe.snd_msgType == CHIDataType:CompData_SD_PD); + + tbe.snd_destination := tbe.requestor; + setupPendingSend(tbe); + printTBEState(tbe); +} + +action(Send_WBData, desc="") { + assert(is_valid(tbe)); + if (is_HN) { + assert(tbe.dataBlkValid.isFull()); + assert(tbe.dataDirty); + assert(tbe.dataValid); + tbe.snd_msgType := CHIDataType:NCBWrData; + } else { + if (tbe.dataValid == false) { + // only possible when the WB was made stale by a snoop + assert(tbe.is_stale); + tbe.dataBlkValid.fillMask(); + tbe.snd_msgType := CHIDataType:CBWrData_I; + } else if (tbe.dataUnique) { + assert(tbe.dataBlkValid.isFull()); + if (tbe.dataDirty) { + tbe.snd_msgType := CHIDataType:CBWrData_UD_PD; + } else { + tbe.snd_msgType := CHIDataType:CBWrData_UC; + } + } else { + assert(tbe.dataBlkValid.isFull()); + if (tbe.dataDirty) { + tbe.snd_msgType := CHIDataType:CBWrData_SD_PD; + } else { + tbe.snd_msgType := CHIDataType:CBWrData_SC; + } + } + } + tbe.snd_destination := mapAddressToDownstreamMachine(tbe.addr); + setupPendingSend(tbe); +} + +action(Send_WUData, desc="") { + assert(is_valid(tbe)); + assert(tbe.dataBlkValid.count() > 0); + tbe.snd_msgType := CHIDataType:NCBWrData; + tbe.snd_destination := mapAddressToDownstreamMachine(tbe.addr); + setupPendingPartialSend(tbe); +} + +action(CheckWUComp, desc="") { + assert(is_valid(tbe)); + if (tbe.defer_expected_comp) { + tbe.defer_expected_comp := false; + tbe.expected_req_resp.addExpectedCount(1); + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:Comp); + } +} + +action(Send_SnpRespData, desc="") { + assert(is_HN == false); + assert(is_valid(tbe)); + assert(tbe.dataBlkValid.isFull()); + assert(tbe.dataValid); + + assert(tbe.snpNeedsData || + (tbe.dataDirty && (tbe.reqType == CHIRequestType:SnpCleanInvalid)) || + ((tbe.dataDirty || tbe.dataUnique) && (tbe.reqType == CHIRequestType:SnpShared)) || + ((tbe.dataDirty || tbe.dataUnique) && (tbe.reqType == CHIRequestType:SnpUnique))); + + if (tbe.dataToBeInvalid) { + assert(tbe.dataMaybeDirtyUpstream == false); + if (tbe.dataDirty) { + tbe.snd_msgType := CHIDataType:SnpRespData_I_PD; + } else { + tbe.snd_msgType := CHIDataType:SnpRespData_I; + } + } else if (tbe.dataToBeSharedClean) { + assert(tbe.dataMaybeDirtyUpstream == false); + if (tbe.dataDirty) { + tbe.snd_msgType := CHIDataType:SnpRespData_SC_PD; + } else { + tbe.snd_msgType := CHIDataType:SnpRespData_SC; + } + } else { + assert(tbe.reqType == CHIRequestType:SnpOnce); + if (tbe.dataDirty && tbe.dataUnique) { + tbe.snd_msgType := 
CHIDataType:SnpRespData_UD;
+ } else if (tbe.dataDirty) {
+ tbe.snd_msgType := CHIDataType:SnpRespData_SD;
+ } else if (tbe.dataUnique) {
+ tbe.snd_msgType := CHIDataType:SnpRespData_UC;
+ } else {
+ tbe.snd_msgType := CHIDataType:SnpRespData_SC;
+ }
+ }
+
+ tbe.snd_destination := tbe.requestor;
+ setupPendingSend(tbe);
+}
+
+action(Send_CompData_SnpUniqueFwd, desc="") {
+ assert(tbe.dataValid);
+ assert(tbe.dataToBeInvalid);
+ assert(tbe.dataMaybeDirtyUpstream == false);
+
+ if (tbe.dataDirty) {
+ tbe.fwdedState := State:UD;
+ tbe.snd_msgType := CHIDataType:CompData_UD_PD;
+ } else {
+ tbe.fwdedState := State:UC;
+ tbe.snd_msgType := CHIDataType:CompData_UC;
+ }
+ tbe.actions.pushFront(Event:SendSnpFwdedResp);
+
+ tbe.snd_destination := tbe.fwdRequestor;
+ setupPendingSend(tbe);
+}
+
+action(Send_CompData_SnpSharedFwd, desc="") {
+ assert(tbe.dataValid);
+ assert(tbe.dataToBeSharedClean);
+ assert(tbe.dataMaybeDirtyUpstream == false);
+
+ if (tbe.dataDirty) {
+ tbe.fwdedState := State:SD;
+ tbe.snd_msgType := CHIDataType:CompData_SD_PD;
+ } else {
+ tbe.fwdedState := State:SC;
+ tbe.snd_msgType := CHIDataType:CompData_SC;
+ }
+ if (tbe.snpNeedsData) {
+ tbe.actions.pushFront(Event:SendSnpFwdedData);
+ } else {
+ tbe.actions.pushFront(Event:SendSnpFwdedResp);
+ }
+
+ tbe.snd_destination := tbe.fwdRequestor;
+ setupPendingSend(tbe);
+}
+
+action(Send_CompData_SnpNSDFwd, desc="") {
+ assert(tbe.dataValid);
+ assert(tbe.dataToBeSharedClean);
+ assert(tbe.dataMaybeDirtyUpstream == false);
+
+ tbe.snd_msgType := CHIDataType:CompData_SC;
+ tbe.fwdedState := State:SC;
+ if (tbe.dataDirty || tbe.snpNeedsData) {
+ tbe.actions.pushFront(Event:SendSnpFwdedData);
+ } else {
+ tbe.actions.pushFront(Event:SendSnpFwdedResp);
+ }
+
+ tbe.snd_destination := tbe.fwdRequestor;
+ setupPendingSend(tbe);
+}
+
+action(Send_CompData_SnpOnceFwd, desc="") {
+ assert(tbe.dataValid);
+
+ tbe.fwdedState := State:I;
+ tbe.snd_msgType := CHIDataType:CompData_I;
+ tbe.actions.pushFront(Event:SendSnpFwdedResp);
+
+ tbe.snd_destination := tbe.fwdRequestor;
+ setupPendingSend(tbe);
+}
+
+action(Send_SnpRespDataFwded, desc="") {
+ assert(tbe.dataValid);
+
+ // right now this is only used for SnpShared/SnpNSD, so check
+ assert(tbe.dataToBeSharedClean);
+ assert(tbe.dataMaybeDirtyUpstream == false);
+
+ // We have the data (locally or upstream) or are dropping it
+ bool keepData := (tbe.dir_sharers.count() > 0) ||
+ (tbe.dataToBeInvalid == false);
+
+ if (keepData) {
+ if (tbe.fwdedState == State:SD) {
+ tbe.snd_msgType := CHIDataType:SnpRespData_SC_Fwded_SD_PD;
+ } else if (tbe.dataDirty && (tbe.fwdedState == State:SC)) {
+ tbe.snd_msgType := CHIDataType:SnpRespData_SC_PD_Fwded_SC;
+ } else {
+ tbe.snd_msgType := CHIDataType:SnpRespData_SC_Fwded_SC;
+ }
+ } else {
+ if (tbe.fwdedState == State:SD) {
+ tbe.snd_msgType := CHIDataType:SnpRespData_I_Fwded_SD_PD;
+ } else if (tbe.dataDirty && (tbe.fwdedState == State:SC)) {
+ tbe.snd_msgType := CHIDataType:SnpRespData_I_PD_Fwded_SC;
+ } else {
+ tbe.snd_msgType := CHIDataType:SnpRespData_I_Fwded_SC;
+ }
+ }
+
+ tbe.snd_destination := tbe.requestor;
+ setupPendingSend(tbe);
+}
+
+action(Send_FwdSnpResp, desc="") {
+ assert(is_valid(tbe));
+ assert(tbe.dataValid);
+
+ enqueue(rspOutPort, CHIResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.responder := machineID;
+ out_msg.Destination.add(tbe.requestor);
+
+ // We have the data (locally or upstream) or are dropping it
+ bool keepData := (tbe.dir_sharers.count() > 0) ||
+ (tbe.dataToBeInvalid == false);
+
+ if (keepData && tbe.dataToBeSharedClean) {
+ assert((tbe.reqType == CHIRequestType:SnpSharedFwd) ||
+ (tbe.reqType == CHIRequestType:SnpNotSharedDirtyFwd));
+ assert(tbe.dataMaybeDirtyUpstream == false);
+ if (tbe.fwdedState == State:SD) {
+ out_msg.type := CHIResponseType:SnpResp_SC_Fwded_SD_PD;
+ } else {
+ assert(tbe.fwdedState == State:SC);
+ out_msg.type := CHIResponseType:SnpResp_SC_Fwded_SC;
+ }
+
+ } else if (keepData) {
+ assert(tbe.reqType == CHIRequestType:SnpOnceFwd);
+ assert(tbe.fwdedState == State:I);
+ if (tbe.dataUnique && (tbe.dataDirty || tbe.dataMaybeDirtyUpstream)) {
+ out_msg.type := CHIResponseType:SnpResp_UD_Fwded_I;
+ } else if (tbe.dataUnique) {
+ out_msg.type := CHIResponseType:SnpResp_UC_Fwded_I;
+ } else if (tbe.dataDirty || tbe.dataMaybeDirtyUpstream) {
+ out_msg.type := CHIResponseType:SnpResp_SD_Fwded_I;
+ } else {
+ out_msg.type := CHIResponseType:SnpResp_SC_Fwded_I;
+ }
+
+ } else {
+ assert(tbe.reqType == CHIRequestType:SnpUniqueFwd);
+ assert(tbe.dataMaybeDirtyUpstream == false);
+ if (tbe.fwdedState == State:UD) {
+ out_msg.type := CHIResponseType:SnpResp_I_Fwded_UD_PD;
+ } else {
+ assert(tbe.fwdedState == State:UC);
+ out_msg.type := CHIResponseType:SnpResp_I_Fwded_UC;
+ }
+ }
+ }
+}
+
+action(Send_Data, desc="") {
+ assert(tbe.snd_pendEv);
+ assert(tbe.snd_pendBytes.count() > 0);
+ tbe.snd_pendEv := false;
+ enqueue(datOutPort, CHIDataMsg, data_latency) {
+ out_msg.addr := tbe.addr;
+ out_msg.type := tbe.snd_msgType;
+
+ int offset := tbe.snd_pendBytes.firstBitSet(true);
+ assert(offset < blockSize);
+ int range := tbe.snd_pendBytes.firstBitSet(false, offset) - offset;
+ assert((range > 0) && (range <= blockSize));
+ if (range > data_channel_size) {
+ range := data_channel_size;
+ }
+ tbe.snd_pendBytes.setMask(offset, range, false);
+
+ out_msg.dataBlk := tbe.dataBlk;
+ out_msg.bitMask.setMask(offset, range);
+
+ out_msg.responder := machineID;
+
+ out_msg.Destination.add(tbe.snd_destination);
+ }
+
+ // send next chunk (if any) next cycle
+ scheduleSendData(tbe, 1);
+}
+
+action(Send_RespSepData, desc="") {
+ assert(is_valid(tbe));
+ enqueue(rspOutPort, CHIResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.type := CHIResponseType:RespSepData;
+ out_msg.responder := machineID;
+ out_msg.Destination.add(tbe.requestor);
+ }
+}
+
+action(Send_CompI, desc="") {
+ assert(is_valid(tbe));
+ enqueue(rspOutPort, CHIResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.type := CHIResponseType:Comp_I;
+ out_msg.responder := machineID;
+ out_msg.Destination.add(tbe.requestor);
+ }
+}
+
+action(Send_CompUC, desc="") {
+ assert(is_valid(tbe));
+ enqueue(rspOutPort, CHIResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.type := CHIResponseType:Comp_UC;
+ out_msg.responder := machineID;
+ out_msg.Destination.add(tbe.requestor);
+ }
+}
+
+action(Send_CompAck, desc="") {
+ assert(is_valid(tbe));
+ enqueue(rspOutPort, CHIResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.type := CHIResponseType:CompAck;
+ out_msg.responder := machineID;
+ out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr));
+ }
+}
+
+action(Send_CompI_Stale, desc="") {
+ assert(is_valid(tbe));
+ enqueue(rspOutPort, CHIResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.type := CHIResponseType:Comp_I;
+ out_msg.responder := machineID;
+ out_msg.Destination.add(tbe.requestor);
+ // We don't know if this is a stale writeback or a bug, so flag the
+ // response so the requestor can make further checks
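+ // (the requestor cross-checks this flag in Receive_ReqResp, which
+ // asserts in_msg.stale == tbe.is_stale)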
+ out_msg.stale := true;
+ }
+}
+
+action(Send_CompDBIDResp, desc="") {
+ assert(is_valid(tbe));
+ enqueue(rspOutPort, CHIResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.type := CHIResponseType:CompDBIDResp;
+ out_msg.responder := machineID;
+ out_msg.Destination.add(tbe.requestor);
+ }
+}
+
+action(Send_CompDBIDResp_Stale, desc="") {
+ assert(is_valid(tbe));
+ enqueue(rspOutPort, CHIResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.type := CHIResponseType:CompDBIDResp;
+ out_msg.responder := machineID;
+ out_msg.Destination.add(tbe.requestor);
+ // We don't know if this is a stale writeback or a bug, so flag the
+ // response so the requestor can make further checks
+ out_msg.stale := true;
+ }
+}
+
+action(Send_DBIDResp, desc="") {
+ assert(is_valid(tbe));
+ enqueue(rspOutPort, CHIResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.type := CHIResponseType:DBIDResp;
+ out_msg.responder := machineID;
+ out_msg.Destination.add(tbe.requestor);
+ }
+}
+
+action(Send_Comp_WU, desc="") {
+ assert(is_valid(tbe));
+ enqueue(rspOutPort, CHIResponseMsg, comp_wu_latency + response_latency) {
+ out_msg.addr := address;
+ out_msg.type := CHIResponseType:Comp;
+ out_msg.responder := machineID;
+ out_msg.Destination.add(tbe.requestor);
+ }
+}
+
+action(Send_SnpRespI, desc="") {
+ enqueue(rspOutPort, CHIResponseMsg, response_latency) {
+ out_msg.addr := address;
+ out_msg.type := CHIResponseType:SnpResp_I;
+ out_msg.responder := machineID;
+ out_msg.Destination.add(tbe.requestor);
+ }
+}
+
+action(Send_RetryAck, desc="") {
+ peek(retryTriggerInPort, RetryTriggerMsg) {
+ enqueue(rspOutPort, CHIResponseMsg, response_latency) {
+ out_msg.addr := in_msg.addr;
+ out_msg.type := CHIResponseType:RetryAck;
+ out_msg.responder := machineID;
+ out_msg.Destination.add(in_msg.retryDest);
+ }
+ }
+}
+
+action(Send_PCrdGrant, desc="") {
+ peek(retryTriggerInPort, RetryTriggerMsg) {
+ enqueue(rspOutPort, CHIResponseMsg, response_latency) {
+ out_msg.addr := in_msg.addr;
+ out_msg.type := CHIResponseType:PCrdGrant;
+ out_msg.responder := machineID;
+ out_msg.Destination.add(in_msg.retryDest);
+ }
+ }
+}
+
+// Note on CheckUpgrade_FromStore/CheckUpgrade_FromCU/CheckUpgrade_FromRU
+// We will always get Comp_UC; but if our data is invalidated before
+// Comp_UC we would need to go to UCE. Since we don't use the UCE state
+// we remain in the transient state and follow up with ReadUnique.
+// Note this assumes the responder knows we have invalid data when sending
+// us Comp_UC and does not register us as owner.
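+// Example (illustrative only): a requestor holding the line in SC sends
+// CleanUnique; a racing SnpUnique invalidates its copy before Comp_UC
+// arrives. UpdateDataState_FromCUResp then leaves dataUnique unset, so
+// CheckUpgrade_FromStore/_FromRU queue a CompAck followed by a ReadUnique
+// to re-fetch the line, while CheckUpgrade_FromCU simply cancels the
+// pending directory update before acking.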
+ +action(CheckUpgrade_FromStore, desc="") { + assert(is_HN == false); + if (tbe.dataUnique) { + // success, just send CompAck next + assert(tbe.dataValid); + } else { + tbe.actions.pushFront(Event:SendReadUnique); + } + tbe.actions.pushFront(Event:SendCompAck); +} + +action(CheckUpgrade_FromCU, desc="") { + assert(is_HN == false); + if (tbe.dataUnique == false) { + // actually failed, so just cancel the directory update + assert(tbe.dir_sharers.isElement(tbe.requestor) == false); + tbe.requestorToBeExclusiveOwner := false; + tbe.updateDirOnCompAck := false; + } + // otherwise nothing else to do here other than acking the CleanUnique + tbe.actions.pushFront(Event:SendCompAck); +} + +action(CheckUpgrade_FromRU, desc="") { + assert(is_HN == false); + if (tbe.dataUnique) { + // success, just send CompAck next + assert(tbe.dataValid); + } else { + // will need to get data instead + tbe.actions.pushFront(Event:SendReadUnique); + } + tbe.actions.pushFront(Event:SendCompAck); +} + +action(Finalize_UpdateCacheFromTBE, desc="") { + assert(is_valid(tbe)); + State final := tbe.finalState; + if ((final == State:UD_RSC) || (final == State:SD_RSC) || (final == State:UC_RSC) || + (final == State:SC_RSC) || (final == State:UD) || (final == State:UD_T) || + (final == State:SD) || (final == State:UC) || (final == State:SC) || + (final == State:UC_RU) || (final == State:UD_RU) || (final == State:UD_RSD) || + (final == State:SD_RSD)) { + assert(tbe.dataBlkValid.isFull()); + assert(tbe.dataValid); + assert(is_valid(cache_entry)); + cache_entry.DataBlk := tbe.dataBlk; + DPRINTF(RubySlicc, "Cached data %s pfb %s\n", tbe.dataBlk, cache_entry.HWPrefetched); + } else { + // make sure only deallocate the cache entry if data is invalid + assert(tbe.dataValid == false); + if (is_valid(cache_entry)) { + cache.deallocate(address); + unset_cache_entry(); + } + } +} + +action(Finalize_UpdateDirectoryFromTBE, desc="") { + assert(is_valid(tbe)); + State final := tbe.finalState; + if ((final == State:UD_RSC) || (final == State:SD_RSC) || (final == State:UC_RSC) || + (final == State:SC_RSC) || (final == State:UC_RU) || (final == State:UD_RU) || + (final == State:UD_RSD) || (final == State:SD_RSD) || (final == State:RU) || + (final == State:RSC) || (final == State:RSD) || (final == State:RUSD) || + (final == State:RUSC)) { + DirEntry dir_entry := getDirEntry(address); + assert(is_valid(dir_entry)); + assert(tbe.dir_sharers.count() > 0); + dir_entry.ownerExists := tbe.dir_ownerExists; + dir_entry.ownerIsExcl := tbe.dir_ownerIsExcl; + dir_entry.owner := tbe.dir_owner; + dir_entry.sharers := tbe.dir_sharers; + } else { + assert((tbe.dir_ownerExists == false) && tbe.dir_sharers.isEmpty()); + if(directory.isTagPresent(address)) { + directory.deallocate(address); + } + } +} + +action(Deallocate_CacheBlock, desc="") { + assert(is_valid(cache_entry)); + cache.deallocate(address); + unset_cache_entry(); +} + +action(Allocate_DirEntry, desc="") { + assert(directory.isTagPresent(address) == false); + directory.allocate(address); +} + +action(Deallocate_DirEntry, desc="") { + assert(directory.isTagPresent(address)); + directory.deallocate(address); +} + +action(CheckCacheFill, desc="") { + assert(is_valid(tbe)); + + // only perform the write if we have valid data and need to write + bool need_fill := tbe.dataValid && (tbe.dataToBeInvalid == false) && tbe.doCacheFill; + bool execute_next := true; + + if (need_fill && is_valid(cache_entry)) { + // can write + tbe.actions.pushFront(Event:DataArrayWrite); + 
tbe.actions.pushFront(Event:FillPipe); + + } else if (need_fill && cache.cacheAvail(address)) { + // don't have a cache block, but there is space to allocate one + set_cache_entry(cache.allocate(address, new CacheEntry)); + tbe.actions.pushFront(Event:DataArrayWriteOnFill); + tbe.actions.pushFront(Event:FillPipe); + + } else if (need_fill) { + // performs a cache block replacement. CheckCacheFill executes again + // after the replacement + execute_next := false; + + // pick a victim to deallocate + Addr victim_addr := cache.cacheProbe(address); + CacheEntry victim_entry := getCacheEntry(victim_addr); + assert(is_valid(victim_entry)); + TBE victim_tbe := getCurrentActiveTBE(victim_addr); + + if (is_invalid(victim_tbe)) { + DPRINTF(RubySlicc, "Eviction for %#x victim: %#x state=%s\n", + address, victim_addr, victim_entry.state); + enqueue(replTriggerOutPort, ReplacementMsg, 0) { + out_msg.addr := victim_addr; + out_msg.from_addr := address; + if (unify_repl_TBEs) { + out_msg.slot := tbe.storSlot; + DPRINTF(RubySlicc, "Reusing slot %d\n", out_msg.slot); + } + } + } else { + DPRINTF(RubySlicc, "Eviction for %#x victim: %#x state=%s\n", + address, victim_addr, victim_tbe.state); + // just wait until the transaction finishes to try again + victim_tbe.wakeup_pending_tgr := true; + } + + // wait until we can deallocate the victim_addr + stall_and_wait(triggerInPort, victim_addr); + } + + // only do the usual Pop_TriggerQueue+ProcessNextState if we have a block + if (execute_next) { + triggerInPort.dequeue(clockEdge()); + clearPendingAction(tbe); + processNextState(address, tbe, cache_entry); + } else { + wakeupPendingSnps(tbe); // might have stalled snoops that can execute now + } +} + + +action(Finalize_DeallocateRequest, desc="") { + assert(is_valid(tbe)); + assert(tbe.actions.empty()); + wakeupPendingReqs(tbe); + wakeupPendingSnps(tbe); + wakeupPendingTgrs(tbe); + + if (tbe.is_req_tbe) { + deallocateReqTBE(tbe); + processRetryQueue(); + + } else if (tbe.is_snp_tbe) { + deallocateSnpTBE(tbe); + + } else { + deallocateReplacementTBE(tbe); + if (unify_repl_TBEs) { + processRetryQueue(); + } + } + unset_tbe(); + + incomingTransactionEnd(address, curTransitionNextState()); +} + +action(Pop_ReqRdyQueue, desc="") { + reqRdyPort.dequeue(clockEdge()); +} + +action(Pop_RespInQueue, desc="") { + rspInPort.dequeue(clockEdge()); +} + +action(Pop_SnoopRdyQueue, desc="") { + snpRdyPort.dequeue(clockEdge()); +} + +action(Pop_DataInQueue, desc="") { + datInPort.dequeue(clockEdge()); +} + +// NOTICE a trigger event may wakeup another stalled trigger event so +// this is always called first in the transitions so we don't pop the +// wrong message +action(Pop_TriggerQueue, desc="") { + triggerInPort.dequeue(clockEdge()); +} + +action(Pop_ReplTriggerQueue, desc="") { + replTriggerInPort.dequeue(clockEdge()); + // wakeup the transaction that triggered this eviction + wakeup_port(triggerInPort, address); +} + +action(Pop_RetryTriggerQueue, desc="") { + retryTriggerInPort.dequeue(clockEdge()); +} + +action(ProcessNextState, desc="") { + assert(is_valid(tbe)); + processNextState(address, tbe, cache_entry); +} + +action(ProcessNextState_ClearPending, desc="") { + assert(is_valid(tbe)); + clearPendingAction(tbe); + processNextState(address, tbe, cache_entry); +} + +action(Callback_LoadHit, desc="") { + assert(is_valid(tbe)); + assert(tbe.reqType == CHIRequestType:Load); + if (tbe.is_local_pf == false) { + assert(tbe.dataValid); + DPRINTF(RubySlicc, "Read data %s\n", tbe.dataBlk); + sequencer.readCallback(tbe.addr, 
tbe.dataBlk, false);
+ }
+}
+
+action(Callback_StoreHit, desc="") {
+ assert(is_valid(tbe));
+ assert((tbe.reqType == CHIRequestType:StoreLine) ||
+ (tbe.reqType == CHIRequestType:Store));
+ if (tbe.is_local_pf == false) {
+ assert(tbe.dataValid);
+ DPRINTF(RubySlicc, "Write before %s\n", tbe.dataBlk);
+ sequencer.writeCallback(tbe.addr, tbe.dataBlk, false);
+ DPRINTF(RubySlicc, "Write after %s\n", tbe.dataBlk);
+ tbe.dataDirty := true;
+ }
+}
+
+action(Callback_ExpressPrefetchHit, desc="") {
+ // have not allocated TBE, but must clear the reservation
+ assert(is_invalid(tbe));
+ storTBEs.decrementReserved();
+ assert(storTBEs.areNSlotsAvailable(1));
+ assert(use_prefetcher);
+
+ cache.profilePrefetchHit();
+ peek(reqRdyPort, CHIRequestMsg) {
+ assert(in_msg.is_local_pf);
+ notifyPfComplete(in_msg.addr);
+ }
+}
+
+// This is called every time a data message is received but only goes
+// through once all the blocks are present (tbe.dataValid)
+// NOTE: should create a separate trigger for this callback ?
+action(Callback_Miss, desc="") {
+ assert(is_valid(tbe));
+ if (tbe.dataValid && tbe.is_local_pf) {
+ assert(use_prefetcher);
+ notifyPfComplete(tbe.addr);
+
+ } else if (tbe.dataValid && (tbe.reqType == CHIRequestType:Load)) {
+ DPRINTF(RubySlicc, "Read data %s\n", tbe.dataBlk);
+ sequencer.readCallback(tbe.addr, tbe.dataBlk, true);
+
+ } else if (tbe.dataValid && ((tbe.reqType == CHIRequestType:Store) ||
+ (tbe.reqType == CHIRequestType:StoreLine))) {
+ DPRINTF(RubySlicc, "Write before %s\n", tbe.dataBlk);
+ sequencer.writeCallback(tbe.addr, tbe.dataBlk, true);
+ DPRINTF(RubySlicc, "Write after %s\n", tbe.dataBlk);
+ tbe.dataDirty := true;
+
+ // sets a use timeout for store misses to prevent LL/SC livelocks
+ int use_timeout_latency := scLockLatency();
+ if (use_timeout_latency > 0) {
+ if (tbe.hasUseTimeout) {
+ assert(useTimerTable.isSet(tbe.addr));
+ } else {
+ useTimerTable.set(
+ tbe.addr,
+ clockEdge() + cyclesToTicks(intToCycles(use_timeout_latency)));
+ tbe.hasUseTimeout := true;
+ }
+ // also decay the timeout
+ scLockDecayLatency();
+ }
+ }
+}
+
+action(Unset_Timeout_TBE, desc="") {
+ assert(is_valid(tbe));
+ assert(tbe.hasUseTimeout);
+ assert(useTimerTable.isSet(tbe.addr));
+ useTimerTable.unset(tbe.addr);
+ tbe.hasUseTimeout := false;
+ // A snoop may have been stalled without setting the TBE flag
+ wakeup_port(snpRdyPort, address);
+}
+
+action(Unset_Timeout_Cache, desc="") {
+ assert(useTimerTable.isSet(address));
+ useTimerTable.unset(address);
+ wakeup_port(snpRdyPort, address);
+}
+
+action(Callback_WriteUnique, desc="") {
+ assert(is_valid(tbe));
+ assert((tbe.is_local_pf || tbe.is_remote_pf) == false);
+ assert((tbe.reqType == CHIRequestType:StoreLine) ||
+ (tbe.reqType == CHIRequestType:Store));
+ assert(tbe.dataValid == false);
+ sequencer.writeUniqueCallback(tbe.addr, tbe.dataBlk);
+ DPRINTF(RubySlicc, "WriteUnique data %s\n", tbe.dataBlk);
+ // set mask; note data is never considered valid
+ assert(tbe.dataBlkValid.isEmpty());
+ tbe.dataBlkValid.setMask(addressOffset(tbe.accAddr, tbe.addr), tbe.accSize);
+}
+
+action(Profile_Miss, desc="") {
+ assert(is_valid(tbe));
+ bool is_demand := (tbe.is_local_pf || tbe.is_remote_pf) == false;
+ bool is_remote_can_notify := tbe.is_remote_pf && upstream_prefetch_trains_prefetcher;
+ if (is_demand) {
+ cache.profileDemandMiss();
+ } else {
+ assert(use_prefetcher || tbe.is_remote_pf);
+ cache.profilePrefetchMiss();
+ }
+ // notify prefetcher about this demand miss
+ if (use_prefetcher && tbe.isSeqReqValid && (is_demand ||
is_remote_can_notify)) { + bool is_read := false; + if (isReadReqType(tbe.reqType)) { + is_read := true; + } else { + assert(isWriteReqType(tbe.reqType)); + } + + // FIXME: this dataBlk is likely to have stale data. This should be fixed + // if our prefetcher uses cached data to make prefetch decisions. + notifyPfMiss(tbe.seqReq, is_read, tbe.dataBlk); + } +} + +action(Profile_Hit, desc="") { + assert(is_valid(tbe)); + assert(is_valid(cache_entry)); + assert(tbe.dataValid); + bool is_demand := (tbe.is_local_pf || tbe.is_remote_pf) == false; + bool is_remote_can_notify := tbe.is_remote_pf && upstream_prefetch_trains_prefetcher; + if (is_demand) { + cache.profileDemandHit(); + } else { + assert(use_prefetcher || tbe.is_remote_pf); + cache.profilePrefetchHit(); + } + // notify prefetcher about this demand hit + if (use_prefetcher && tbe.isSeqReqValid && (is_demand || is_remote_can_notify)) { + bool is_read := false; + if (isReadReqType(tbe.reqType)) { + is_read := true; + } else { + assert(isWriteReqType(tbe.reqType)); + } + notifyPfHit(tbe.seqReq, is_read, tbe.dataBlk); + + cache_entry.HWPrefetched := false; + } +} + +action(Profile_Fill, desc="") { + assert(is_valid(tbe)); + assert(is_valid(cache_entry)); + if (use_prefetcher && tbe.isSeqReqValid) { + + cache_entry.HWPrefetched := tbe.is_local_pf || + (tbe.is_remote_pf && + (upstream_prefetch_trains_prefetcher == false)); + + // Prefetchers that use this info require notifications from both + // demand and pf fills (unlike notifyPfHit/notifyPfMiss) + notifyPfFill(tbe.seqReq, tbe.dataBlk, tbe.is_local_pf); + } +} + +action(Profile_Eviction, desc="") { + if (sc_lock_enabled && sequencer.llscCheckMonitor(address)) { + DPRINTF(LLSC, "Invalidating monitored address %#x\n", address); + scLockIncLatency(); + } + if (send_evictions) { + DPRINTF(RubySlicc, "Sending invalidation for %#x to the sequencer\n", address); + sequencer.evictionCallback(address); + } + if (use_prefetcher && is_valid(cache_entry)) { + notifyPfEvict(address, cache_entry.HWPrefetched); + } +} + +action(Profile_OutgoingStart, desc="") { + outgoingTransactionStart(address, curTransitionEvent()); +} + +action(Profile_OutgoingEnd_DataResp, desc="") { + assert(is_valid(tbe)); + // completes once all data is received + if (tbe.expected_req_resp.hasReceivedData()) { + outgoingTransactionEnd(address, tbe.rcvdRetryAck); + } +} + +action(Profile_OutgoingEnd_DatalessResp, desc="") { + assert(is_valid(tbe)); + outgoingTransactionEnd(address, tbe.rcvdRetryAck); +} + +action(TagArrayRead, desc="") { + assert(is_valid(tbe)); + tbe.delayNextAction := curTick() + cyclesToTicks( + tagLatency((tbe.reqType == CHIRequestType:Load) || + (tbe.reqType == CHIRequestType:Store) || + (tbe.reqType == CHIRequestType:StoreLine))); +} + +action(TagArrayWrite, desc="") { + assert(is_valid(tbe)); + // when hasUseTimeout is set the final state is UD_T, but adding a delay + // between now and triggering Fin_UD_T may allow the timer to expire and then + // we end up in the wrong state + if (dealloc_wait_for_tag && (tbe.hasUseTimeout == false)) { + tbe.delayNextAction := curTick() + cyclesToTicks(tagLatency(false)); + } +} + +action(DataArrayRead, desc="") { + assert(is_valid(tbe)); + tbe.delayNextAction := curTick() + cyclesToTicks(dataLatency()); +} + +action(DataArrayWrite, desc="") { + assert(is_valid(tbe)); + assert(is_valid(cache_entry)); + assert(tbe.doCacheFill); + if(wait_for_cache_wr) { + tbe.delayNextAction := curTick() + cyclesToTicks(dataLatency()); + } +} + +action(ReadHitPipe, desc="") { + 
assert(is_valid(tbe)); + tbe.delayNextAction := curTick() + cyclesToTicks(read_hit_latency); +} + +action(ReadMissPipe, desc="") { + assert(is_valid(tbe)); + tbe.delayNextAction := curTick() + cyclesToTicks(read_miss_latency); +} + +action(WriteFEPipe, desc="") { + assert(is_valid(tbe)); + tbe.delayNextAction := curTick() + cyclesToTicks(write_fe_latency); +} + +action(WriteBEPipe, desc="") { + assert(is_valid(tbe)); + tbe.delayNextAction := curTick() + cyclesToTicks(write_be_latency); +} + +action(FillPipe, desc="") { + assert(is_valid(tbe)); + tbe.delayNextAction := curTick() + cyclesToTicks(fill_latency); +} + +action(SnpSharedPipe, desc="") { + assert(is_valid(tbe)); + tbe.delayNextAction := curTick() + cyclesToTicks(snp_latency); +} + +action(SnpInvPipe, desc="") { + assert(is_valid(tbe)); + tbe.delayNextAction := curTick() + cyclesToTicks(snp_latency + snp_inv_latency); +} + +action(SnpOncePipe, desc="") { + assert(is_valid(tbe)); + tbe.delayNextAction := curTick() + cyclesToTicks(snp_latency); +} diff --git a/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm b/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm new file mode 100644 index 0000000000..db008b0abf --- /dev/null +++ b/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm @@ -0,0 +1,1255 @@ +/* + * Copyright (c) 2021 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +//////////////////////////////////////////////////////////////////////////// +// CHI-cache function definitions +//////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////// +// External functions + +Tick clockEdge(); +Tick curTick(); +Tick cyclesToTicks(Cycles c); +Cycles ticksToCycles(Tick t); +void set_cache_entry(AbstractCacheEntry b); +void unset_cache_entry(); +void set_tbe(TBE b); +void unset_tbe(); +MachineID mapAddressToDownstreamMachine(Addr addr); + +void incomingTransactionStart(Addr, Event, State, bool); +void incomingTransactionEnd(Addr, State); +void outgoingTransactionStart(Addr, Event); +void outgoingTransactionEnd(Addr, bool); +Event curTransitionEvent(); +State curTransitionNextState(); + +// Placeholders for future prefetch support +void notifyPfHit(RequestPtr req, bool is_read, DataBlock blk) { } +void notifyPfMiss(RequestPtr req, bool is_read, DataBlock blk) { } +void notifyPfFill(RequestPtr req, DataBlock blk, bool from_pf) { } +void notifyPfEvict(Addr blkAddr, bool hwPrefetched) { } +void notifyPfComplete(Addr addr) { } + +//////////////////////////////////////////////////////////////////////////// +// Interface functions required by SLICC + +CacheEntry getCacheEntry(Addr addr), return_by_pointer="yes" { + return static_cast(CacheEntry, "pointer", cache.lookup(addr)); +} + +DirEntry getDirEntry(Addr addr), return_by_pointer = "yes" { + if (directory.isTagPresent(addr)) { + return directory.lookup(addr); + } else { + return OOD; + } +} + +State getState(TBE tbe, CacheEntry cache_entry, Addr addr) { + if (is_valid(tbe)) { + return tbe.state; + } else if (is_valid(cache_entry)) { + return cache_entry.state; + } else { + DirEntry dir_entry := getDirEntry(addr); + if (is_valid(dir_entry)) { + return dir_entry.state; + } else { + return State:I; + } + } +} + +void setState(TBE tbe, CacheEntry cache_entry, Addr addr, State state) { + if (is_valid(tbe)) { + tbe.state := state; + } + if (is_valid(cache_entry)) { + cache_entry.state := state; + } + DirEntry dir_entry := getDirEntry(addr); + if (is_valid(dir_entry)) { + dir_entry.state := state; + } +} + +TBE getCurrentActiveTBE(Addr addr), return_by_pointer="yes" { + // snoops take precedence over wbs and reqs + // it's invalid to have a replacement and a req active at the same time + // for the same line + TBE snp_tbe := snpTBEs[addr]; + if (is_valid(snp_tbe)) { + return snp_tbe; + } + TBE req_tbe := TBEs[addr]; + TBE repl_tbe := replTBEs[addr]; + if (is_valid(req_tbe)) { + assert(is_invalid(repl_tbe)); + return req_tbe; + } + if (is_valid(repl_tbe)) { + assert(is_invalid(req_tbe)); + return repl_tbe; + } + return OOD; +} + +AccessPermission getAccessPermission(Addr addr) { + TBE tbe := getCurrentActiveTBE(addr); + if(is_valid(tbe)) { + assert(Cache_State_to_permission(tbe.state) == AccessPermission:Busy); + if (tbe.expected_req_resp.hasExpected() || + tbe.expected_snp_resp.hasExpected()) { + DPRINTF(RubySlicc, "%x %s,%s\n", addr, tbe.state, AccessPermission:Busy); + return AccessPermission:Busy; + } + else if (tbe.dataValid && (tbe.dataMaybeDirtyUpstream == false)) { + if (tbe.dataUnique) { + DPRINTF(RubySlicc, "%x %s,%s\n", addr, tbe.state, AccessPermission:Read_Write); + return AccessPermission:Read_Write; + } else { + DPRINTF(RubySlicc, "%x %s,%s\n", addr, tbe.state, AccessPermission:Read_Only); + return AccessPermission:Read_Only; + } + } else { + DPRINTF(RubySlicc, "%x %s,%s\n", addr, tbe.state, 
AccessPermission:Busy); + return AccessPermission:Busy; + } + } + CacheEntry cache_entry := getCacheEntry(addr); + if(is_valid(cache_entry)) { + DPRINTF(RubySlicc, "%x %s,%s\n", addr, cache_entry.state, Cache_State_to_permission(cache_entry.state)); + return Cache_State_to_permission(cache_entry.state); + } + DPRINTF(RubySlicc, "%x %s,%s\n", addr, State:I, AccessPermission:NotPresent); + return AccessPermission:NotPresent; +} + +void setAccessPermission(CacheEntry cache_entry, Addr addr, State state) { + if (is_valid(cache_entry)) { + cache_entry.changePermission(Cache_State_to_permission(state)); + } +} + +void functionalRead(Addr addr, Packet *pkt, WriteMask &mask) { + // read if bitmask has bytes not in mask or if data is dirty + + TBE tbe := getCurrentActiveTBE(addr); + CacheEntry cache_entry := getCacheEntry(addr); + DPRINTF(RubySlicc, "functionalRead %x\n", addr); + WriteMask read_mask; + bool dirty := false; + bool from_tbe := false; + + if (is_valid(tbe)) { + from_tbe := true; + dirty := tbe.dataDirty; + if (tbe.dataValid) { + read_mask.fillMask(); + } else { + read_mask := tbe.dataBlkValid; + // could have received dirty data but tbe.dataDirty not set yet because + // some data is pending, so check for dirty received message types + dirty := dirty || + tbe.expected_req_resp.receivedDataType(CHIDataType:CompData_UD_PD) || + tbe.expected_req_resp.receivedDataType(CHIDataType:CompData_SD_PD) || + tbe.expected_req_resp.receivedDataType(CHIDataType:CBWrData_UD_PD) || + tbe.expected_req_resp.receivedDataType(CHIDataType:CBWrData_SD_PD) || + tbe.expected_req_resp.receivedDataType(CHIDataType:NCBWrData) || + tbe.expected_snp_resp.receivedDataType(CHIDataType:SnpRespData_I_PD) || + tbe.expected_snp_resp.receivedDataType(CHIDataType:SnpRespData_SC_PD) || + tbe.expected_snp_resp.receivedDataType(CHIDataType:SnpRespData_SD) || + tbe.expected_snp_resp.receivedDataType(CHIDataType:SnpRespData_UD) || + tbe.expected_snp_resp.receivedDataType(CHIDataType:SnpRespData_SC_Fwded_SD_PD) || + tbe.expected_snp_resp.receivedDataType(CHIDataType:SnpRespData_SC_PD_Fwded_SC) || + tbe.expected_snp_resp.receivedDataType(CHIDataType:SnpRespData_I_Fwded_SD_PD) || + tbe.expected_snp_resp.receivedDataType(CHIDataType:SnpRespData_I_PD_Fwded_SC); + } + } else if (is_valid(cache_entry) && + ((Cache_State_to_permission(cache_entry.state) == AccessPermission:Read_Write) || + (Cache_State_to_permission(cache_entry.state) == AccessPermission:Read_Only))) { + from_tbe := false; + read_mask.fillMask(); + dirty := (cache_entry.state == State:UD) || (cache_entry.state == State:UD_RSC) || + (cache_entry.state == State:SD) || (cache_entry.state == State:SD_RSC) || + (cache_entry.state == State:UD_RU) || (cache_entry.state == State:UD_RSD) || + (cache_entry.state == State:SD_RSD) || (cache_entry.state == State:UD_T); + } + + WriteMask test_mask := mask; + test_mask.orMask(read_mask); + if ((test_mask.cmpMask(mask) == false) || dirty) { + if (from_tbe) { + if(testAndReadMask(addr, tbe.dataBlk, read_mask, pkt)) { + DPRINTF(RubySlicc, "functionalRead tbe %x %s dirty=%d %s %s\n", addr, tbe.dataBlk, tbe.dataDirty, read_mask, mask); + mask.orMask(read_mask); + } + } else { + if (testAndReadMask(addr, cache_entry.DataBlk, read_mask, pkt)) { + DPRINTF(RubySlicc, "functionalRead cache %x %s dirty=%d %s %s\n", addr, cache_entry.DataBlk, dirty, read_mask, mask); + mask.orMask(read_mask); + } + } + } +} + +int functionalWrite(Addr addr, Packet *pkt) { + int num_functional_writes := 0; + TBE tbe := getCurrentActiveTBE(addr); + 
if(is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.dataBlk, pkt); + DPRINTF(RubySlicc, "functionalWrite tbe %x %s\n", addr, tbe.dataBlk); + } + CacheEntry cache_entry := getCacheEntry(addr); + if (is_valid(cache_entry)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, cache_entry.DataBlk, pkt); + DPRINTF(RubySlicc, "functionalWrite cache %x %s\n", addr, cache_entry.DataBlk); + } + return num_functional_writes; +} + +Cycles mandatoryQueueLatency(RubyRequestType type) { + return intToCycles(1); +} + +Cycles tagLatency(bool from_sequencer) { + if (from_sequencer) { + //mandatoryQueueLatency accounts for 1 cy + return cache.getTagLatency() - intToCycles(1); + } else { + return cache.getTagLatency(); + } +} + +Cycles dataLatency() { + return cache.getDataLatency(); +} + +bool inCache(Addr addr) { + CacheEntry entry := getCacheEntry(makeLineAddress(addr)); + // NOTE: we consider data for the addr to be in cache if it exists in local, + // upstream, or both caches. + if ((is_valid(entry) == false) || (entry.state == State:I)) { + return false; + } else { + return true; + } +} + +bool hasBeenPrefetched(Addr addr) { + CacheEntry entry := getCacheEntry(makeLineAddress(addr)); + if (is_valid(entry)) { + return entry.HWPrefetched; + } else { + return false; + } +} + +bool inMissQueue(Addr addr) { + Addr line_addr := makeLineAddress(addr); + TBE tbe := getCurrentActiveTBE(line_addr); + return is_valid(tbe); +} + +void notifyCoalesced(Addr addr, RubyRequestType type, RequestPtr req, + DataBlock data_blk, bool was_miss) { + DPRINTF(RubySlicc, "notifyCoalesced(addr=%#x, type=%s, was_miss=%d)\n", + addr, type, was_miss); + if (was_miss) { + cache.profileDemandMiss(); + } else { + cache.profileDemandHit(); + } + if (use_prefetcher) { + bool is_read := (type == RubyRequestType:LD) || + (type == RubyRequestType:Load_Linked) || + (type == RubyRequestType:IFETCH); + if (was_miss) { + notifyPfMiss(req, is_read, data_blk); + } else { + notifyPfHit(req, is_read, data_blk); + } + } +} + + +//////////////////////////////////////////////////////////////////////////// +// Helper functions + + +void clearExpectedReqResp(TBE tbe) { + assert(blockSize >= data_channel_size); + assert((blockSize % data_channel_size) == 0); + tbe.expected_req_resp.clear(blockSize / data_channel_size); +} + +void clearExpectedSnpResp(TBE tbe) { + assert(blockSize >= data_channel_size); + assert((blockSize % data_channel_size) == 0); + tbe.expected_snp_resp.clear(blockSize / data_channel_size); +} + +void initializeTBE(TBE tbe, Addr addr, int storSlot) { + assert(is_valid(tbe)); + + tbe.wakeup_pending_req := false; + tbe.wakeup_pending_snp := false; + tbe.wakeup_pending_tgr := false; + + tbe.addr := addr; + + tbe.storSlot := storSlot; + + clearExpectedReqResp(tbe); + clearExpectedSnpResp(tbe); + tbe.defer_expected_comp := false; + + tbe.requestorToBeOwner := false; + tbe.requestorToBeExclusiveOwner := false; + tbe.updateDirOnCompAck := true; + + tbe.dataToBeInvalid := false; + tbe.dataToBeSharedClean := false; + + tbe.doCacheFill := false; + + tbe.pendReqType := CHIRequestType:null; + + tbe.pendAction := Event:null; + tbe.finalState := State:null; + tbe.delayNextAction := intToTick(0); + + tbe.is_stale := false; +} + +TBE allocateRequestTBE(Addr addr, CHIRequestMsg in_msg), return_by_pointer="yes" { + // We must have reserved resources for this allocation + storTBEs.decrementReserved(); + assert(storTBEs.areNSlotsAvailable(1)); + + TBEs.allocate(addr); + TBE tbe := 
TBEs[addr];
+
+  initializeTBE(tbe, addr, storTBEs.addEntryToNewSlot());
+
+  assert(tbe.is_snp_tbe == false);
+  assert(tbe.is_repl_tbe == false);
+  tbe.is_req_tbe := true;
+
+  tbe.accAddr := in_msg.accAddr;
+  tbe.accSize := in_msg.accSize;
+  tbe.requestor := in_msg.requestor;
+  tbe.reqType := in_msg.type;
+
+  tbe.isSeqReqValid := in_msg.isSeqReqValid;
+  tbe.seqReq := in_msg.seqReq;
+  tbe.is_local_pf := in_msg.is_local_pf;
+  tbe.is_remote_pf := in_msg.is_remote_pf;
+
+  tbe.use_DMT := false;
+  tbe.use_DCT := false;
+
+  tbe.hasUseTimeout := false;
+
+  return tbe;
+}
+
+
+TBE allocateSnoopTBE(Addr addr, CHIRequestMsg in_msg), return_by_pointer="yes" {
+  // We must have reserved resources for this allocation
+  storSnpTBEs.decrementReserved();
+  assert(storSnpTBEs.areNSlotsAvailable(1));
+
+  snpTBEs.allocate(addr);
+  TBE tbe := snpTBEs[addr];
+  initializeTBE(tbe, addr, storSnpTBEs.addEntryToNewSlot());
+
+  assert(tbe.is_req_tbe == false);
+  assert(tbe.is_repl_tbe == false);
+  tbe.is_snp_tbe := true;
+
+  tbe.accAddr := addr;
+  tbe.accSize := blockSize;
+  tbe.requestor := in_msg.requestor;
+  tbe.fwdRequestor := in_msg.fwdRequestor;
+  tbe.reqType := in_msg.type;
+
+  tbe.snpNeedsData := in_msg.retToSrc;
+
+  tbe.use_DMT := false;
+  tbe.use_DCT := false;
+
+  return tbe;
+}
+
+
+TBE _allocateReplacementTBE(Addr addr, int storSlot), return_by_pointer="yes" {
+  TBE tbe := replTBEs[addr];
+  initializeTBE(tbe, addr, storSlot);
+
+  assert(tbe.is_req_tbe == false);
+  assert(tbe.is_snp_tbe == false);
+  tbe.is_repl_tbe := true;
+
+  tbe.accAddr := addr;
+  tbe.accSize := blockSize;
+  tbe.requestor := machineID;
+  tbe.reqType := CHIRequestType:null;
+
+  tbe.use_DMT := false;
+  tbe.use_DCT := false;
+
+  return tbe;
+}
+
+TBE allocateReplacementTBE(Addr addr), return_by_pointer="yes" {
+  // We must have resources for this allocation
+  assert(storReplTBEs.areNSlotsAvailable(1));
+
+  replTBEs.allocate(addr);
+  return _allocateReplacementTBE(addr, storReplTBEs.addEntryToNewSlot());
+}
+
+TBE allocateReplacementTBEOnSlot(Addr addr, int slot), return_by_pointer="yes" {
+  // only when reusing a slot from the main TBE table
+  assert(unify_repl_TBEs);
+  storTBEs.addEntryToSlot(slot);
+
+  replTBEs.allocate(addr);
+  return _allocateReplacementTBE(addr, slot);
+}
+
+TBE getHazardTBE(TBE tbe), return_by_pointer="yes" {
+  assert(is_valid(tbe));
+  assert(tbe.is_snp_tbe);
+  TBE hazard_tbe := TBEs[tbe.addr];
+  if (tbe.is_req_hazard) {
+    assert(tbe.is_repl_hazard == false);
+  } else {
+    assert(tbe.is_repl_hazard);
+    hazard_tbe := replTBEs[tbe.addr];
+  }
+  assert(is_valid(hazard_tbe));
+  return hazard_tbe;
+}
+
+void scheduleSendData(TBE tbe, int when) {
+  if (tbe.snd_pendBytes.count() > 0) {
+    assert(tbe.snd_pendEv == false);
+    tbe.snd_pendEv := true;
+    // enqueue send event
+    tbe.pendAction := Event:TX_Data;
+    enqueue(triggerOutPort, TriggerMsg, intToCycles(when)) {
+      out_msg.addr := tbe.addr;
+      out_msg.from_hazard := tbe.is_req_hazard || tbe.is_repl_hazard;
+    }
+  }
+}
+
+void setupPendingSend(TBE tbe) {
+  assert(blockSize >= data_channel_size);
+  assert((blockSize % data_channel_size) == 0);
+  // data must be complete in the TBE
+  assert(tbe.dataBlkValid.isFull());
+  tbe.snd_pendBytes.fillMask();
+  scheduleSendData(tbe, 0);
+}
+
+void setupPendingPartialSend(TBE tbe) {
+  assert(blockSize >= data_channel_size);
+  assert((blockSize % data_channel_size) == 0);
+  // at least some of the data must be present in the TBE
+  assert(tbe.dataBlkValid.count() > 0);
+  tbe.snd_pendBytes := tbe.dataBlkValid;
+  scheduleSendData(tbe, 0);
+}
+
+// common code for downstream
requests +void prepareRequest(TBE tbe, CHIRequestType type, CHIRequestMsg & out_msg) { + out_msg.addr := tbe.addr; + out_msg.accAddr := tbe.addr; + out_msg.accSize := blockSize; + out_msg.requestor := machineID; + out_msg.fwdRequestor := tbe.requestor; + out_msg.type := type; + out_msg.allowRetry := false; + tbe.pendReqAllowRetry := false; + tbe.rcvdRetryAck := false; + tbe.rcvdRetryCredit := false; + tbe.pendReqType := type; + out_msg.isSeqReqValid := tbe.isSeqReqValid; + out_msg.seqReq := tbe.seqReq; + out_msg.is_local_pf := false; + out_msg.is_remote_pf := tbe.is_local_pf || tbe.is_remote_pf; +} + +void allowRequestRetry(TBE tbe, CHIRequestMsg & out_msg) { + out_msg.allowRetry := true; + tbe.pendReqAllowRetry := true; + tbe.pendReqAccAddr := out_msg.accAddr; + tbe.pendReqAccSize := out_msg.accSize; + tbe.pendReqDest := out_msg.Destination; + tbe.pendReqD2OrigReq := out_msg.dataToFwdRequestor; + tbe.pendReqRetToSrc := out_msg.retToSrc; +} + +void prepareRequestRetry(TBE tbe, CHIRequestMsg & out_msg) { + assert(tbe.pendReqAllowRetry); + tbe.pendReqAllowRetry := false; + out_msg.allowRetry := false; + + out_msg.addr := tbe.addr; + out_msg.requestor := machineID; + out_msg.fwdRequestor := tbe.requestor; + out_msg.accAddr := tbe.pendReqAccAddr; + out_msg.accSize := tbe.pendReqAccSize; + out_msg.type := tbe.pendReqType; + out_msg.Destination := tbe.pendReqDest; + out_msg.dataToFwdRequestor := tbe.pendReqD2OrigReq; + out_msg.retToSrc := tbe.pendReqRetToSrc; + out_msg.isSeqReqValid := tbe.isSeqReqValid; + out_msg.seqReq := tbe.seqReq; + out_msg.is_local_pf := false; + out_msg.is_remote_pf := tbe.is_local_pf || tbe.is_remote_pf; +} + +void enqueueDoRetry(TBE tbe) { + if (tbe.rcvdRetryAck && tbe.rcvdRetryCredit) { + enqueue(retryTriggerOutPort, RetryTriggerMsg, 0) { + out_msg.addr := tbe.addr; + out_msg.event := Event:DoRetry; + } + destsWaitingRetry.removeNetDest(tbe.pendReqDest); + } +} + +void processRetryQueue() { + // send credit if requestor waiting for it and we have resources + bool has_avail := storTBEs.areNSlotsAvailable(1); + assert(unify_repl_TBEs || has_avail); + // the slot might still be used by a replacement if unify_repl_TBEs is set + if (retryQueue.empty() == false && has_avail) { + storTBEs.incrementReserved(); + RetryQueueEntry e := retryQueue.next(); + retryQueue.pop(); + enqueue(retryTriggerOutPort, RetryTriggerMsg, 0) { + out_msg.addr := e.addr; + out_msg.retryDest := e.retryDest; + out_msg.event := Event:SendPCrdGrant; + } + } +} + +void printResources() { + if (unify_repl_TBEs) { + assert(storReplTBEs.size() == 0); + assert(storReplTBEs.reserved() == 0); + DPRINTF(RubySlicc, "Resources(used/rsvd/max): TBEs=%d/%d/%d snpTBEs=%d/%d/%d replTBEs=%d/%d/%d\n", + storTBEs.size(), storTBEs.reserved(), storTBEs.capacity(), + storSnpTBEs.size(), storSnpTBEs.reserved(), storSnpTBEs.capacity(), + storTBEs.size(), storTBEs.reserved(), storTBEs.capacity()); + } else { + DPRINTF(RubySlicc, "Resources(used/rsvd/max): TBEs=%d/%d/%d snpTBEs=%d/%d/%d replTBEs=%d/%d/%d\n", + storTBEs.size(), storTBEs.reserved(), storTBEs.capacity(), + storSnpTBEs.size(), storSnpTBEs.reserved(), storSnpTBEs.capacity(), + storReplTBEs.size(), storReplTBEs.reserved(), storReplTBEs.capacity()); + } + DPRINTF(RubySlicc, "Resources(in/out size): req=%d/%d rsp=%d/%d dat=%d/%d snp=%d/%d trigger=%d\n", + reqIn.getSize(curTick()), reqOut.getSize(curTick()), + rspIn.getSize(curTick()), rspOut.getSize(curTick()), + datIn.getSize(curTick()), datOut.getSize(curTick()), + snpIn.getSize(curTick()), snpOut.getSize(curTick()), 
+          triggerQueue.getSize(curTick()));
+}
+
+bool needCacheEntry(CHIRequestType req_type,
+                    CacheEntry cache_entry, DirEntry dir_entry,
+                    bool is_prefetch) {
+  // never allocates:
+  // - if the entry is already valid
+  // - if using DMT, the request is a Read*, and the dir entry is invalid
+  // otherwise follow the config params
+  if (is_valid(cache_entry) ||
+      (enable_DMT && is_invalid(dir_entry) &&
+        ((req_type == CHIRequestType:ReadShared) ||
+         (req_type == CHIRequestType:ReadUnique) ||
+         (req_type == CHIRequestType:ReadOnce)))) {
+    return false;
+  } else {
+    return is_prefetch ||
+      (alloc_on_readshared && ((req_type == CHIRequestType:ReadShared) ||
+                               (req_type == CHIRequestType:ReadNotSharedDirty))) ||
+      (alloc_on_readunique && (req_type == CHIRequestType:ReadUnique)) ||
+      (alloc_on_readonce && (req_type == CHIRequestType:ReadOnce)) ||
+      (alloc_on_writeback && ((req_type == CHIRequestType:WriteBackFull) ||
+                              (req_type == CHIRequestType:WriteCleanFull) ||
+                              (req_type == CHIRequestType:WriteEvictFull) ||
+                              (is_HN && (req_type == CHIRequestType:WriteUniqueFull)))) ||
+      (alloc_on_seq_acc && ((req_type == CHIRequestType:Load) ||
+                            (req_type == CHIRequestType:Store))) ||
+      (alloc_on_seq_line_write && (req_type == CHIRequestType:StoreLine));
+  }
+}
+
+bool needDeallocCacheEntry(CHIRequestType req_type) {
+  return (dealloc_on_shared && ((req_type == CHIRequestType:ReadShared) ||
+                                (req_type == CHIRequestType:ReadNotSharedDirty))) ||
+         (dealloc_on_unique && ((req_type == CHIRequestType:ReadUnique) ||
+                                (req_type == CHIRequestType:CleanUnique)));
+}
+
+bool upstreamHasUnique(State state) {
+  return (state == State:RU) || (state == State:UD_RU) || (state == State:UC_RU);
+}
+
+bool upstreamHasShared(State state) {
+  return (state == State:RSC) || (state == State:RSD) ||
+         (state == State:RUSD) || (state == State:RUSC) ||
+         (state == State:UD_RSD) || (state == State:SD_RSD) ||
+         (state == State:UD_RSC) || (state == State:SD_RSC) ||
+         (state == State:UC_RSC) || (state == State:SC_RSC);
+}
+
+void printTBEState(TBE tbe) {
+  DPRINTF(RubySlicc, "STATE: addr: %#x data present=%d valid=%d unique=%d dirty=%d mu_dirty=%d dir ownerV=%d ownerE=%d sharers=%d tobe_I=%d tobe_SC=%d doFill=%d pendAction=%s\n",
+          tbe.addr, tbe.dataBlkValid.isFull(), tbe.dataValid, tbe.dataUnique,
+          tbe.dataDirty, tbe.dataMaybeDirtyUpstream, tbe.dir_ownerExists,
+          tbe.dir_ownerIsExcl, tbe.dir_sharers.count(),
+          tbe.dataToBeInvalid, tbe.dataToBeSharedClean,
+          tbe.doCacheFill, tbe.pendAction);
+  DPRINTF(RubySlicc, "dataBlkValid = %s\n", tbe.dataBlkValid);
+}
+
+void copyCacheAndDir(CacheEntry cache_entry, DirEntry dir_entry,
+                     TBE tbe, State initialState) {
+  assert(is_valid(tbe));
+
+  // have dir entry
+  if (is_valid(dir_entry)) {
+    assert((initialState == State:UD_RSC) || (initialState == State:SD_RSC) ||
+           (initialState == State:UC_RSC) || (initialState == State:SC_RSC) ||
+           (initialState == State:UD_RU) || (initialState == State:UC_RU) ||
+           (initialState == State:RU) || (initialState == State:RSC) ||
+           (initialState == State:RSD) || (initialState == State:RUSD) ||
+           (initialState == State:RUSC) ||
+           (initialState == State:UD_RSD) || (initialState == State:SD_RSD));
+    tbe.dir_sharers := dir_entry.sharers;
+    tbe.dir_owner := dir_entry.owner;
+    tbe.dir_ownerExists := dir_entry.ownerExists;
+    tbe.dir_ownerIsExcl := dir_entry.ownerIsExcl;
+    assert(tbe.dir_sharers.count() > 0);
+  } else {
+    tbe.dir_sharers.clear();
+    tbe.dir_ownerExists := false;
+  }
+  // Sanity checks
+  assert((tbe.dir_ownerExists && tbe.dir_ownerIsExcl) ==
+         ((initialState == State:UD_RU) || (initialState == State:UC_RU) ||
+          (initialState == State:RU)));
+  assert((tbe.dir_ownerExists && (tbe.dir_ownerIsExcl == false)) ==
+         ((initialState == State:RSD) || (initialState == State:RUSD) ||
+          (initialState == State:UD_RSD) || (initialState == State:SD_RSD)));
+
+  // have usable data
+  if (is_valid(cache_entry) &&
+      ((initialState == State:UD) || (initialState == State:SD) ||
+       (initialState == State:UC) || (initialState == State:SC) ||
+       (initialState == State:UD_RSC) || (initialState == State:SD_RSC) ||
+       (initialState == State:UC_RSC) || (initialState == State:SC_RSC) ||
+       (initialState == State:UD_RSD) || (initialState == State:SD_RSD) ||
+       (initialState == State:UD_T))) {
+    tbe.dataBlk := cache_entry.DataBlk;
+    tbe.dataBlkValid.fillMask();
+    tbe.dataValid := true;
+    DPRINTF(RubySlicc, "Cached data %s\n", tbe.dataBlk);
+  } else {
+    assert(is_invalid(cache_entry) ||
+           (is_valid(cache_entry) && ((initialState == State:UD_RU) ||
+                                      (initialState == State:UC_RU))));
+    tbe.dataBlkValid.clear();
+    tbe.dataValid := false;
+  }
+
+  // set MRU for accessed block
+  if (is_valid(cache_entry) && ((tbe.is_local_pf || tbe.is_remote_pf) == false)) {
+    cache.setMRU(cache_entry);
+  }
+
+  // data is dirty here
+  tbe.dataDirty := (initialState == State:UD) || (initialState == State:UD_RSC) ||
+                   (initialState == State:SD) || (initialState == State:SD_RSC) ||
+                   (initialState == State:UD_RU) || (initialState == State:UD_RSD) ||
+                   (initialState == State:SD_RSD) || (initialState == State:UD_T);
+
+  // maybe dirty upstream
+  tbe.dataMaybeDirtyUpstream := (initialState == State:UD_RU) || (initialState == State:UC_RU) ||
+                                (initialState == State:UD_RSD) || (initialState == State:SD_RSD) ||
+                                (initialState == State:RU) || (initialState == State:RSD) ||
+                                (initialState == State:RUSD);
+  assert(tbe.dir_ownerExists == tbe.dataMaybeDirtyUpstream);
+
+  // data is unique here or upstream
+  tbe.dataUnique := (initialState == State:UD) || (initialState == State:UD_RSC) ||
+                    (initialState == State:UD_RU) || (initialState == State:UC) ||
+                    (initialState == State:UC_RSC) || (initialState == State:UC_RU) ||
+                    (initialState == State:RU) || (initialState == State:RUSD) ||
+                    (initialState == State:RUSC) ||
+                    (initialState == State:UD_RSD) || (initialState == State:UD_T);
+
+  // is the block locked until the use timeout expires?
+ tbe.hasUseTimeout := initialState == State:UD_T; + + tbe.dataToBeSharedClean := false; + tbe.dataToBeInvalid := false; + + printTBEState(tbe); +} + +void copyCacheAndDirTBEs(TBE src, TBE dst) { + assert(is_valid(src)); + assert(is_valid(dst)); + dst.dataBlk := src.dataBlk; + dst.dataBlkValid := src.dataBlkValid; + dst.dataValid := src.dataValid; + dst.dataDirty := src.dataDirty; + dst.dataMaybeDirtyUpstream := src.dataMaybeDirtyUpstream; + dst.dataUnique := src.dataUnique; + dst.dir_sharers := src.dir_sharers; + dst.dir_owner := src.dir_owner; + dst.dir_ownerExists := src.dir_ownerExists; + dst.dir_ownerIsExcl := src.dir_ownerIsExcl; + printTBEState(dst); +} + +void deallocateReqTBE(TBE tbe) { + assert(is_valid(tbe)); + assert(tbe.is_req_tbe); + storTBEs.removeEntryFromSlot(tbe.storSlot); + TBEs.deallocate(tbe.addr); +} + +void deallocateSnpTBE(TBE tbe) { + assert(is_valid(tbe)); + assert(tbe.is_snp_tbe); + storSnpTBEs.removeEntryFromSlot(tbe.storSlot); + snpTBEs.deallocate(tbe.addr); +} + +void deallocateReplacementTBE(TBE tbe) { + assert(is_valid(tbe)); + assert(tbe.is_repl_tbe); + if (unify_repl_TBEs) { + storTBEs.removeEntryFromSlot(tbe.storSlot); + } else { + storReplTBEs.removeEntryFromSlot(tbe.storSlot); + } + replTBEs.deallocate(tbe.addr); +} + +void setDataToBeStates(TBE tbe) { + assert(is_valid(tbe)); + if (tbe.dataToBeInvalid) { + tbe.dataValid := false; + tbe.dataBlkValid.clear(); + } + if (tbe.dataToBeSharedClean) { + tbe.dataUnique := false; + tbe.dataDirty := false; + assert(tbe.dataMaybeDirtyUpstream == false); + } + tbe.dataToBeInvalid := false; + tbe.dataToBeSharedClean := false; +} + +void setExpectedForInvSnoop(TBE tbe, bool expectCleanWB) { + assert(tbe.expected_snp_resp.hasExpected() == false); + assert(tbe.dir_sharers.count() > 0); + clearExpectedSnpResp(tbe); + if (expectCleanWB) { + tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_I); + } + if (tbe.dataMaybeDirtyUpstream) { + assert(tbe.dir_ownerExists); + tbe.expected_snp_resp.addExpectedDataType(CHIDataType:SnpRespData_I_PD); + if ((expectCleanWB == false) || (tbe.dir_sharers.count() > 1)) { + tbe.expected_snp_resp.addExpectedRespType(CHIResponseType:SnpResp_I); + } + } else { + tbe.expected_snp_resp.addExpectedRespType(CHIResponseType:SnpResp_I); + } + tbe.expected_snp_resp.setExpectedCount(tbe.dir_sharers.count()); +} + +State makeFinalStateHelper(State cs, State ds) { + if (ds == State:RSC) { + if (cs == State:UD) { + return State:UD_RSC; + } else if (cs == State:SD) { + return State:SD_RSC; + } else if (cs == State:UC) { + return State:UC_RSC; + } else if (cs == State:SC) { + return State:SC_RSC; + } else { + return State:RSC; + } + } else if (ds == State:RU) { + if (cs == State:UD) { + return State:UD_RU; + } else if (cs == State:UC) { + return State:UC_RU; + } else { + assert(cs != State:SC); + assert(cs != State:SD); + return State:RU; + } + } else if (ds == State:RSD) { + if (cs == State:UD) { + return State:UD_RSD; + } else if (cs == State:SD) { + return State:SD_RSD; + } else { + assert(cs == State:I); + return State:RSD; + } + } else if (ds == State:RUSD) { + if (cs == State:UD) { + return State:UD_RSD; + } else { + assert(cs == State:I); + return State:RUSD; + } + } else if (ds == State:RUSC) { + if (cs == State:UC) { + return State:UC_RSC; + } else if (cs == State:UD) { + return State:UD_RSC; + } else { + assert(cs == State:I); + return State:RUSC; + } + } else { + assert(ds == State:I); + return cs; + } +} + +State makeFinalState(TBE tbe, CacheEntry cache_entry, DirEntry 
dir_entry) { + setDataToBeStates(tbe); + printTBEState(tbe); + + State cache_state := State:I; + State dir_state := State:I; + + if (tbe.dir_ownerExists) { + assert(is_valid(dir_entry)); + assert(tbe.dataMaybeDirtyUpstream); + if (tbe.dir_ownerIsExcl) { + assert(tbe.dir_sharers.count() == 1); + dir_state := State:RU; + } else { + assert(tbe.dir_sharers.count() >= 1); + if (tbe.dataUnique) { + dir_state := State:RUSD; + } else { + dir_state := State:RSD; + } + } + } else if (tbe.dir_sharers.count() > 0) { + assert(is_valid(dir_entry)); + assert(tbe.dataMaybeDirtyUpstream == false); + if (tbe.dataUnique) { + dir_state := State:RUSC; + } else { + dir_state := State:RSC; + } + } + + if (tbe.dataValid && is_valid(cache_entry)) { + if (tbe.dataUnique && tbe.dataDirty) { + if (tbe.hasUseTimeout) { + cache_state := State:UD_T; + } else { + cache_state := State:UD; + } + } else if (tbe.dataUnique && (tbe.dataDirty == false)) { + cache_state := State:UC; + } else if ((tbe.dataUnique == false) && tbe.dataDirty) { + assert(allow_SD); + cache_state := State:SD; + } else { + cache_state := State:SC; + } + } + + return makeFinalStateHelper(cache_state, dir_state); +} + +// This is used only with the finalization transitions +State getNextState(Addr address) { + TBE tbe := getCurrentActiveTBE(address); + assert(is_valid(tbe)); + assert(tbe.pendAction == Event:Final); + tbe.finalState := makeFinalState(tbe, getCacheEntry(address), getDirEntry(address)); + assert(tbe.finalState != State:null); + return tbe.finalState; +} + + +int scLockLatency() { + return sc_lock_multiplier * sc_lock_base_latency_cy; +} + +void scLockIncLatency() +{ + sc_lock_multiplier := sc_lock_multiplier + sc_lock_multiplier_inc; + if (sc_lock_multiplier > sc_lock_multiplier_max) { + sc_lock_multiplier := sc_lock_multiplier_max; + } + DPRINTF(LLSC, "SC lock latency increased to %d cy\n", scLockLatency()); +} + +void scLockDecayLatency() +{ + sc_lock_multiplier := sc_lock_multiplier - sc_lock_multiplier_decay; + if (sc_lock_multiplier < 0) { + sc_lock_multiplier := 0; + } + DPRINTF(LLSC, "SC lock latency decayed to %d cy\n", scLockLatency()); +} + +void clearPendingAction(TBE tbe) { + // only clear pendAction if snd_pendEv not set + if (tbe.snd_pendEv) { + assert(tbe.pendAction == Event:TX_Data); + } else { + tbe.pendAction := Event:null; + } +} + +bool isReadReqType(CHIRequestType type) { + if (type == CHIRequestType:Load || + type == CHIRequestType:ReadShared || + type == CHIRequestType:ReadNotSharedDirty || + type == CHIRequestType:ReadOnce) { + return true; + } + return false; +} + +bool isWriteReqType(CHIRequestType type) { + if (type == CHIRequestType:Store || + type == CHIRequestType:StoreLine || + type == CHIRequestType:WriteUniquePtl || + type == CHIRequestType:WriteUniqueFull || + type == CHIRequestType:ReadUnique) { + return true; + } + return false; +} + +//////////////////////////////////////////////////////////////////////////// +// State->Event converters + +Event reqToEvent(CHIRequestType type, bool is_prefetch) { + if (type == CHIRequestType:Load) { + if (is_prefetch == false) { + return Event:Load; + } else { + return Event:Prefetch; + } + } else if (type == CHIRequestType:Store) { + return Event:Store; + } else if (type == CHIRequestType:StoreLine) { + return Event:Store; + } else if (type == CHIRequestType:ReadShared) { + return Event:ReadShared; + } else if (type == CHIRequestType:ReadNotSharedDirty) { + return Event:ReadNotSharedDirty; + } else if (type == CHIRequestType:ReadUnique) { + if (is_HN) { + return 
Event:ReadUnique_PoC; + } else { + return Event:ReadUnique; + } + } else if (type == CHIRequestType:CleanUnique) { + return Event:CleanUnique; + } else if (type == CHIRequestType:ReadOnce) { + return Event:ReadOnce; + } else if (type == CHIRequestType:Evict) { + return Event:Evict; + } else if (type == CHIRequestType:WriteBackFull) { + return Event:WriteBackFull; + } else if (type == CHIRequestType:WriteEvictFull) { + return Event:WriteEvictFull; + } else if (type == CHIRequestType:WriteCleanFull) { + return Event:WriteCleanFull; + } else if (type == CHIRequestType:WriteUniquePtl) { + if (is_HN) { + return Event:WriteUniquePtl_PoC; + } else { + return Event:WriteUnique; // all WriteUnique handled the same when ~PoC + } + } else if (type == CHIRequestType:WriteUniqueFull) { + if (is_HN && alloc_on_writeback) { + return Event:WriteUniqueFull_PoC_Alloc; + } else if (is_HN) { + return Event:WriteUniqueFull_PoC; + } else { + return Event:WriteUnique; // all WriteUnique handled the same when ~PoC + } + } else { + error("Invalid CHIRequestType"); + } +} + +Event respToEvent (CHIResponseType type, TBE tbe) { + bool on_hazard := is_valid(tbe) && (tbe.is_req_hazard || tbe.is_repl_hazard); + if (type == CHIResponseType:Comp_I) { + return Event:Comp_I; + } else if (type == CHIResponseType:Comp_UC) { + return Event:Comp_UC; + } else if (type == CHIResponseType:Comp_SC) { + return Event:Comp_SC; + } else if (type == CHIResponseType:CompDBIDResp) { + return Event:CompDBIDResp; + } else if (type == CHIResponseType:DBIDResp) { + return Event:DBIDResp; + } else if (type == CHIResponseType:Comp) { + return Event:Comp; + } else if (type == CHIResponseType:CompAck) { + return Event:CompAck; + } else if (type == CHIResponseType:ReadReceipt) { + return Event:ReadReceipt; + } else if (type == CHIResponseType:RespSepData) { + return Event:RespSepData; + } else if (type == CHIResponseType:SnpResp_I) { + return Event:SnpResp_I; + } else if (type == CHIResponseType:SnpResp_I_Fwded_UC) { + return Event:SnpResp_I_Fwded_UC; + } else if (type == CHIResponseType:SnpResp_I_Fwded_UD_PD) { + return Event:SnpResp_I_Fwded_UD_PD; + } else if (type == CHIResponseType:SnpResp_SC) { + return Event:SnpResp_SC; + } else if (type == CHIResponseType:SnpResp_SC_Fwded_SC) { + return Event:SnpResp_SC_Fwded_SC; + } else if (type == CHIResponseType:SnpResp_SC_Fwded_SD_PD) { + return Event:SnpResp_SC_Fwded_SD_PD; + } else if (type == CHIResponseType:SnpResp_SD_Fwded_I) { + return Event:SnpResp_SD_Fwded_I; + } else if (type == CHIResponseType:SnpResp_SC_Fwded_I) { + return Event:SnpResp_SC_Fwded_I; + } else if (type == CHIResponseType:SnpResp_UD_Fwded_I) { + return Event:SnpResp_UD_Fwded_I; + } else if (type == CHIResponseType:SnpResp_UC_Fwded_I) { + return Event:SnpResp_UC_Fwded_I; + } else if (type == CHIResponseType:RetryAck) { + if (is_HN) { + if (on_hazard) { + return Event:RetryAck_PoC_Hazard; + } else { + return Event:RetryAck_PoC; + } + } else { + if (on_hazard) { + return Event:RetryAck_Hazard; + } else { + return Event:RetryAck; + } + } + } else if (type == CHIResponseType:PCrdGrant) { + if (is_HN) { + if (on_hazard) { + return Event:PCrdGrant_PoC_Hazard; + } else { + return Event:PCrdGrant_PoC; + } + } else { + if (on_hazard) { + return Event:PCrdGrant_Hazard; + } else { + return Event:PCrdGrant; + } + } + } else { + error("Invalid CHIResponseType"); + } +} + +Event dataToEvent (CHIDataType type) { + if (type == CHIDataType:CompData_I) { + return Event:CompData_I; + } else if (type == CHIDataType:CompData_UC) { + return 
Event:CompData_UC;
+  } else if (type == CHIDataType:CompData_SC) {
+    return Event:CompData_SC;
+  } else if (type == CHIDataType:CompData_UD_PD) {
+    return Event:CompData_UD_PD;
+  } else if (type == CHIDataType:CompData_SD_PD) {
+    return Event:CompData_SD_PD;
+  } else if (type == CHIDataType:DataSepResp_UC) {
+    return Event:DataSepResp_UC;
+  } else if (type == CHIDataType:CBWrData_I) {
+    return Event:CBWrData_I;
+  } else if (type == CHIDataType:CBWrData_UC) {
+    return Event:CBWrData_UC;
+  } else if (type == CHIDataType:CBWrData_SC) {
+    return Event:CBWrData_SC;
+  } else if (type == CHIDataType:CBWrData_UD_PD) {
+    return Event:CBWrData_UD_PD;
+  } else if (type == CHIDataType:CBWrData_SD_PD) {
+    return Event:CBWrData_SD_PD;
+  } else if (type == CHIDataType:NCBWrData) {
+    return Event:NCBWrData;
+  } else if (type == CHIDataType:SnpRespData_I_PD) {
+    return Event:SnpRespData_I_PD;
+  } else if (type == CHIDataType:SnpRespData_I) {
+    return Event:SnpRespData_I;
+  } else if (type == CHIDataType:SnpRespData_SC_PD) {
+    return Event:SnpRespData_SC_PD;
+  } else if (type == CHIDataType:SnpRespData_SC) {
+    return Event:SnpRespData_SC;
+  } else if (type == CHIDataType:SnpRespData_SD) {
+    return Event:SnpRespData_SD;
+  } else if (type == CHIDataType:SnpRespData_UC) {
+    return Event:SnpRespData_UC;
+  } else if (type == CHIDataType:SnpRespData_UD) {
+    return Event:SnpRespData_UD;
+  } else if (type == CHIDataType:SnpRespData_SC_Fwded_SC) {
+    return Event:SnpRespData_SC_Fwded_SC;
+  } else if (type == CHIDataType:SnpRespData_SC_Fwded_SD_PD) {
+    return Event:SnpRespData_SC_Fwded_SD_PD;
+  } else if (type == CHIDataType:SnpRespData_SC_PD_Fwded_SC) {
+    return Event:SnpRespData_SC_PD_Fwded_SC;
+  } else if (type == CHIDataType:SnpRespData_I_Fwded_SD_PD) {
+    return Event:SnpRespData_I_Fwded_SD_PD;
+  } else if (type == CHIDataType:SnpRespData_I_PD_Fwded_SC) {
+    return Event:SnpRespData_I_PD_Fwded_SC;
+  } else if (type == CHIDataType:SnpRespData_I_Fwded_SC) {
+    return Event:SnpRespData_I_Fwded_SC;
+  } else {
+    error("Invalid CHIDataType");
+  }
+}
+
+Event snpToEvent (CHIRequestType type) {
+  if (type == CHIRequestType:SnpCleanInvalid) {
+    return Event:SnpCleanInvalid;
+  } else if (type == CHIRequestType:SnpShared) {
+    return Event:SnpShared;
+  } else if (type == CHIRequestType:SnpUnique) {
+    return Event:SnpUnique;
+  } else if (type == CHIRequestType:SnpSharedFwd) {
+    return Event:SnpSharedFwd;
+  } else if (type == CHIRequestType:SnpNotSharedDirtyFwd) {
+    return Event:SnpNotSharedDirtyFwd;
+  } else if (type == CHIRequestType:SnpUniqueFwd) {
+    return Event:SnpUniqueFwd;
+  } else if (type == CHIRequestType:SnpOnce) {
+    return Event:SnpOnce;
+  } else if (type == CHIRequestType:SnpOnceFwd) {
+    return Event:SnpOnceFwd;
+  } else {
+    error("Invalid CHIRequestType");
+  }
+}
+
+//////////////////////////////////////////
+// Cache bank utilization tracking
+
+enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
+  TagArrayRead, desc="Read the dir/cache tag array";
+  TagArrayWrite, desc="Write the dir/cache tag array";
+  DataArrayRead, desc="Read the cache data array";
+  DataArrayWrite, desc="Write the cache data array";
+
+  DestinationAvailable, desc="Check if there is a pending retry from the destination";
+
+  ReplTBEAvailable, desc="Check if a replacement TBE is available";
+}
+
+void recordRequestType(RequestType request_type, Addr addr) {
+  if (request_type == RequestType:DataArrayRead) {
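+    // forward to the CacheMemory model, which accumulates the per-array
+    // access counts behind the bank utilization stats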
cache.recordRequestType(CacheRequestType:DataArrayRead, addr); + } else if (request_type == RequestType:DataArrayWrite) { + cache.recordRequestType(CacheRequestType:DataArrayWrite, addr); + } else if (request_type == RequestType:TagArrayRead) { + cache.recordRequestType(CacheRequestType:TagArrayRead, addr); + } else if (request_type == RequestType:TagArrayWrite) { + cache.recordRequestType(CacheRequestType:TagArrayWrite, addr); + } +} + +bool _checkResourceAvailable(RequestType request_type, Addr addr) { + if (request_type == RequestType:DataArrayRead) { + return cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:DataArrayWrite) { + return cache.checkResourceAvailable(CacheResourceType:DataArray, addr); + } else if (request_type == RequestType:TagArrayRead) { + return cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:TagArrayWrite) { + return cache.checkResourceAvailable(CacheResourceType:TagArray, addr); + } else if (request_type == RequestType:DestinationAvailable) { + if (throttle_req_on_retry) { + MachineID dest := mapAddressToDownstreamMachine(addr); + DPRINTF(RubySlicc, "Checking %s for addr %#x dest %s\n", request_type, addr, dest); + return destsWaitingRetry.isElement(dest) == false; + } else { + return true; + } + } else if (request_type == RequestType:ReplTBEAvailable) { + // if unify_repl_TBEs the replacement uses the same slot as the request + // that initiated it, so the resource is always available + return unify_repl_TBEs || storReplTBEs.areNSlotsAvailable(1); + } else { + error("Invalid RequestType type in checkResourceAvailable"); + return true; + } +} + +bool checkResourceAvailable(RequestType request_type, Addr addr) { + bool avail := _checkResourceAvailable(request_type, addr); + if (avail == false) { + DPRINTF(RubySlicc, "Resource %s not available for addr: %#x\n", request_type, addr); + } + return avail; +} diff --git a/src/mem/ruby/protocol/chi/CHI-cache-ports.sm b/src/mem/ruby/protocol/chi/CHI-cache-ports.sm new file mode 100644 index 0000000000..6a4fe5b16c --- /dev/null +++ b/src/mem/ruby/protocol/chi/CHI-cache-ports.sm @@ -0,0 +1,398 @@ +/* + * Copyright (c) 2021 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+// Outbound port definitions
+
+out_port(reqOutPort, CHIRequestMsg, reqOut);
+out_port(snpOutPort, CHIRequestMsg, snpOut);
+out_port(rspOutPort, CHIResponseMsg, rspOut);
+out_port(datOutPort, CHIDataMsg, datOut);
+out_port(triggerOutPort, TriggerMsg, triggerQueue);
+out_port(retryTriggerOutPort, RetryTriggerMsg, retryTriggerQueue);
+out_port(replTriggerOutPort, TriggerMsg, replTriggerQueue);
+out_port(reqRdyOutPort, CHIRequestMsg, reqRdy);
+out_port(snpRdyOutPort, CHIRequestMsg, snpRdy);
+
+
+// Include helper functions here. Some of them require the outports to be
+// already defined.
+// Notice 'processNextState' and 'wakeupPending*' functions are defined after
+// the required input ports. Currently the SLICC compiler does not support
+// separate declaration and definition of functions in the .sm files.
+include "CHI-cache-funcs.sm";
+
+
+// Inbound port definitions and internal trigger queues
+// Notice we never stall input ports connected to the network.
+// Incoming data and responses are always consumed.
+// Incoming requests/snoops are moved to the respective internal rdy queue
+// if a TBE can be allocated, or retried otherwise.
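+// The rank values below order port service: higher-rank ports are polled
+// first, so timeouts, responses, and data for transactions already in
+// flight drain ahead of newly arriving requests and prefetches.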
+
+// Trigger events from the UD_T state
+in_port(useTimerTable_in, Addr, useTimerTable, rank=11) {
+  if (useTimerTable_in.isReady(clockEdge())) {
+    Addr readyAddress := useTimerTable.nextAddress();
+    trigger(Event:UseTimeout, readyAddress, getCacheEntry(readyAddress),
+            getCurrentActiveTBE(readyAddress));
+  }
+}
+
+
+// Response
+in_port(rspInPort, CHIResponseMsg, rspIn, rank=10,
+        rsc_stall_handler=rspInPort_rsc_stall_handler) {
+  if (rspInPort.isReady(clockEdge())) {
+    printResources();
+    peek(rspInPort, CHIResponseMsg) {
+      TBE tbe := getCurrentActiveTBE(in_msg.addr);
+      trigger(respToEvent(in_msg.type, tbe), in_msg.addr,
+              getCacheEntry(in_msg.addr), tbe);
+    }
+  }
+}
+bool rspInPort_rsc_stall_handler() {
+  error("rspInPort must never stall\n");
+  return false;
+}
+
+
+// Data
+in_port(datInPort, CHIDataMsg, datIn, rank=9,
+        rsc_stall_handler=datInPort_rsc_stall_handler) {
+  if (datInPort.isReady(clockEdge())) {
+    printResources();
+    peek(datInPort, CHIDataMsg) {
+      int received := in_msg.bitMask.count();
+      assert((received <= data_channel_size) && (received > 0));
+      trigger(dataToEvent(in_msg.type), in_msg.addr,
+              getCacheEntry(in_msg.addr), getCurrentActiveTBE(in_msg.addr));
+    }
+  }
+}
+bool datInPort_rsc_stall_handler() {
+  error("datInPort must never stall\n");
+  return false;
+}
+
+
+// Snoops with an allocated TBE
+in_port(snpRdyPort, CHIRequestMsg, snpRdy, rank=8,
+        rsc_stall_handler=snpRdyPort_rsc_stall_handler) {
+  if (snpRdyPort.isReady(clockEdge())) {
+    printResources();
+    peek(snpRdyPort, CHIRequestMsg) {
+      assert(in_msg.allowRetry == false);
+      TBE tbe := getCurrentActiveTBE(in_msg.addr);
+      if (is_valid(tbe) && tbe.hasUseTimeout) {
+        // we may be in BUSY_INTR waiting for a cache block, but if
+        // the timeout is set the snoop must still wait, so trigger the
+        // stall from here to prevent creating other states
+        trigger(Event:SnpStalled, in_msg.addr,
+                getCacheEntry(in_msg.addr), tbe);
+      } else {
+        trigger(snpToEvent(in_msg.type), in_msg.addr,
+                getCacheEntry(in_msg.addr), tbe);
+      }
+    }
+  }
+}
+bool snpRdyPort_rsc_stall_handler() {
+  error("snpRdyPort must never stall\n");
+  return false;
+}
+void wakeupPendingSnps(TBE tbe) {
+  if (tbe.wakeup_pending_snp) {
+    Addr addr := tbe.addr;
+    wakeup_port(snpRdyPort, addr);
+    tbe.wakeup_pending_snp := false;
+  }
+}
+
+
+// Incoming snoops
+// Note snoops are not retried, so the snoop channel is stalled if no
+// Snp TBEs are available
+in_port(snpInPort, CHIRequestMsg, snpIn, rank=7) {
+  if (snpInPort.isReady(clockEdge())) {
+    assert(is_HN == false);
+    printResources();
+    peek(snpInPort, CHIRequestMsg) {
+      assert(in_msg.allowRetry == false);
+      trigger(Event:AllocSnoop, in_msg.addr,
+              getCacheEntry(in_msg.addr), getCurrentActiveTBE(in_msg.addr));
+    }
+  }
+}
+
+
+// Retry action triggers
+// These are handled before other triggers since a retried request should
+// be enqueued ahead of a new request
+// TODO: consider moving DoRetry to the triggerQueue
+in_port(retryTriggerInPort, RetryTriggerMsg, retryTriggerQueue, rank=6,
+        rsc_stall_handler=retryTriggerInPort_rsc_stall_handler) {
+  if (retryTriggerInPort.isReady(clockEdge())) {
+    printResources();
+    peek(retryTriggerInPort, RetryTriggerMsg) {
+      Event ev := in_msg.event;
+      TBE tbe := getCurrentActiveTBE(in_msg.addr);
+      assert((ev == Event:SendRetryAck) || (ev == Event:SendPCrdGrant) ||
+             (ev == Event:DoRetry));
+      if (ev == Event:DoRetry) {
+        assert(is_valid(tbe));
+        if (tbe.is_req_hazard || tbe.is_repl_hazard) {
+          ev := Event:DoRetry_Hazard;
+        }
+      }
+      trigger(ev, in_msg.addr,
+              getCacheEntry(in_msg.addr), tbe);
+    }
+  }
+}
+bool retryTriggerInPort_rsc_stall_handler() {
+  DPRINTF(RubySlicc, "Retry trigger queue resource stall\n");
+  retryTriggerInPort.recycle(clockEdge(), cyclesToTicks(stall_recycle_lat));
+  return true;
+}
+
+
+// Action triggers
+in_port(triggerInPort, TriggerMsg, triggerQueue, rank=5,
+        rsc_stall_handler=triggerInPort_rsc_stall_handler) {
+  if (triggerInPort.isReady(clockEdge())) {
+    printResources();
+    peek(triggerInPort, TriggerMsg) {
+      TBE tbe := getCurrentActiveTBE(in_msg.addr);
+      assert(is_valid(tbe));
+      if (in_msg.from_hazard != (tbe.is_req_hazard || tbe.is_repl_hazard)) {
+        // possible when handling a snoop hazard and an action from the
+        // initial transaction got woken up. Stall the action until the
+        // hazard ends
+        assert(in_msg.from_hazard == false);
+        assert(tbe.is_req_hazard || tbe.is_repl_hazard);
+        trigger(Event:ActionStalledOnHazard, in_msg.addr,
+                getCacheEntry(in_msg.addr), tbe);
+      } else {
+        trigger(tbe.pendAction, in_msg.addr, getCacheEntry(in_msg.addr), tbe);
+      }
+    }
+  }
+}
+bool triggerInPort_rsc_stall_handler() {
+  DPRINTF(RubySlicc, "Trigger queue resource stall\n");
+  triggerInPort.recycle(clockEdge(), cyclesToTicks(stall_recycle_lat));
+  return true;
+}
+void wakeupPendingTgrs(TBE tbe) {
+  if (tbe.wakeup_pending_tgr) {
+    Addr addr := tbe.addr;
+    wakeup_port(triggerInPort, addr);
+    tbe.wakeup_pending_tgr := false;
+  }
+}
+
+
+// Internally triggered evictions
+// No stall handler for this one since it doesn't make sense to try the next
+// request when out of TBEs
+in_port(replTriggerInPort, ReplacementMsg, replTriggerQueue, rank=4) {
+  if (replTriggerInPort.isReady(clockEdge())) {
+    printResources();
+    peek(replTriggerInPort, ReplacementMsg) {
+      TBE tbe := getCurrentActiveTBE(in_msg.addr);
+      CacheEntry cache_entry := getCacheEntry(in_msg.addr);
+      Event trigger := Event:null;
+      if (is_valid(cache_entry) &&
+          ((upstreamHasUnique(cache_entry.state) && dealloc_backinv_unique) ||
+           (upstreamHasShared(cache_entry.state) && dealloc_backinv_shared))) {
+        trigger := Event:Global_Eviction;
+      } else {
+        if (is_HN) {
+          trigger := Event:LocalHN_Eviction;
+        } else {
+          trigger := Event:Local_Eviction;
+        }
+      }
+      trigger(trigger, in_msg.addr, cache_entry, tbe);
+    }
+  }
+}
+
+
+// Requests with an allocated TBE
+in_port(reqRdyPort, CHIRequestMsg, reqRdy, rank=3,
+        rsc_stall_handler=reqRdyPort_rsc_stall_handler) {
+  if (reqRdyPort.isReady(clockEdge())) {
+    printResources();
+    peek(reqRdyPort, CHIRequestMsg) {
+      CacheEntry cache_entry := getCacheEntry(in_msg.addr);
+      TBE tbe := getCurrentActiveTBE(in_msg.addr);
+
+      DirEntry dir_entry := getDirEntry(in_msg.addr);
+
+      // Special case for possibly stale writebacks or evicts
+      if (in_msg.type == CHIRequestType:WriteBackFull) {
+        if (is_invalid(dir_entry) || (dir_entry.ownerExists == false) ||
+            (dir_entry.owner != in_msg.requestor)) {
+          trigger(Event:WriteBackFull_Stale, in_msg.addr, cache_entry, tbe);
+        }
+      } else if (in_msg.type == CHIRequestType:WriteEvictFull) {
+        if (is_invalid(dir_entry) || (dir_entry.ownerExists == false) ||
+            (dir_entry.ownerIsExcl == false) || (dir_entry.owner != in_msg.requestor)) {
+          trigger(Event:WriteEvictFull_Stale, in_msg.addr, cache_entry, tbe);
+        }
+      } else if (in_msg.type == CHIRequestType:WriteCleanFull) {
+        if (is_invalid(dir_entry) || (dir_entry.ownerExists == false) ||
+            (dir_entry.ownerIsExcl == false) || (dir_entry.owner != in_msg.requestor)) {
+          trigger(Event:WriteCleanFull_Stale, in_msg.addr, cache_entry, tbe);
+        }
+      } else if (in_msg.type ==
CHIRequestType:Evict) { + if (is_invalid(dir_entry) || + (dir_entry.sharers.isElement(in_msg.requestor) == false)) { + trigger(Event:Evict_Stale, in_msg.addr, cache_entry, tbe); + } + } + + // Normal request path + trigger(reqToEvent(in_msg.type, in_msg.is_local_pf), in_msg.addr, cache_entry, tbe); + } + } +} +bool reqRdyPort_rsc_stall_handler() { + DPRINTF(RubySlicc, "ReqRdy queue resource stall\n"); + reqRdyPort.recycle(clockEdge(), cyclesToTicks(stall_recycle_lat)); + return true; +} +void wakeupPendingReqs(TBE tbe) { + if (tbe.wakeup_pending_req) { + Addr addr := tbe.addr; + wakeup_port(reqRdyPort, addr); + tbe.wakeup_pending_req := false; + } +} + + +// Incoming new requests +in_port(reqInPort, CHIRequestMsg, reqIn, rank=2, + rsc_stall_handler=reqInPort_rsc_stall_handler) { + if (reqInPort.isReady(clockEdge())) { + printResources(); + peek(reqInPort, CHIRequestMsg) { + if (in_msg.allowRetry) { + trigger(Event:AllocRequest, in_msg.addr, + getCacheEntry(in_msg.addr), getCurrentActiveTBE(in_msg.addr)); + } else { + trigger(Event:AllocRequestWithCredit, in_msg.addr, + getCacheEntry(in_msg.addr), getCurrentActiveTBE(in_msg.addr)); + } + } + } +} +bool reqInPort_rsc_stall_handler() { + error("reqInPort must never stall\n"); + return false; +} + + +// Incoming new sequencer requests +in_port(seqInPort, RubyRequest, mandatoryQueue, rank=1) { + if (seqInPort.isReady(clockEdge())) { + printResources(); + peek(seqInPort, RubyRequest) { + trigger(Event:AllocSeqRequest, in_msg.LineAddress, + getCacheEntry(in_msg.LineAddress), + getCurrentActiveTBE(in_msg.LineAddress)); + } + } +} + + +// Incoming new prefetch requests +in_port(pfInPort, RubyRequest, prefetchQueue, rank=0) { + if (pfInPort.isReady(clockEdge())) { + printResources(); + peek(pfInPort, RubyRequest) { + trigger(Event:AllocPfRequest, in_msg.LineAddress, + getCacheEntry(in_msg.LineAddress), + getCurrentActiveTBE(in_msg.LineAddress)); + } + } +} + +void processNextState(Addr address, TBE tbe, CacheEntry cache_entry) { + assert(is_valid(tbe)); + DPRINTF(RubySlicc, "GoToNextState expected_req_resp=%d expected_snp_resp=%d snd_pendEv=%d snd_pendBytes=%d\n", + tbe.expected_req_resp.expected(), + tbe.expected_snp_resp.expected(), + tbe.snd_pendEv, tbe.snd_pendBytes.count()); + + // if no pending trigger and not expecting to receive anything, enqueue + // next + bool has_nb_trigger := (tbe.actions.empty() == false) && + tbe.actions.frontNB() && + (tbe.snd_pendEv == false); + int expected_msgs := tbe.expected_req_resp.expected() + + tbe.expected_snp_resp.expected() + + tbe.snd_pendBytes.count(); + if ((tbe.pendAction == Event:null) && ((expected_msgs == 0) || has_nb_trigger)) { + Cycles trigger_latency := intToCycles(0); + if (tbe.delayNextAction > curTick()) { + trigger_latency := ticksToCycles(tbe.delayNextAction) - + ticksToCycles(curTick()); + tbe.delayNextAction := intToTick(0); + } + + tbe.pendAction := Event:null; + if (tbe.actions.empty()) { + // time to go to the final state + tbe.pendAction := Event:Final; + } else { + tbe.pendAction := tbe.actions.front(); + tbe.actions.pop(); + } + assert(tbe.pendAction != Event:null); + enqueue(triggerOutPort, TriggerMsg, trigger_latency) { + out_msg.addr := tbe.addr; + out_msg.from_hazard := tbe.is_req_hazard || tbe.is_repl_hazard; + } + } + + printTBEState(tbe); + + // we might be going to BUSY_INTERRUPTABLE so wakeup pending snoops + // if any + wakeupPendingSnps(tbe); +} diff --git a/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm b/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm new file 
mode 100644 index 0000000000..d69d28e0c5 --- /dev/null +++ b/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm @@ -0,0 +1,1218 @@ +/* + * Copyright (c) 2021 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+////////////////////////////////////////////////////////////////////////////
+// CHI-cache transition definition
+////////////////////////////////////////////////////////////////////////////
+
+// Allocate resources and move to the ready queue
+transition({I,SC,UC,SD,UD,RU,RSC,RSD,RUSD,SC_RSC,UC_RSC,SD_RSC,UD_RSC,UC_RU,UD_RU,UD_RSD,SD_RSD,RUSC,
+            BUSY_INTR,BUSY_BLKD}, AllocRequest) {
+  AllocateTBE_Request;
+}
+
+transition({I,SC,UC,SD,UD,RU,RSC,RSD,RUSD,SC_RSC,UC_RSC,SD_RSC,UD_RSC,UC_RU,UD_RU,UD_RSD,SD_RSD,RUSC,
+            BUSY_INTR,BUSY_BLKD}, AllocRequestWithCredit) {
+  AllocateTBE_Request_WithCredit;
+}
+
+transition({I,SC,UC,SD,UD,RU,RSC,RSD,RUSD,SC_RSC,UC_RSC,SD_RSC,UD_RSC,UC_RU,UD_RU,UD_RSD,SD_RSD,RUSC,
+            BUSY_INTR,BUSY_BLKD}, SendRetryAck) {
+  Send_RetryAck;
+  Pop_RetryTriggerQueue;
+}
+
+transition({I,SC,UC,SD,UD,RU,RSC,RSD,RUSD,SC_RSC,UC_RSC,SD_RSC,UD_RSC,UC_RU,UD_RU,UD_RSD,SD_RSD,RUSC,
+            BUSY_INTR,BUSY_BLKD}, SendPCrdGrant) {
+  Send_PCrdGrant;
+  Pop_RetryTriggerQueue;
+}
+
+transition({I,SC,UC,SD,UD,UD_T,RU,RSC,RSD,RUSD,SC_RSC,UC_RSC,SD_RSC,UD_RSC,UC_RU,UD_RU,UD_RSD,SD_RSD,RUSC,
+            BUSY_INTR,BUSY_BLKD}, AllocSnoop) {
+  AllocateTBE_Snoop;
+}
+
+transition({UD,UD_T,SD,UC,SC,I,BUSY_INTR,BUSY_BLKD}, AllocSeqRequest) {
+  AllocateTBE_SeqRequest;
+}
+
+transition({I,SC,UC,SD,UD,UD_T,RU,RSC,RSD,RUSD,SC_RSC,SD_RSC,SD_RSD,UC_RSC,UC_RU,UD_RU,UD_RSD,UD_RSC,RUSC,
+            BUSY_INTR,BUSY_BLKD}, AllocPfRequest) {
+  AllocateTBE_PfRequest;
+}
+
+transition({BUSY_INTR,BUSY_BLKD}, TagArrayRead) {TagArrayRead} {
+  Pop_TriggerQueue;
+  TagArrayRead;
+  ProcessNextState_ClearPending;
+}
+
+transition({BUSY_INTR,BUSY_BLKD}, TagArrayWrite) {TagArrayWrite} {
+  Pop_TriggerQueue;
+  TagArrayWrite;
+  ProcessNextState_ClearPending;
+}
+
+transition({BUSY_INTR,BUSY_BLKD}, DataArrayRead) {DataArrayRead} {
+  Pop_TriggerQueue;
+  DataArrayRead;
+  ProcessNextState_ClearPending;
+}
+
+// goes to BUSY_INTR as we may need to accept snoops while waiting
+// on a potential replacement
+transition({BUSY_INTR,BUSY_BLKD}, CheckCacheFill, BUSY_INTR) {
+  CheckCacheFill;
+  // CheckCacheFill either does Pop_TriggerQueue+ProcessNextState_ClearPending
+  // or a stall depending on block availability
+}
+
+transition({BUSY_INTR,BUSY_BLKD}, DataArrayWrite) {DataArrayWrite} {
+  Pop_TriggerQueue;
+  DataArrayWrite;
+  ProcessNextState_ClearPending;
+}
+
+transition({BUSY_INTR,BUSY_BLKD}, DataArrayWriteOnFill) {DataArrayWrite} {
+  Pop_TriggerQueue;
+  Profile_Fill;
+  DataArrayWrite;
+  ProcessNextState_ClearPending;
+}
+
+transition({BUSY_INTR,BUSY_BLKD}, ReadHitPipe) {
+  Pop_TriggerQueue;
+  ReadHitPipe;
+  ProcessNextState_ClearPending;
+}
+
+transition({BUSY_INTR,BUSY_BLKD}, ReadMissPipe) {
+  Pop_TriggerQueue;
+  ReadMissPipe;
+  ProcessNextState_ClearPending;
+}
+
+transition({BUSY_INTR,BUSY_BLKD}, WriteFEPipe) {
+  Pop_TriggerQueue;
+  WriteFEPipe;
+  ProcessNextState_ClearPending;
+}
+
+transition({BUSY_INTR,BUSY_BLKD}, WriteBEPipe) {
+  Pop_TriggerQueue;
+  WriteBEPipe;
+  ProcessNextState_ClearPending;
+}
+
+transition({BUSY_INTR,BUSY_BLKD}, FillPipe) {
+  Pop_TriggerQueue;
+  FillPipe;
+  ProcessNextState_ClearPending;
+}
+
+transition({BUSY_INTR,BUSY_BLKD}, SnpSharedPipe) {
+  Pop_TriggerQueue;
+  SnpSharedPipe;
+  ProcessNextState_ClearPending;
+}
+
+transition({BUSY_INTR,BUSY_BLKD}, SnpInvPipe) {
+  Pop_TriggerQueue;
+  SnpInvPipe;
+  ProcessNextState_ClearPending;
+}
+
+transition({BUSY_INTR,BUSY_BLKD}, SnpOncePipe) {
+  Pop_TriggerQueue;
+  SnpOncePipe;
+  ProcessNextState_ClearPending;
+}
+
+// ReadShared / ReadNotSharedDirty
+
+transition(I,
{ReadShared,ReadNotSharedDirty}, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadShared_Miss; + Allocate_DirEntry; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({RSC,RUSC}, {ReadShared,ReadNotSharedDirty}, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadShared_HitUpstream_NoOwner; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD,SD,UC,SC}, {ReadShared,ReadNotSharedDirty}, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadShared_Hit; + Allocate_DirEntry; + Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD_RSC,SD_RSC,UC_RSC,SC_RSC,UD_RSD,SD_RSD}, {ReadShared,ReadNotSharedDirty}, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadShared_Hit; + Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD_RU,UC_RU,RU,RSD,RUSD}, {ReadShared,ReadNotSharedDirty}, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadShared_HitUpstream; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +// ReadOnce + +transition(I, ReadOnce, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadOnce_Miss; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD,SD,UC,SC,UD_RSC,SD_RSC,UC_RSC,SC_RSC,UD_RSD,SD_RSD}, ReadOnce, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadOnce_Hit; + Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD_RU,UC_RU,RU,RSD,RUSD,RSC,RUSC}, ReadOnce, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadOnce_HitUpstream; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + + +// ReadUnique + +transition(I, {ReadUnique,ReadUnique_PoC}, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadUnique_Miss; + Allocate_DirEntry; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD,UC}, {ReadUnique,ReadUnique_PoC}, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadUnique_Hit; + Allocate_DirEntry; + Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD_RSC,UC_RSC,UD_RSD}, {ReadUnique,ReadUnique_PoC}, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadUnique_Hit_InvUpstream; + Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD_RU,UC_RU,RU,RUSD,RUSC}, {ReadUnique,ReadUnique_PoC}, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadUnique_HitUpstream; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({SC,SD}, ReadUnique_PoC, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadUnique_AutoUpgrade; + Initiate_ReadUnique_Hit; + Allocate_DirEntry; + Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({SC_RSC, SD_RSC, SD_RSD}, ReadUnique_PoC, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadUnique_AutoUpgrade; + Initiate_ReadUnique_Hit_InvUpstream; + Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({RSC,RSD}, ReadUnique_PoC, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadUnique_AutoUpgrade; + Initiate_ReadUnique_HitUpstream; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + + +transition({SC,SD}, ReadUnique, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadUnique_Upgrade; + Allocate_DirEntry; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({SC_RSC, SD_RSC, RSC, SD_RSD, RSD}, ReadUnique, BUSY_BLKD) { + Initiate_Request; + Initiate_ReadUnique_Upgrade; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +// CleanUnique + +transition({I, SC, UC, SD, UD, RU, RSC, RSD, RUSD, RUSC, + SC_RSC, SD_RSD, SD_RSC, UC_RSC, UC_RU, UD_RU, UD_RSD, UD_RSC}, CleanUnique, BUSY_BLKD) { + Initiate_Request; + Initiate_CleanUnique; + Pop_ReqRdyQueue; + ProcessNextState; +} 
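+// Note the common shape of the request transitions: Initiate_Request and the
+// Initiate_* variants fill in the TBE and its per-transaction action list,
+// and ProcessNextState (defined in CHI-cache-ports.sm) then schedules the
+// first queued action through the trigger queue.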
+ +// WriteUniquePtl + +transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC}, + {WriteUnique, WriteUniquePtl_PoC, WriteUniqueFull_PoC, WriteUniqueFull_PoC_Alloc}, + BUSY_BLKD) { + Initiate_Request; + Initiate_WriteUnique_LocalWrite; + Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({SD, SD_RSD, SD_RSC, SC, SC_RSC}, + {WriteUniquePtl_PoC, WriteUniqueFull_PoC, WriteUniqueFull_PoC_Alloc}, + BUSY_BLKD) { + Initiate_Request; + Initiate_WriteUnique_LocalWrite; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({RSC,RSD,RUSD,RUSC,RU,I}, WriteUniqueFull_PoC_Alloc, BUSY_BLKD) { + Initiate_Request; + Initiate_WriteUnique_LocalWrite; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({SD, SD_RSD, SD_RSC, SC, SC_RSC}, + {WriteUnique}, BUSY_BLKD) { + Initiate_Request; + Initiate_WriteUnique_LocalWrite_AfterUpgrade; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({RSD,RUSD,RUSC,RU}, {WriteUniquePtl_PoC, WriteUniqueFull_PoC}, BUSY_BLKD) { + Initiate_Request; + Initiate_WriteUnique_Writeback; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({RSC,I}, {WriteUniquePtl_PoC, WriteUniqueFull_PoC}, BUSY_BLKD) { + Initiate_Request; + Initiate_WriteUnique_PartialWrite; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({RSC,RSD,RUSD,RUSC,RU,I}, WriteUnique, BUSY_BLKD) { + Initiate_Request; + Initiate_WriteUnique_Forward; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + + +// Load / Store from sequencer & Prefetch from prefetcher + +transition({UD,UD_T,SD,UC,SC}, Load, BUSY_BLKD) { + Initiate_Request; + Initiate_LoadHit; + Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +// Prefetch hits if either this cache or one of its upstream caches has a +// valid block. +// In some states, using the normal hit path for a prefetch will deallocate +// the local cache entry at the end since our data is stale. If the cache is +// inclusive for unique data we need to keep the block, so just bypass the +// normal path. 
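+// The express path below completes the prefetch in place: the transition
+// names no next state, so the line stays resident and no busy state is
+// entered.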
+transition({UD,UD_T,SD,UC,SC,RU,RSC,RSD,RUSD,SC_RSC,SD_RSC,SD_RSD,UC_RSC,UC_RU,UD_RU,UD_RSD,UD_RSC}, Prefetch) { + Callback_ExpressPrefetchHit; + Pop_ReqRdyQueue; +} + +transition(BUSY_BLKD, LoadHit) { + Pop_TriggerQueue; + Callback_LoadHit; + ProcessNextState_ClearPending; +} + +transition({UD,UD_T,UC}, Store, BUSY_BLKD) { + Initiate_Request; + Initiate_StoreHit; + Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition(BUSY_BLKD, StoreHit) { + Pop_TriggerQueue; + Callback_StoreHit; + ProcessNextState_ClearPending; +} + +transition(I, {Load,Prefetch}, BUSY_BLKD) { + Initiate_Request; + Initiate_LoadMiss; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition(I, Store, BUSY_BLKD) { + Initiate_Request; + Initiate_StoreMiss; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({SD,SC}, Store, BUSY_BLKD) { + Initiate_Request; + Initiate_StoreUpgrade; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +// write timeout + +transition(UD_T, UseTimeout, UD) { + Unset_Timeout_Cache; +} + +transition({BUSY_BLKD,BUSY_INTR}, UseTimeout) { + Unset_Timeout_TBE; +} + +// Evict from Upstream + +transition({UD_RSC,SD_RSC,UC_RSC,SC_RSC,RSC,RSD,RUSD,RUSC,UD_RSD,SD_RSD}, Evict, BUSY_BLKD) { + Initiate_Request; + Initiate_Evict; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD, UD_RSC, SD_RSC, UC_RSC, SC_RSC, UD_RU, UC_RU, UD_RSD, SD_RSD, RU, RSC, RSD, RUSD, RUSC, SD, UC, SC, I}, + Evict_Stale) { + Initiate_Request_Stale; + Send_CompI_Stale; + Finalize_DeallocateRequest; + Pop_ReqRdyQueue; +} + +// WriteBack from upstream + +transition({UD_RU, UC_RU, RU, UD_RSD, SD_RSD, RSD, RUSD}, {WriteBackFull, WriteCleanFull}, BUSY_BLKD) { + Initiate_Request; + Initiate_CopyBack; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD_RU, UC_RU, RU}, WriteEvictFull, BUSY_BLKD) { + Initiate_Request; + Initiate_CopyBack; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD_RSC, UC_RSC, SC_RSC, UD, RU, RSD, RUSD, RUSC, UD_RSD, SD_RSD, RSC, UD_RU, UC_RU, SD, UC, SC, I}, + {WriteBackFull_Stale, WriteEvictFull_Stale, WriteCleanFull_Stale}, BUSY_BLKD) { + Initiate_Request_Stale; + Initiate_CopyBack_Stale; + Pop_ReqRdyQueue; + ProcessNextState; +} + +// Cache Replacement + +// When in UD_RU,UC_RU,UD_RSD,SD_RSD we also just drop the line since an upstream +// cache has an up-to-data line that it will either WriteBack or WriteEvict +transition({SC,UC,SC_RSC,UC_RSC, + UD_RU,UC_RU,UD_RSD,SD_RSD}, LocalHN_Eviction, BUSY_BLKD) {ReplTBEAvailable} { + Initiate_Replacement; + Initiate_Replacement_JustDrop; + Profile_Eviction; + Deallocate_CacheBlock; + Pop_ReplTriggerQueue; + ProcessNextState; +} + +transition({UD,SD,UD_RSC,SD_RSC}, LocalHN_Eviction, BUSY_BLKD) {ReplTBEAvailable} { + Initiate_Replacement; + Initiate_Replacement_WB; + Profile_Eviction; + Deallocate_CacheBlock; + Pop_ReplTriggerQueue; + ProcessNextState; +} + +transition(SC, Local_Eviction, BUSY_BLKD) {ReplTBEAvailable} { + Initiate_Replacement; + Initiate_Replacement_Evict; + Profile_Eviction; + Deallocate_CacheBlock; + Pop_ReplTriggerQueue; + ProcessNextState; +} + +transition({UD,SD,UC}, Local_Eviction, BUSY_BLKD) {ReplTBEAvailable} { + Initiate_Replacement; + Initiate_Replacement_WB; + Profile_Eviction; + Deallocate_CacheBlock; + Pop_ReplTriggerQueue; + ProcessNextState; +} + +transition({UD_RU,UC_RU,UD_RSD,SD_RSD,SC_RSC,UC_RSC}, Local_Eviction, BUSY_BLKD) {ReplTBEAvailable} { + Initiate_Replacement; + Initiate_Replacement_JustDrop; + Profile_Eviction; + Deallocate_CacheBlock; + 
+  Pop_ReplTriggerQueue;
+  ProcessNextState;
+}
+
+transition({UD_RSC,SD_RSC}, Local_Eviction, BUSY_BLKD) {ReplTBEAvailable} {
+  Initiate_Replacement;
+  Initiate_Replacement_WB;
+  Profile_Eviction;
+  Deallocate_CacheBlock;
+  Pop_ReplTriggerQueue;
+  ProcessNextState;
+}
+
+transition({UD_RSC,SD_RSC,UC_RSC,UD_RU,UC_RU,UD_RSD}, Global_Eviction, BUSY_BLKD) {ReplTBEAvailable} {
+  Initiate_Replacement;
+  Initiate_Replacement_WB_BackInvalidate;
+  Profile_Eviction;
+  Deallocate_CacheBlock;
+  Deallocate_DirEntry;
+  Pop_ReplTriggerQueue;
+  ProcessNextState;
+}
+
+transition(SC_RSC, Global_Eviction, BUSY_BLKD) {ReplTBEAvailable} {
+  Initiate_Replacement;
+  Initiate_Replacement_Evict_BackInvalidte;
+  Profile_Eviction;
+  Deallocate_CacheBlock;
+  Deallocate_DirEntry;
+  Pop_ReplTriggerQueue;
+  ProcessNextState;
+}
+
+// This could happen if we enqueued the eviction when the line was busy
+// or couldn't handle it immediately due to no TBE available
+transition({RU,RSC,RSD,RUSD,I}, {Local_Eviction, LocalHN_Eviction}) {
+  Pop_ReplTriggerQueue;
+}
+transition(I, Global_Eviction) {
+  Pop_ReplTriggerQueue;
+}
+
+// Snoops
+
+// SnpCleanInvalid/SnpUnique/SnpUniqueFwd
+// All invalidating snoops have a similar behavior
+
+transition({UD,SD,UC,SC,UD_RSC,SD_RSC,UC_RSC,UD_RU,UC_RU,RU,RUSD,RUSC,RSD,UD_RSD,SD_RSD,SC_RSC,RSC},
+           {SnpUnique,SnpUniqueFwd,SnpCleanInvalid}, BUSY_BLKD) {
+  Initiate_Snoop;
+  Initiate_InvalidationSnoop;
+  Profile_Eviction;
+  Pop_SnoopRdyQueue;
+  ProcessNextState;
+}
+
+transition(BUSY_INTR, {SnpUnique,SnpUniqueFwd,SnpCleanInvalid}, BUSY_BLKD) {
+  Initiate_Snoop_Hazard;
+  Initiate_InvalidationSnoop;
+  Profile_Eviction;
+  Pop_SnoopRdyQueue;
+  ProcessNextState;
+}
+
+// SnpShared / SnpNotSharedDirty
+
+transition({UD,UD_RSC,SD,SD_RSC,UC,UC_RSC,UD_RU,UC_RU,RU,UD_RSD,SD_RSD,RSD,RUSD,RUSC},
+           {SnpShared,SnpSharedFwd,SnpNotSharedDirtyFwd}, BUSY_BLKD) {
+  Initiate_Snoop;
+  Initiate_SnpShared;
+  Pop_SnoopRdyQueue;
+  ProcessNextState;
+}
+
+transition({SC, SC_RSC, RSC}, {SnpSharedFwd, SnpNotSharedDirtyFwd}, BUSY_BLKD) {
+  Initiate_Snoop;
+  Initiate_SnpShared;
+  Pop_SnoopRdyQueue;
+  ProcessNextState;
+}
+
+transition(BUSY_INTR, {SnpShared,SnpSharedFwd,SnpNotSharedDirtyFwd}, BUSY_BLKD) {
+  Initiate_Snoop_Hazard;
+  Initiate_SnpShared;
+  Pop_SnoopRdyQueue;
+  ProcessNextState;
+}
+
+// SnpOnce
+transition({UD,UD_T,UD_RSC,UD_RU,UD_RSD,SD,SD_RSC,SD_RSD,UC,UC_RSC,UC_RU,SC,SC_RSC,RU,RSC,RSD,RUSD,RUSC},
+           {SnpOnce,SnpOnceFwd}, BUSY_BLKD) {
+  Initiate_Snoop;
+  Initiate_SnpOnce;
+  Pop_SnoopRdyQueue;
+  ProcessNextState;
+}
+
+transition(BUSY_INTR, {SnpOnce,SnpOnceFwd}, BUSY_BLKD) {
+  Initiate_Snoop_Hazard;
+  Initiate_SnpOnce;
+  Pop_SnoopRdyQueue;
+  ProcessNextState;
+}
+
+
+// Stalls
+
+transition({BUSY_BLKD,BUSY_INTR},
+           {ReadShared, ReadNotSharedDirty, ReadUnique, ReadUnique_PoC,
+            ReadOnce, CleanUnique,
+            Load, Store, Prefetch,
+            WriteBackFull, WriteBackFull_Stale,
+            WriteEvictFull, WriteEvictFull_Stale,
+            WriteCleanFull, WriteCleanFull_Stale,
+            Evict, Evict_Stale,
+            WriteUnique,WriteUniquePtl_PoC,
+            WriteUniqueFull_PoC,WriteUniqueFull_PoC_Alloc}) {
+  StallRequest;
+}
+
+transition({BUSY_BLKD,BUSY_INTR},
+           {Global_Eviction, Local_Eviction, LocalHN_Eviction}) {
+  StallLocalEviction;
+}
+
+// Kill the timer and try again as a snoop may be pending as well
+transition(UD_T, {Global_Eviction, Local_Eviction, LocalHN_Eviction}, UD) {
+  Unset_Timeout_Cache;
+  Pop_ReplTriggerQueue;
+}
+
+transition(BUSY_BLKD,
+           {SnpCleanInvalid,SnpShared,SnpUnique,SnpSharedFwd,SnpUniqueFwd,
+            SnpNotSharedDirtyFwd, SnpOnce}) {
+  StallSnoop;
+}
+
+transition({BUSY_BLKD,BUSY_INTR}, SnpStalled) {
+  StallSnoop;
+}
+
+transition(UD_T, {SnpCleanInvalid,SnpShared,SnpUnique,SnpSharedFwd,SnpUniqueFwd,
+                  SnpNotSharedDirtyFwd}) {
+  StallSnoop_NoTBE;
+}
+
+transition({BUSY_BLKD,BUSY_INTR}, ActionStalledOnHazard) {
+  StallActionOnHazard;
+}
+
+// Trigger-specific transitions
+
+transition(BUSY_BLKD, SendWriteBackOrWriteEvict, BUSY_INTR) {DestinationAvailable} {
+  Pop_TriggerQueue;
+  Send_WriteBackOrWriteEvict;
+  Profile_OutgoingStart;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendWriteClean, BUSY_INTR) {DestinationAvailable} {
+  Pop_TriggerQueue;
+  Send_WriteCleanFull;
+  Profile_OutgoingStart;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendWriteUnique, BUSY_INTR) {DestinationAvailable} {
+  Pop_TriggerQueue;
+  Send_WriteUnique;
+  Profile_OutgoingStart;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendWriteNoSnp, BUSY_INTR) {DestinationAvailable} {
+  Pop_TriggerQueue;
+  Send_WriteNoSnp;
+  Profile_OutgoingStart;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendWriteNoSnpPartial, BUSY_INTR) {DestinationAvailable} {
+  Pop_TriggerQueue;
+  Send_WriteNoSnp_Partial;
+  Profile_OutgoingStart;
+  ProcessNextState_ClearPending;
+}
+
+
+transition(BUSY_BLKD, SendEvict, BUSY_INTR) {DestinationAvailable} {
+  Pop_TriggerQueue;
+  Send_Evict;
+  Profile_OutgoingStart;
+  ProcessNextState_ClearPending;
+}
+
+// May get here from BUSY_INTR
+transition({BUSY_BLKD, BUSY_INTR}, SendCompData, BUSY_BLKD) {
+  Pop_TriggerQueue;
+  Send_CompData;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendWBData) {
+  Pop_TriggerQueue;
+  Send_WBData;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendWUData) {
+  Pop_TriggerQueue;
+  Send_WUData;
+  CheckWUComp;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendWUDataCB) {
+  Pop_TriggerQueue;
+  Callback_WriteUnique;
+  Send_WUData;
+  CheckWUComp;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendInvSnpResp) {
+  Pop_TriggerQueue;
+  Send_InvSnpResp;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendSnpData) {
+  Pop_TriggerQueue;
+  Send_SnpRespData;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendSnpUniqueFwdCompData) {
+  Pop_TriggerQueue;
+  Send_CompData_SnpUniqueFwd;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendSnpSharedFwdCompData) {
+  Pop_TriggerQueue;
+  Send_CompData_SnpSharedFwd;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendSnpNotSharedDirtyFwdCompData) {
+  Pop_TriggerQueue;
+  Send_CompData_SnpNSDFwd;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendSnpOnceFwdCompData) {
+  Pop_TriggerQueue;
+  Send_CompData_SnpOnceFwd;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendSnpFwdedData) {
+  Pop_TriggerQueue;
+  Send_SnpRespDataFwded;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendSnpFwdedResp) {
+  Pop_TriggerQueue;
+  Send_FwdSnpResp;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendCompAck) {
+  Pop_TriggerQueue;
+  Send_CompAck;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendSnpIResp) {
+  Pop_TriggerQueue;
+  Send_SnpRespI;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendCompIResp) {
+  Pop_TriggerQueue;
+  Send_CompI;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendCompUCResp) {
+  Pop_TriggerQueue;
+  Send_CompUC;
+  ProcessNextState_ClearPending;
+}
+
+transition(BUSY_BLKD, SendRespSepData) {
+  Pop_TriggerQueue;
+  Send_RespSepData;
+
ProcessNextState_ClearPending; +} + +transition({BUSY_INTR, BUSY_BLKD}, WaitCompAck) { + Pop_TriggerQueue; + ExpectCompAck; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, RestoreFromHazard, BUSY_INTR) { + Pop_TriggerQueue; + RestoreFromHazard; +} + +transition(BUSY_BLKD, SendReadShared, BUSY_INTR) {DestinationAvailable} { + Pop_TriggerQueue; + Send_ReadShared; + Profile_OutgoingStart; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendReadOnce, BUSY_INTR) {DestinationAvailable} { + Pop_TriggerQueue; + Send_ReadOnce; + Profile_OutgoingStart; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendReadUnique, BUSY_INTR) {DestinationAvailable} { + Pop_TriggerQueue; + Send_ReadUnique; + Profile_OutgoingStart; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendCleanUnique, BUSY_INTR) {DestinationAvailable} { + Pop_TriggerQueue; + Send_CleanUnique; + Profile_OutgoingStart; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendReadNoSnp, BUSY_INTR) {DestinationAvailable} { + Pop_TriggerQueue; + Send_ReadNoSnp; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendReadNoSnpDMT, BUSY_INTR) {DestinationAvailable} { + Pop_TriggerQueue; + Send_ReadNoSnpDMT; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendSnpShared) { + Pop_TriggerQueue; + Send_SnpShared; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendSnpSharedFwdToOwner) { + Pop_TriggerQueue; + Send_SnpSharedFwd_ToOwner; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendSnpSharedFwdToSharer) { + Pop_TriggerQueue; + Send_SnpSharedFwd_ToSharer; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendSnpOnceFwd) { + Pop_TriggerQueue; + Send_SnpOnceFwd; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendSnpOnce) { + Pop_TriggerQueue; + Send_SnpOnce; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendSnpUnique) { + Pop_TriggerQueue; + Send_SnpUnique; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendSnpUniqueRetToSrc) { + Pop_TriggerQueue; + Send_SnpUnique_RetToSrc; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendSnpUniqueFwd) { + Pop_TriggerQueue; + Send_SnpUniqueFwd; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendSnpCleanInvalid) { + Pop_TriggerQueue; + Send_SnpCleanInvalid; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendSnpCleanInvalidNoReq) { + Pop_TriggerQueue; + Send_SnpCleanInvalid_NoReq; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendCompDBIDResp) { + Pop_TriggerQueue; + Send_CompDBIDResp; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendCompDBIDResp_WU) { + Pop_TriggerQueue; + ExpectNCBWrData; + Send_CompDBIDResp; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendDBIDResp_WU) { + Pop_TriggerQueue; + ExpectNCBWrData; + Send_DBIDResp; + ProcessNextState_ClearPending; +} + +transition({BUSY_BLKD,BUSY_INTR}, SendComp_WU) { + Pop_TriggerQueue; + Send_Comp_WU; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendCompDBIDRespStale) { + Pop_TriggerQueue; + Send_CompDBIDResp_Stale; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, MaintainCoherence) { + Pop_TriggerQueue; + Initiate_MaitainCoherence; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, FinishCleanUnique) { + Pop_TriggerQueue; + Finish_CleanUnique; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, CheckUpgrade_FromStore) { + Pop_TriggerQueue; + Callback_Miss; // note: Callback 
happens only if tbe.dataValid + CheckUpgrade_FromStore; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, CheckUpgrade_FromCU) { + Pop_TriggerQueue; + CheckUpgrade_FromCU; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, CheckUpgrade_FromRU) { + Pop_TriggerQueue; + CheckUpgrade_FromRU; + ProcessNextState_ClearPending; +} + + +// Generic send/receive transitions + +// waiting for data +transition(BUSY_BLKD, + {CBWrData_I,CBWrData_SC,CBWrData_SD_PD,CBWrData_UC,CBWrData_UD_PD}) { + Receive_ReqDataResp; + UpdateDirState_FromReqDataResp; + UpdateDataState_FromReqDataResp; + Pop_DataInQueue; + ProcessNextState; +} + +// could be waiting for both data and CompDBIDResp on a WriteUnique +transition({BUSY_BLKD,BUSY_INTR}, NCBWrData) { + Receive_ReqDataResp; + UpdateDataState_FromWUDataResp; + Pop_DataInQueue; + ProcessNextState; +} + +transition(BUSY_BLKD, + {SnpRespData_I_PD,SnpRespData_I,SnpRespData_SC_PD, + SnpRespData_SC,SnpRespData_SD,SnpRespData_UD, + SnpRespData_SC_Fwded_SC,SnpRespData_SC_Fwded_SD_PD, + SnpRespData_SC_PD_Fwded_SC,SnpRespData_I_Fwded_SD_PD, + SnpRespData_I_PD_Fwded_SC,SnpRespData_I_Fwded_SC}) { + Receive_SnpDataResp; + UpdateDirState_FromSnpDataResp; + UpdateDataState_FromSnpDataResp; + Pop_DataInQueue; + ProcessNextState; +} + +transition({BUSY_BLKD,BUSY_INTR}, RespSepData, BUSY_BLKD) { + Receive_RespSepData; + Pop_RespInQueue; + ProcessNextState; +} + +transition({BUSY_BLKD,BUSY_INTR}, DataSepResp_UC, BUSY_BLKD) { + Receive_ReqDataResp; + UpdateDataState_FromReqDataResp; + Callback_Miss; + Profile_OutgoingEnd_DataResp; + Pop_DataInQueue; + ProcessNextState; +} + +transition({BUSY_BLKD,BUSY_INTR}, + {CompData_I,CompData_SC,CompData_SD_PD,CompData_UC,CompData_UD_PD}, + BUSY_BLKD) { + Receive_RespSepDataFromCompData; + Receive_ReqDataResp; + UpdateDataState_FromReqDataResp; + Callback_Miss; + Profile_OutgoingEnd_DataResp; + Pop_DataInQueue; + ProcessNextState; +} + +transition(BUSY_INTR, ReadReceipt, BUSY_BLKD) { + Receive_ReadReceipt; + Pop_RespInQueue; + ProcessNextState; +} + +// Retry handling + +transition(BUSY_INTR, {RetryAck, RetryAck_PoC}) { + Receive_RetryAck; + Pop_RespInQueue; + ProcessNextState; +} + +transition(BUSY_INTR, {PCrdGrant, PCrdGrant_PoC}) { + Receive_PCrdGrant; + Pop_RespInQueue; + ProcessNextState; +} + +// RetryAck/PCrdGrant on BUSY_BLKD is only expected in a PoC/HN when waiting +// for CompAck after sending down a request with DMT enabled. 
Handle the same +// as BUSY_INTR + +transition(BUSY_BLKD, RetryAck_PoC) { + Receive_RetryAck; + Pop_RespInQueue; + ProcessNextState; +} + +transition(BUSY_BLKD, PCrdGrant_PoC) { + Receive_PCrdGrant; + Pop_RespInQueue; + ProcessNextState; +} + +// RetryAck/PCrdGrant received during a snoop hazard may arrive in both +// BUSY_BLKD and BUSY_INTR +transition({BUSY_INTR,BUSY_BLKD}, {RetryAck_Hazard, RetryAck_PoC_Hazard}) { + Receive_RetryAck_Hazard; + Pop_RespInQueue; + ProcessNextState; +} + +transition({BUSY_INTR,BUSY_BLKD}, {PCrdGrant_Hazard, PCrdGrant_PoC_Hazard}) { + Receive_PCrdGrant_Hazard; + Pop_RespInQueue; + ProcessNextState; +} + +// Resend the request after RetryAck+PCrdGrant received + +transition({BUSY_INTR,BUSY_BLKD}, DoRetry) { + Send_Retry; + Pop_RetryTriggerQueue; +} + +transition({BUSY_INTR,BUSY_BLKD}, DoRetry_Hazard) { + Send_Retry_Hazard; + Pop_RetryTriggerQueue; +} + +// waiting for completion ack +transition({BUSY_BLKD,BUSY_INTR}, CompAck) { + Receive_ReqResp; + UpdateDirState_FromReqResp; + Pop_RespInQueue; + ProcessNextState; +} + +transition(BUSY_BLKD, + {SnpResp_I,SnpResp_SC, + SnpResp_I_Fwded_UC,SnpResp_I_Fwded_UD_PD, + SnpResp_SC_Fwded_SC,SnpResp_SC_Fwded_SD_PD, + SnpResp_UC_Fwded_I,SnpResp_UD_Fwded_I, + SnpResp_SC_Fwded_I,SnpResp_SD_Fwded_I}) { + Receive_SnpResp; + UpdateDirState_FromSnpResp; + Pop_RespInQueue; + ProcessNextState; +} + +// waiting for WB or evict ack +transition(BUSY_INTR, + {CompDBIDResp,Comp_I}, BUSY_BLKD) { + Receive_ReqResp; + Profile_OutgoingEnd_DatalessResp; + Pop_RespInQueue; + ProcessNextState; +} + +// currently this happens after a CleanUnique +transition(BUSY_INTR, Comp_UC, BUSY_BLKD) { + Receive_ReqResp; + UpdateDataState_FromCUResp; + Profile_OutgoingEnd_DatalessResp; + Pop_RespInQueue; + ProcessNextState; +} + +// alternative flow for WU with separate Comp +transition(BUSY_INTR, DBIDResp, BUSY_BLKD) { + Receive_ReqResp; + Receive_ReqResp_WUNeedComp; + Pop_RespInQueue; + ProcessNextState; +} +transition(BUSY_BLKD, Comp) { + Receive_ReqResp_WUComp; + Profile_OutgoingEnd_DatalessResp; + Pop_RespInQueue; + ProcessNextState; +} + +transition(BUSY_BLKD, TX_Data) { + Pop_TriggerQueue; + Send_Data; + ProcessNextState_ClearPending; +} + +// Finalization transition + +transition({BUSY_BLKD,BUSY_INTR}, Final, *) { + Pop_TriggerQueue; + Finalize_UpdateCacheFromTBE; + Finalize_UpdateDirectoryFromTBE; + Finalize_DeallocateRequest; +} diff --git a/src/mem/ruby/protocol/chi/CHI-cache.sm b/src/mem/ruby/protocol/chi/CHI-cache.sm new file mode 100644 index 0000000000..160f674078 --- /dev/null +++ b/src/mem/ruby/protocol/chi/CHI-cache.sm @@ -0,0 +1,775 @@ +/* + * Copyright (c) 2021 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+machine(MachineType:Cache, "Cache coherency protocol") :
+  // Sequencer to insert Load/Store requests.
+  // May be null if this is not an L1 cache
+  Sequencer * sequencer;
+
+  // Cache for storing local lines.
+  // NOTE: it is assumed that cache tag and directory lookups and updates
+  // happen in parallel. The cache tag latency is used for both cases.
+  CacheMemory * cache;
+
+  // Additional pipeline latency modeling for the different request types
+  // When defined, these are applied after the initial tag array read and
+  // sending necessary snoops.
+  Cycles read_hit_latency := 0;
+  Cycles read_miss_latency := 0;
+  Cycles write_fe_latency := 0; // Front-end: Rcv req -> Snd req
+  Cycles write_be_latency := 0; // Back-end: Rcv ack -> Snd data
+  Cycles fill_latency := 0;     // Fill latency
+  Cycles snp_latency := 0;      // Applied before handling any snoop
+  Cycles snp_inv_latency := 0;  // Additional latency for invalidating snoops
+
+  // Waits for cache data array write to complete before executing next action
+  // Note a new write will always block if bank stalls are enabled in the cache
+  bool wait_for_cache_wr := "False";
+
+  // Request TBE allocation latency
+  Cycles allocation_latency := 0;
+
+  // Enqueue latencies for outgoing messages
+  // NOTE: should remove this and only use parameters above?
+  Cycles request_latency := 1;
+  Cycles response_latency := 1;
+  Cycles snoop_latency := 1;
+  Cycles data_latency := 1;
+
+  // When an SC fails, unique lines are locked to this controller for a period
+  // proportional to the number of consecutive failed SC requests. See
+  // the usage of sc_lock_multiplier and llscCheckMonitor for details
+  int sc_lock_base_latency_cy := 4;
+  int sc_lock_multiplier_inc := 4;
+  int sc_lock_multiplier_decay := 1;
+  int sc_lock_multiplier_max := 256;
+  bool sc_lock_enabled;
+
+  // Recycle latency on resource stalls
+  Cycles stall_recycle_lat := 1;
+
+  // Notify the sequencer when a line is evicted. This should be set if the
+  // sequencer is not null and handles LL/SC request types.
+  bool send_evictions;
+
+  // Number of entries in the snoop and replacement TBE tables
+  // notice the "number_of_TBEs" parameter is defined by AbstractController
+  int number_of_snoop_TBEs;
+  int number_of_repl_TBEs;
+
+  // replacements use the same TBE slot as the request that triggered it
+  // in this case the number_of_repl_TBEs parameter is ignored
+  bool unify_repl_TBEs;
+
+  // wait for the final tag update to complete before deallocating TBE and
+  // going to final stable state
+  bool dealloc_wait_for_tag := "False";
+
+  // Width of the data channel. Data transfers are split into multiple messages
+  // at the protocol level when this is less than the cache line size.
+  int data_channel_size;
+
+  // Set when this is used as the home node and point of coherency of the
+  // system. Must be false for every other cache level.
+  bool is_HN;
+
+  // Enables direct memory transfers between SNs and RNs when the data is
+  // not cached in the HN.
+  bool enable_DMT;
+
+  // Use ReadNoSnpSep instead of ReadNoSnp for DMT requests, which allows
+  // the TBE to be deallocated at HNFs before the requester receives the data
+  bool enable_DMT_early_dealloc := "False";
+
+  // Enables direct cache transfers, i.e., use forwarding snoops whenever
+  // possible.
+  bool enable_DCT;
+
+  // Use separate Comp/DBIDResp responses for WriteUnique
+  bool comp_wu := "False";
+  // additional latency for the WU Comp response
+  Cycles comp_wu_latency := 0;
+
+  // Controls cache clusivity for different request types.
+  // set all alloc_on* to false to completely disable caching
+  bool alloc_on_readshared;
+  bool alloc_on_readunique;
+  bool alloc_on_readonce;
+  bool alloc_on_writeback;
+  bool alloc_on_seq_acc;
+  bool alloc_on_seq_line_write;
+  // Controls if the clusivity is strict.
+  bool dealloc_on_unique;
+  bool dealloc_on_shared;
+  bool dealloc_backinv_unique;
+  bool dealloc_backinv_shared;
+
+  // If the responder has the line in UC or UD state, propagate this state
+  // on a ReadShared. Notice data won't be deallocated if dealloc_on_unique is
+  // set
+  bool fwd_unique_on_readshared := "False";
+
+  // Allow receiving data in SD state.
+  bool allow_SD;
+
+  // stall new requests to destinations with a pending retry
+  bool throttle_req_on_retry := "True";
+
+  // Use prefetcher
+  bool use_prefetcher, default="false";
+
+  // Message Queues
+
+  // Interface to the network
+  // Note vnet_type is used by Garnet only. "response" type is assumed to
+  // have data, so use it for data channels and "none" for the rest.
+  // network="To" for outbound queue; network="From" for inbound
+  // virtual networks: 0=request, 1=snoop, 2=response, 3=data
+
+  MessageBuffer * reqOut, network="To", virtual_network="0", vnet_type="none";
+  MessageBuffer * snpOut, network="To", virtual_network="1", vnet_type="none";
+  MessageBuffer * rspOut, network="To", virtual_network="2", vnet_type="none";
+  MessageBuffer * datOut, network="To", virtual_network="3", vnet_type="response";
+
+  MessageBuffer * reqIn, network="From", virtual_network="0", vnet_type="none";
+  MessageBuffer * snpIn, network="From", virtual_network="1", vnet_type="none";
+  MessageBuffer * rspIn, network="From", virtual_network="2", vnet_type="none";
+  MessageBuffer * datIn, network="From", virtual_network="3", vnet_type="response";
+
+  // Mandatory queue for receiving requests from the sequencer
+  MessageBuffer * mandatoryQueue;
+
+  // Internal queue for trigger events
+  MessageBuffer * triggerQueue;
+
+  // Internal queue for retry trigger events
+  MessageBuffer * retryTriggerQueue;
+
+  // Internal queue for accepted requests
+  MessageBuffer * reqRdy;
+
+  // Internal queue for accepted snoops
+  MessageBuffer * snpRdy;
+
+  // Internal queue for eviction requests
+  MessageBuffer * replTriggerQueue;
+
+  // Prefetch queue for receiving prefetch requests from prefetcher
+  MessageBuffer * prefetchQueue;
+
+  // Requests that originated from a prefetch in an upstream cache are treated
+  // as demand accesses in this cache. Notice the demand access stats are still
+  // updated only on true demand requests.
+  bool upstream_prefetch_trains_prefetcher := "False";
+
+{
+
+  ////////////////////////////////////////////////////////////////////////////
+  // States
+  ////////////////////////////////////////////////////////////////////////////
+
+  state_declaration(State, default="Cache_State_null") {
+    // Stable states
+
+    I, AccessPermission:Invalid, desc="Invalid / not present locally or upstream";
+
+    // States when block is present in local cache only
+    SC, AccessPermission:Read_Only, desc="Shared Clean";
+    UC, AccessPermission:Read_Write, desc="Unique Clean";
+    SD, AccessPermission:Read_Only, desc="Shared Dirty";
+    UD, AccessPermission:Read_Write, desc="Unique Dirty";
+    UD_T, AccessPermission:Read_Write, desc="UD with use timeout";
+
+    // Invalid in local cache but present in upstream caches
+    RU, AccessPermission:Invalid, desc="Upstream requester has line in UD/UC";
+    RSC, AccessPermission:Invalid, desc="Upstream requester has line in SC";
+    RSD, AccessPermission:Invalid, desc="Upstream requester has line in SD and maybe SC";
+    RUSC, AccessPermission:Invalid, desc="RSC + this node still has exclusive access";
+    RUSD, AccessPermission:Invalid, desc="RSD + this node still has exclusive access";
+
+    // Both in local and upstream caches. In some cases the local copy may be stale
+    SC_RSC, AccessPermission:Read_Only, desc="SC + RSC";
+    SD_RSC, AccessPermission:Read_Only, desc="SD + RSC";
+    SD_RSD, AccessPermission:Read_Only, desc="SD + RSD";
+    UC_RSC, AccessPermission:Read_Write, desc="UC + RSC";
+    UC_RU, AccessPermission:Invalid, desc="UC + RU";
+    UD_RU, AccessPermission:Invalid, desc="UD + RU";
+    UD_RSD, AccessPermission:Read_Write, desc="UD + RSD";
+    UD_RSC, AccessPermission:Read_Write, desc="UD + RSC";
+
+    // Generic transient state
+    // There is only a transient "BUSY" state. The actions taken at this state
+    // and the final stable state are defined by information in the TBE.
+    // While on BUSY_INTR, we will reply to incoming snoops and the
+    // state of the cache line may change. While on BUSY_BLKD snoops
+    // are blocked
+    BUSY_INTR, AccessPermission:Busy, desc="Waiting for data and/or ack";
+    BUSY_BLKD, AccessPermission:Busy, desc="Waiting for data and/or ack; blocks snoops";
+
+    // Null state for debugging
+    null, AccessPermission:Invalid, desc="Null state";
+  }
+
+
+  ////////////////////////////////////////////////////////////////////////////
+  // Events
+  ////////////////////////////////////////////////////////////////////////////
+
+  enumeration(Event) {
+    // Events triggered by incoming requests. Allocate TBE and move
+    // request or snoop to the ready queue
+    AllocRequest, desc="Allocates a TBE for a request. Triggers a retry if table is full";
+    AllocRequestWithCredit, desc="Allocates a TBE for a request. Always succeeds.";
+    AllocSeqRequest, desc="Allocates a TBE for a sequencer request. Stalls requests if table is full";
+    AllocPfRequest, desc="Allocates a TBE for a prefetch request. Stalls requests if table is full";
+    AllocSnoop, desc="Allocates a TBE for a snoop. Stalls snoop if table is full";
+
+    // Events triggered by sequencer requests or snoops in the rdy queue
+    // See CHIRequestType in CHI-msg.sm for descriptions
+    Load;
+    Store;
+    Prefetch;
+    ReadShared;
+    ReadNotSharedDirty;
+    ReadUnique;
+    ReadUnique_PoC;
+    ReadOnce;
+    CleanUnique;
+    Evict;
+    WriteBackFull;
+    WriteEvictFull;
+    WriteCleanFull;
+    WriteUnique;
+    WriteUniquePtl_PoC;
+    WriteUniqueFull_PoC;
+    WriteUniqueFull_PoC_Alloc;
+    SnpCleanInvalid;
+    SnpShared;
+    SnpSharedFwd;
+    SnpNotSharedDirtyFwd;
+    SnpUnique;
+    SnpUniqueFwd;
+    SnpOnce;
+    SnpOnceFwd;
+    SnpStalled; // A snoop stall triggered from the inport
+
+    // Events triggered by incoming response messages
+    // See CHIResponseType in CHI-msg.sm for descriptions
+    CompAck;
+    Comp_I;
+    Comp_UC;
+    Comp_SC;
+    CompDBIDResp;
+    DBIDResp;
+    Comp;
+    ReadReceipt;
+    RespSepData;
+    SnpResp_I;
+    SnpResp_I_Fwded_UC;
+    SnpResp_I_Fwded_UD_PD;
+    SnpResp_SC;
+    SnpResp_SC_Fwded_SC;
+    SnpResp_SC_Fwded_SD_PD;
+    SnpResp_UC_Fwded_I;
+    SnpResp_UD_Fwded_I;
+    SnpResp_SC_Fwded_I;
+    SnpResp_SD_Fwded_I;
+    RetryAck;
+    RetryAck_PoC;
+    PCrdGrant;
+    PCrdGrant_PoC;
+    RetryAck_Hazard;
+    RetryAck_PoC_Hazard;
+    PCrdGrant_Hazard;
+    PCrdGrant_PoC_Hazard;
+
+    // Events triggered by incoming data response messages
+    // See CHIDataType in CHI-msg.sm for descriptions
+    CompData_I;
+    CompData_UC;
+    CompData_SC;
+    CompData_UD_PD;
+    CompData_SD_PD;
+    DataSepResp_UC;
+    CBWrData_I;
+    CBWrData_UC;
+    CBWrData_SC;
+    CBWrData_UD_PD;
+    CBWrData_SD_PD;
+    NCBWrData;
+    SnpRespData_I;
+    SnpRespData_I_PD;
+    SnpRespData_SC;
+    SnpRespData_SC_PD;
+    SnpRespData_SD;
+    SnpRespData_UC;
+    SnpRespData_UD;
+    SnpRespData_SC_Fwded_SC;
+    SnpRespData_SC_Fwded_SD_PD;
+    SnpRespData_SC_PD_Fwded_SC;
+    SnpRespData_I_Fwded_SD_PD;
+    SnpRespData_I_PD_Fwded_SC;
+    SnpRespData_I_Fwded_SC;
+
+    // We use special events for requests that we detect to be stale. This is
+    // done for debugging only. We send a stale response so the requester can
+    // confirm the request is indeed stale and this is not a protocol bug.
+    // A Write or Evict becomes stale when the requester receives a snoop that
+    // changes the state of the data while the request was pending.
+    // Actual CHI implementations don't have this check.
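+    // (e.g., a WriteBackFull becomes stale if a SnpUnique invalidates the
+    // requester's copy while the WriteBackFull is still waiting for its
+    // CompDBIDResp.)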
+    Evict_Stale;
+    WriteBackFull_Stale;
+    WriteEvictFull_Stale;
+    WriteCleanFull_Stale;
+
+    // Cache fill handling
+    CheckCacheFill, desc="Check if we need to write or update the cache and trigger any necessary allocations and evictions";
+
+    // Internal requests generated to evict or writeback a local copy
+    // to free-up cache space
+    Local_Eviction, desc="Evicts/WB the local copy of the line";
+    LocalHN_Eviction, desc="Local_Eviction triggered when we are the HN";
+    Global_Eviction, desc="Local_Eviction + back-invalidate line in all upstream requesters";
+
+    // Events triggered from tbe.actions
+    // In general, for each event we define a single transition from
+    // BUSY_BLKD and/or BUSY_INTR.
+    // See processNextState functions and Initiate_* actions.
+    // All triggered transitions execute in the same cycle until one has to
+    // wait for pending responses or data (set by expected_req_resp and
+    // expected_snp_resp). Triggers queued with pushNB are executed even if
+    // there are pending messages.
+
+    // Cache/directory access events. Notice these only model the latency.
+    TagArrayRead, desc="Read the cache and directory tag array";
+    TagArrayWrite, desc="Write the cache and directory tag array";
+    DataArrayRead, desc="Read the cache data array";
+    DataArrayWrite, desc="Write the cache data array";
+    DataArrayWriteOnFill, desc="Write the cache data array (cache fill)";
+
+    // Events for modeling the pipeline latency
+    ReadHitPipe, desc="Latency of reads served from local cache";
+    ReadMissPipe, desc="Latency of reads not served from local cache";
+    WriteFEPipe, desc="Front-end latency of write requests";
+    WriteBEPipe, desc="Back-end latency of write requests";
+    FillPipe, desc="Cache fill latency";
+    SnpSharedPipe, desc="Latency for SnpShared requests";
+    SnpInvPipe, desc="Latency for SnpUnique and SnpCleanInv requests";
+    SnpOncePipe, desc="Latency for SnpOnce requests";
+
+    // Send a read request downstream.
+    SendReadShared, desc="Send a ReadShared, or a ReadNotSharedDirty if allow_SD is false";
+    SendReadOnce, desc="Send a ReadOnce";
+    SendReadNoSnp, desc="Send a ReadNoSnp";
+    SendReadNoSnpDMT, desc="Send a ReadNoSnp using DMT";
+    SendReadUnique, desc="Send a ReadUnique";
+    SendCompAck, desc="Send CompAck";
+    // Read handling at the completer
+    SendCompData, desc="Send CompData";
+    WaitCompAck, desc="Expect to receive CompAck";
+    SendRespSepData, desc="Send RespSepData for a DMT request";
+
+    // Send a write request downstream.
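+    // (The completer acks these with CompDBIDResp/DBIDResp, after which the
+    // data itself is transmitted via the SendWBData/SendWUData triggers
+    // below.)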
+    SendWriteBackOrWriteEvict, desc="Send a WriteBackFull (if line is UD or SD) or WriteEvictFull (if UC)";
+    SendWriteClean, desc="Send a WriteCleanFull";
+    SendWriteNoSnp, desc="Send a WriteNoSnp for a full line";
+    SendWriteNoSnpPartial, desc="Send a WriteNoSnpPtl";
+    SendWriteUnique, desc="Send a WriteUniquePtl";
+    SendWBData, desc="Send writeback data";
+    SendWUData, desc="Send write unique data";
+    SendWUDataCB, desc="Send write unique data from a sequencer callback";
+    // Write handling at the completer
+    SendCompDBIDResp, desc="Ack WB with CompDBIDResp";
+    SendCompDBIDRespStale, desc="Ack stale WB with CompDBIDResp";
+    SendCompDBIDResp_WU, desc="Ack WU with CompDBIDResp and set expected data";
+    SendDBIDResp_WU, desc="Ack WU with DBIDResp and set expected data";
+    SendComp_WU, desc="Ack WU completion";
+
+    // Dataless requests
+    SendEvict, desc="Send an Evict";
+    SendCompIResp, desc="Ack Evict with Comp_I";
+    SendCleanUnique, desc="Send a CleanUnique";
+    SendCompUCResp, desc="Ack CleanUnique with Comp_UC";
+
+    // Checks if an upgrade using a CleanUnique was successful
+    CheckUpgrade_FromStore, desc="Upgrade needed by a Store";
+    CheckUpgrade_FromCU, desc="Upgrade needed by an upstream CleanUnique";
+    CheckUpgrade_FromRU, desc="Upgrade needed by an upstream ReadUnique";
+
+    // Snoop requests
+    // SnpNotSharedDirty are sent instead of SnpShared for ReadNotSharedDirty
+    SendSnpShared, desc="Send a SnpShared/SnpNotSharedDirty to sharer in UC,UD, or SD state";
+    SendSnpSharedFwdToOwner, desc="Send a SnpSharedFwd/SnpNotSharedDirtyFwd to sharer in UC,UD, or SD state";
+    SendSnpSharedFwdToSharer, desc="Send a SnpSharedFwd/SnpNotSharedDirtyFwd to a sharer in SC state";
+    SendSnpOnce, desc="Send a SnpOnce to a sharer";
+    SendSnpOnceFwd, desc="Send a SnpOnceFwd to a sharer";
+    SendSnpUnique, desc="Send a SnpUnique to all sharers";
+    SendSnpUniqueRetToSrc, desc="Send a SnpUnique to all sharers. Sets RetToSrc for only one sharer.";
+    SendSnpUniqueFwd, desc="Send a SnpUniqueFwd to a single sharer";
+    SendSnpCleanInvalid, desc="Send a SnpCleanInvalid to all sharers";
+    SendSnpCleanInvalidNoReq, desc="Send a SnpCleanInvalid to all sharers except requestor";
+    // Snoop responses
+    SendSnpData, desc="Send SnpRespData as snoop reply";
+    SendSnpIResp, desc="Send SnpResp_I as snoop reply";
+    SendInvSnpResp, desc="Check data state and queue either SendSnpIResp or SendSnpData";
+    SendSnpUniqueFwdCompData, desc="Send CompData to SnpUniqueFwd target and queue either SendSnpFwdedData or SendSnpFwdedResp";
+    SendSnpSharedFwdCompData, desc="Send CompData to SnpSharedFwd target and queue either SendSnpFwdedData or SendSnpFwdedResp";
+    SendSnpNotSharedDirtyFwdCompData, desc="Send CompData to SnpNotSharedDirtyFwd target and queue either SendSnpFwdedData or SendSnpFwdedResp";
+    SendSnpOnceFwdCompData, desc="Send CompData to SnpOnceFwd target and queue either SendSnpFwdedData or SendSnpFwdedResp";
+    SendSnpFwdedData, desc="Send SnpRespData for a forwarding snoop";
+    SendSnpFwdedResp, desc="Send SnpResp for a forwarding snoop";
+
+    // Retry handling
+    SendRetryAck, desc="Send RetryAck";
+    SendPCrdGrant, desc="Send PCrdGrant";
+    DoRetry, desc="Resend the current pending request";
+    DoRetry_Hazard, desc="DoRetry during a hazard";
+
+    // Misc triggers
+    LoadHit, desc="Complete a load hit";
+    StoreHit, desc="Complete a store hit";
+    UseTimeout, desc="Transition from UD_T -> UD";
+    RestoreFromHazard, desc="Restore from a snoop hazard";
+    TX_Data, desc="Transmit pending data messages";
+    MaintainCoherence, desc="Queues a WriteBack or Evict before dropping the only valid copy of the block";
+    FinishCleanUnique, desc="Sends acks and performs any writeback after a CleanUnique";
+    ActionStalledOnHazard, desc="Stall a trigger action until the snoop hazard is handled";
+
+    // This is triggered once a transaction doesn't have
+    // any queued action and is not expecting responses/data.
+    // The transaction is finalized and the next stable state is stored
+    // in the cache/directory.
+    // See the processNextState and makeFinalState functions
+    Final;
+
+    null;
+  }
+
+  ////////////////////////////////////////////////////////////////////////////
+  // Data structures
+  ////////////////////////////////////////////////////////////////////////////
+
+  // Cache block size
+  int blockSize, default="RubySystem::getBlockSizeBytes()";
+
+  // CacheEntry
+  structure(CacheEntry, interface="AbstractCacheEntry") {
+    State state, desc="SLICC line state";
+    DataBlock DataBlk, desc="data for the block";
+    bool HWPrefetched, default="false", desc="Set if this cache entry was prefetched";
+  }
+
+  // Directory entry
+  structure(DirEntry, interface="AbstractCacheEntry", main="false") {
+    NetDest sharers, desc="All upstream controllers that have this line (includes owner)";
+    MachineID owner, desc="Controller that has the line in UD,UC, or SD state";
+    bool ownerExists, default="false", desc="true if owner exists";
+    bool ownerIsExcl, default="false", desc="true if owner is UD or UC";
+    State state, desc="SLICC line state";
+  }
+
+  // Helper class for tracking expected response and data messages
+  structure(ExpectedMap, external ="yes") {
+    void clear(int dataChunks);
+    void addExpectedRespType(CHIResponseType);
+    void addExpectedDataType(CHIDataType);
+    void setExpectedCount(int val);
+    void addExpectedCount(int val);
+    bool hasExpected();
+    bool hasReceivedResp();
+    bool hasReceivedData();
+    int expected();
+    int received();
+    bool receiveResp(CHIResponseType);
+    bool receiveData(CHIDataType);
+    bool receivedDataType(CHIDataType);
+    bool receivedRespType(CHIResponseType);
+  }
+
+  // Tracks a pending retry
+  structure(RetryQueueEntry) {
+    Addr addr, desc="Line address";
+    MachineID retryDest, desc="Retry destination";
+  }
+
+  // Queue for event triggers. Used to specify a list of actions that need
+  // to be performed across multiple transitions.
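+  // For example, a transaction's Initiate_* action might queue
+  //   tbe.actions.push(Event:SendReadShared);
+  //   tbe.actions.push(Event:WaitCompAck);
+  // and processNextState then triggers each queued event in turn.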
+  // This class is also used to track pending retries
+  structure(TriggerQueue, external ="yes") {
+    Event front();
+    Event back();
+    bool frontNB();
+    bool backNB();
+    bool empty();
+    void push(Event);
+    void pushNB(Event);
+    void pushFront(Event);
+    void pushFrontNB(Event);
+    void pop();
+    // For the retry queue
+    void emplace(Addr,MachineID);
+    RetryQueueEntry next(); // SLICC won't allow reusing front()
+  }
+
+  // TBE fields
+  structure(TBE, desc="Transaction buffer entry definition") {
+    // in which table was this allocated
+    bool is_req_tbe, desc="Allocated in the request table";
+    bool is_snp_tbe, desc="Allocated in the snoop table";
+    bool is_repl_tbe, desc="Allocated in the replacements table";
+
+    int storSlot, desc="Slot in the storage tracker occupied by this entry";
+
+    // Transaction info mostly extracted from the request message
+    Addr addr, desc="Line address for this TBE";
+    Addr accAddr, desc="Access address for Load/Store/WriteUniquePtl; otherwise == addr";
+    int accSize, desc="Access size for Load/Store/WriteUniquePtl; otherwise == blockSize";
+    CHIRequestType reqType, desc="Request type that initiated this transaction";
+    MachineID requestor, desc="Requestor ID";
+    MachineID fwdRequestor, desc="Requestor to receive data on fwding snoops";
+    bool use_DMT, desc="Use DMT for this transaction";
+    bool use_DCT, desc="Use DCT for this transaction";
+
+    // if either is set prefetchers are not notified on miss/hit/fill and
+    // demand hit/miss stats are not incremented
+    bool is_local_pf, desc="Request generated by a local prefetcher";
+    bool is_remote_pf, desc="Request generated by a prefetcher in another cache";
+
+    // NOTE: seqReq is a smart pointer pointing to the original CPU request
+    // object that triggered the transactions associated with this TBE. seqReq
+    // carries some information (e.g., PC of the requesting instruction,
+    // virtual address of this request, etc.). Not all transactions have this
+    // field set if they are not triggered directly by a demand request from
+    // the CPU.
+    RequestPtr seqReq, default="nullptr", desc="Pointer to original request from CPU/sequencer";
+    bool isSeqReqValid, default="false", desc="Set if seqReq is valid (not nullptr)";
+
+    // Transaction state information
+    State state, desc="SLICC line state";
+
+    // Transient state information. These are set at the beginning of a
+    // transaction and updated as data and responses are received. After
+    // finalizing the transaction these are used to create the next SLICC
+    // stable state.
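+    // (e.g., a block left with dataValid, dataDirty, and dataUnique set and
+    // no upstream sharers would presumably finalize to UD; see
+    // makeFinalState.)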
+    bool hasUseTimeout, desc="Line is locked under store/use timeout";
+    DataBlock dataBlk, desc="Local copy of the line";
+    WriteMask dataBlkValid, desc="Marks which bytes in the DataBlock are valid";
+    bool dataValid, desc="Local copy is valid";
+    bool dataDirty, desc="Local copy is dirty";
+    bool dataMaybeDirtyUpstream, desc="Line maybe dirty upstream";
+    bool dataUnique, desc="Line is unique either locally or upstream";
+    bool dataToBeInvalid, desc="Local copy will be invalidated at the end of transaction";
+    bool dataToBeSharedClean, desc="Local copy will become SC at the end of transaction";
+    NetDest dir_sharers, desc="Upstream controllers that have the line (includes owner)";
+    MachineID dir_owner, desc="Owner ID";
+    bool dir_ownerExists, desc="Owner ID is valid";
+    bool dir_ownerIsExcl, desc="Owner is UD or UC; SD otherwise";
+    bool doCacheFill, desc="Write valid data to the cache when completing transaction";
+    // NOTE: dataMaybeDirtyUpstream and dir_ownerExists are the same except
+    // when we have just sent dirty data upstream and are waiting for the ack
+    // to set dir_ownerExists
+
+    // Helper structures to track expected events and additional transient
+    // state info
+
+    // List of actions to be performed while on a transient state
+    // See the processNextState function for details
+    TriggerQueue actions, template="", desc="List of actions";
+    Event pendAction, desc="Current pending action";
+    Tick delayNextAction, desc="Delay next action until given tick";
+    State finalState, desc="Final state; set when pendAction==Final";
+
+    // List of expected responses and data. Checks the type of data against the
+    // expected ones for debugging purposes
+    // See the processNextState function for details
+    ExpectedMap expected_req_resp, template="";
+    ExpectedMap expected_snp_resp, template="";
+    bool defer_expected_comp; // expect to receive Comp before the end of transaction
+    CHIResponseType slicchack1; // fix compiler not including headers
+    CHIDataType slicchack2; // fix compiler not including headers
+
+    // Tracks pending data messages that need to be generated when sending
+    // a line
+    bool snd_pendEv, desc="Is there a pending tx event ?";
+    WriteMask snd_pendBytes, desc="Which bytes are pending transmission";
+    CHIDataType snd_msgType, desc="Type of message being sent";
+    MachineID snd_destination, desc="Data destination";
+
+    // Tracks how to update the directory when receiving a CompAck
+    bool updateDirOnCompAck, desc="Update directory on CompAck";
+    bool requestorToBeOwner, desc="Sets dir_ownerExists";
+    bool requestorToBeExclusiveOwner, desc="Sets dir_ownerIsExcl";
+    // NOTE: requestor always added to dir_sharers if updateDirOnCompAck is set
+
+    // Set for incoming snoop requests
+    bool snpNeedsData, desc="Set if snoop requires data as response";
+    State fwdedState, desc="State of CompData sent due to a forwarding snoop";
+    bool is_req_hazard, desc="Snoop hazard with an outstanding request";
+    bool is_repl_hazard, desc="Snoop hazard with an outstanding writeback request";
+    bool is_stale, desc="Request is now stale because of a snoop hazard";
+
+    // Tracks requests sent downstream
+    CHIRequestType pendReqType, desc="Sent request type";
+    bool pendReqAllowRetry, desc="Sent request can be retried";
+    bool rcvdRetryAck, desc="Received a RetryAck";
+    bool rcvdRetryCredit, desc="Received a PCrdGrant";
+    // NOTE: the message is retried only after receiving both RetryAck and
+    // PCrdGrant. A request can be retried only once.
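+    // (i.e., the resend is only triggered once both rcvdRetryAck and
+    // rcvdRetryCredit have been set.)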
+    // These are a copy of the retry msg fields in case we need to retry
+    Addr pendReqAccAddr;
+    int pendReqAccSize;
+    NetDest pendReqDest;
+    bool pendReqD2OrigReq;
+    bool pendReqRetToSrc;
+
+    // This TBE stalled a message and thus we need to call wakeUpBuffers
+    // at some point
+    bool wakeup_pending_req;
+    bool wakeup_pending_snp;
+    bool wakeup_pending_tgr;
+  }
+
+  // TBE table definition
+  structure(TBETable, external ="yes") {
+    TBE lookup(Addr);
+    void allocate(Addr);
+    void deallocate(Addr);
+    bool isPresent(Addr);
+  }
+
+  structure(TBEStorage, external ="yes") {
+    int size();
+    int capacity();
+    int reserved();
+    int slotsAvailable();
+    bool areNSlotsAvailable(int n);
+    void incrementReserved();
+    void decrementReserved();
+    int addEntryToNewSlot();
+    void addEntryToSlot(int slot);
+    void removeEntryFromSlot(int slot);
+  }
+
+  // Directory memory definition
+  structure(PerfectCacheMemory, external = "yes") {
+    void allocate(Addr);
+    void deallocate(Addr);
+    DirEntry lookup(Addr);
+    bool isTagPresent(Addr);
+  }
+
+  // Directory
+  PerfectCacheMemory directory, template="";
+
+  // Tracks unique lines locked after a store miss
+  TimerTable useTimerTable;
+
+  // Multiplies sc_lock_base_latency to obtain the lock timeout.
+  // This is incremented at Profile_Eviction and decays on
+  // store miss completion
+  int sc_lock_multiplier, default="0";
+
+  // Definitions of the TBE tables
+
+  // Main TBE table used for incoming requests
+  TBETable TBEs, template="", constructor="m_number_of_TBEs";
+  TBEStorage storTBEs, constructor="this, m_number_of_TBEs";
+
+  // TBE table for WriteBack/Evict requests generated by a replacement
+  // Notice storTBEs will be used when unify_repl_TBEs is set
+  TBETable replTBEs, template="", constructor="m_unify_repl_TBEs ? m_number_of_TBEs : m_number_of_repl_TBEs";
+  TBEStorage storReplTBEs, constructor="this, m_number_of_repl_TBEs";
+
+  // TBE table for incoming snoops
+  TBETable snpTBEs, template="", constructor="m_number_of_snoop_TBEs";
+  TBEStorage storSnpTBEs, constructor="this, m_number_of_snoop_TBEs";
+
+  // Retry handling
+
+  // Destinations that will be sent PCrdGrant when a TBE becomes available
+  TriggerQueue retryQueue, template="";
+
+
+  // Pending RetryAck/PCrdGrant/DoRetry
+  structure(RetryTriggerMsg, interface="Message") {
+    Addr addr;
+    Event event;
+    MachineID retryDest;
+
+    bool functionalRead(Packet *pkt) { return false; }
+    bool functionalRead(Packet *pkt, WriteMask &mask) { return false; }
+    bool functionalWrite(Packet *pkt) { return false; }
+  }
+
+  // Destinations from which we received a RetryAck. Sending new requests to these
+  // destinations will be blocked until a PCrdGrant is received if
+  // throttle_req_on_retry is set
+  NetDest destsWaitingRetry;
+
+  // Pending transaction actions (generated by TBE:actions)
+  structure(TriggerMsg, interface="Message") {
+    Addr addr;
+    bool from_hazard; // this action was generated during a snoop hazard
+    bool functionalRead(Packet *pkt) { return false; }
+    bool functionalRead(Packet *pkt, WriteMask &mask) { return false; }
+    bool functionalWrite(Packet *pkt) { return false; }
+  }
+
+  // Internal replacement request
+  structure(ReplacementMsg, interface="Message") {
+    Addr addr;
+    Addr from_addr;
+    int slot; // set only when unify_repl_TBEs is set
+    bool functionalRead(Packet *pkt) { return false; }
+    bool functionalRead(Packet *pkt, WriteMask &mask) { return false; }
+    bool functionalWrite(Packet *pkt) { return false; }
+  }
+
+
+  ////////////////////////////////////////////////////////////////////////////
+  // Input/output port definitions
+  ////////////////////////////////////////////////////////////////////////////
+
+  include "CHI-cache-ports.sm";
+  // CHI-cache-ports.sm also includes CHI-cache-funcs.sm
+
+  ////////////////////////////////////////////////////////////////////////////
+  // Actions and transitions
+  ////////////////////////////////////////////////////////////////////////////
+
+  include "CHI-cache-actions.sm";
+  include "CHI-cache-transitions.sm";
+}
diff --git a/src/mem/ruby/protocol/chi/CHI-mem.sm b/src/mem/ruby/protocol/chi/CHI-mem.sm
new file mode 100644
index 0000000000..954a449517
--- /dev/null
+++ b/src/mem/ruby/protocol/chi/CHI-mem.sm
@@ -0,0 +1,792 @@
+/*
+ * Copyright (c) 2021 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+machine(MachineType:Memory, "Memory controller interface") :
+
+  // no explicit modeling of allocation latency like the Caches, so add one
+  // cycle to the response enqueue latency as default
+  Cycles response_latency := 2;
+  Cycles data_latency := 1;
+  Cycles to_memory_controller_latency := 1;
+
+  int data_channel_size;
+
+  // Interface to the network
+  // Note vnet_type is used by Garnet only. "response" type is assumed to
+  // have data, so use it for data channels and "none" for the rest.
+  // network="To" for outbound queue; network="From" for inbound
+  // virtual networks: 0=request, 1=snoop, 2=response, 3=data
+
+  MessageBuffer * reqOut, network="To", virtual_network="0", vnet_type="none";
+  MessageBuffer * snpOut, network="To", virtual_network="1", vnet_type="none";
+  MessageBuffer * rspOut, network="To", virtual_network="2", vnet_type="none";
+  MessageBuffer * datOut, network="To", virtual_network="3", vnet_type="response";
+
+  MessageBuffer * reqIn, network="From", virtual_network="0", vnet_type="none";
+  MessageBuffer * snpIn, network="From", virtual_network="1", vnet_type="none";
+  MessageBuffer * rspIn, network="From", virtual_network="2", vnet_type="none";
+  MessageBuffer * datIn, network="From", virtual_network="3", vnet_type="response";
+
+  // Requests that can allocate a TBE
+  MessageBuffer * reqRdy;
+
+  // Data/ack to/from memory
+  MessageBuffer * requestToMemory;
+  MessageBuffer * responseFromMemory;
+
+  // Trigger queue for internal events
+  MessageBuffer * triggerQueue;
+
+{
+
+  ////////////////////////////////////////////////////////////////////////////
+  // States
+  ////////////////////////////////////////////////////////////////////////////
+
+  state_declaration(State, desc="Transaction states", default="Memory_State_READY") {
+    // We don't know if the line is cached, so the memory copy may be stale
+    READY, AccessPermission:Backing_Store, desc="Ready to transfer the line";
+
+    WAITING_NET_DATA, AccessPermission:Backing_Store_Busy, desc="Waiting data from the network";
+    SENDING_NET_DATA, AccessPermission:Backing_Store_Busy, desc="Sending data to the network";
+    READING_MEM, AccessPermission:Backing_Store_Busy, desc="Waiting data from memory";
+
+    // Null state for debugging; allow writes
+    null, AccessPermission:Backing_Store, desc="Null state";
+  }
+
+
+  ////////////////////////////////////////////////////////////////////////////
+  // Events
+  ////////////////////////////////////////////////////////////////////////////
+
+  enumeration(Event, desc="Memory events") {
+    // Checks if a request can allocate a TBE and be moved to reqRdy
+    CheckAllocTBE;
+    CheckAllocTBE_WithCredit;
+
+    // Requests
+    WriteNoSnpPtl;
+    WriteNoSnp;
+    ReadNoSnp;
+    ReadNoSnpSep;
+
+    // Data
+    WriteData;
+
+    // Memory side
+    MemoryData;
+    MemoryAck;
+
+    // Internal event triggers
+    Trigger_Send;
+    Trigger_SendDone;
+    Trigger_ReceiveDone;
+    Trigger_SendRetry;
+    Trigger_SendPCrdGrant;
+  }
+
+
+  // Is there a less tedious way to convert messages to events
?? + + Event reqToEvent (CHIRequestType type) { + if (type == CHIRequestType:WriteNoSnpPtl) { + return Event:WriteNoSnpPtl; + } else if (type == CHIRequestType:WriteNoSnp) { + return Event:WriteNoSnp; + } else if (type == CHIRequestType:ReadNoSnp) { + return Event:ReadNoSnp; + } else if (type == CHIRequestType:ReadNoSnpSep) { + return Event:ReadNoSnpSep; + } else { + error("Invalid CHIRequestType"); + } + } + + Event respToEvent (CHIResponseType type) { + error("Invalid CHIResponseType"); + } + + Event dataToEvent (CHIDataType type) { + if (type == CHIDataType:NCBWrData) { + return Event:WriteData; + } else { + error("Invalid CHIDataType"); + } + } + + + //////////////////////////////////////////////////////////////////////////// + // Data structures + //////////////////////////////////////////////////////////////////////////// + + // Cache block size + int blockSize, default="RubySystem::getBlockSizeBytes()"; + + // TBE fields + structure(TBE, desc="...") { + int storSlot, desc="Slot in the storage tracker occupied by this entry"; + Addr addr, desc="Line address for this TBE"; + Addr accAddr, desc="Original access address. Set only for Write*Ptl"; + int accSize, desc="Access size. Set only for Write*Ptl"; + State state, desc="Current line state"; + DataBlock dataBlk, desc="Transaction data"; + WriteMask dataBlkValid, desc="valid bytes in dataBlk"; + int rxtxBytes, desc="Bytes sent or received"; + MachineID requestor, desc="Requestor that originated this request"; + MachineID destination, desc="Where we are sending data"; + bool useDataSepResp, desc="Replies with DataSepResp instead of CompData"; + } + + structure(TBETable, external ="yes") { + TBE lookup(Addr); + void allocate(Addr); + void deallocate(Addr); + bool isPresent(Addr); + bool areNSlotsAvailable(int n, Tick curTime); + } + + structure(TBEStorage, external ="yes") { + int size(); + int capacity(); + int reserved(); + int slotsAvailable(); + bool areNSlotsAvailable(int n); + void incrementReserved(); + void decrementReserved(); + int addEntryToNewSlot(); + void removeEntryFromSlot(int slot); + } + + TBETable TBEs, template="", constructor="m_number_of_TBEs"; + TBEStorage storTBEs, constructor="this, m_number_of_TBEs"; + + // Tracks all pending MemoryAcks (debug purposes only) + int pendingWrites, default="0"; + + structure(TriggerMsg, desc="...", interface="Message") { + Addr addr; + Event event; + MachineID retryDest; + + bool functionalRead(Packet *pkt) { return false; } + bool functionalRead(Packet *pkt, WriteMask &mask) { return false; } + bool functionalWrite(Packet *pkt) { return false; } + } + + // Tracks a pending credit request from a retry + structure(RetryQueueEntry) { + Addr addr, desc="Line address"; + MachineID retryDest, desc="Retry destination"; + } + + structure(TriggerQueue, external ="yes") { + void pop(); + bool empty(); + void emplace(Addr,MachineID); + RetryQueueEntry next(); + } + + TriggerQueue retryQueue, template=""; + + //////////////////////////////////////////////////////////////////////////// + // External functions + //////////////////////////////////////////////////////////////////////////// + + Tick clockEdge(); + Tick curTick(); + Tick cyclesToTicks(Cycles c); + void set_tbe(TBE b); + void unset_tbe(); + void wakeUpAllBuffers(Addr a); + bool respondsTo(Addr addr); + + //////////////////////////////////////////////////////////////////////////// + // Interface functions required by SLICC + //////////////////////////////////////////////////////////////////////////// + + State getState(TBE tbe, Addr 
addr) { + if (is_valid(tbe)) { + assert(tbe.addr == addr); + return tbe.state; + } else { + return State:READY; + } + } + + void setState(TBE tbe, Addr addr, State state) { + if (is_valid(tbe)) { + assert(tbe.addr == addr); + tbe.state := state; + } + } + + AccessPermission getAccessPermission(Addr addr) { + if (respondsTo(addr)) { + TBE tbe := TBEs[addr]; + if (is_valid(tbe)) { + DPRINTF(RubySlicc, "%x %s,%s\n", addr, tbe.state, Memory_State_to_permission(tbe.state)); + return Memory_State_to_permission(tbe.state); + } else { + DPRINTF(RubySlicc, "%x %s\n", addr, AccessPermission:Backing_Store); + return AccessPermission:Backing_Store; + } + } else { + DPRINTF(RubySlicc, "%x %s\n", addr, AccessPermission:NotPresent); + return AccessPermission:NotPresent; + } + } + + void setAccessPermission(Addr addr, State state) { + } + + void functionalRead(Addr addr, Packet *pkt, WriteMask &mask) { + if (respondsTo(addr)) { + DPRINTF(RubySlicc, "functionalRead %x\n", addr); + TBE tbe := TBEs[addr]; + + if (mask.isEmpty()) { + functionalMemoryRead(pkt); + mask.fillMask(); + DPRINTF(RubySlicc, "functionalRead mem %x %s\n", addr, mask); + } + + // Update with any transient data + //TODO additional handling of partial data ?? + if (is_valid(tbe)) { + WriteMask read_mask; + read_mask.setMask(addressOffset(tbe.accAddr, tbe.addr), tbe.accSize); + read_mask.andMask(tbe.dataBlkValid); + if (read_mask.isEmpty() == false) { + testAndReadMask(addr, tbe.dataBlk, read_mask, pkt); + DPRINTF(RubySlicc, "functionalRead tbe %x %s %s %s\n", addr, tbe.dataBlk, read_mask, mask); + mask.orMask(read_mask); + } + } + } + } + + int functionalWrite(Addr addr, Packet *pkt) { + if(respondsTo(addr)) { + int num_functional_writes := 0; + TBE tbe := TBEs[addr]; + if (is_valid(tbe)) { + num_functional_writes := num_functional_writes + + testAndWrite(addr, tbe.dataBlk, pkt); + DPRINTF(RubySlicc, "functionalWrite tbe %x %s\n", addr, tbe.dataBlk); + } + num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt); + DPRINTF(RubySlicc, "functionalWrite mem %x\n", addr); + return num_functional_writes; + } else { + return 0; + } + } + + + //////////////////////////////////////////////////////////////////////////// + // Helper functions + //////////////////////////////////////////////////////////////////////////// + + void printResources() { + DPRINTF(RubySlicc, "Resources(avail/max): TBEs=%d/%d\n", + storTBEs.size(), storTBEs.capacity()); + DPRINTF(RubySlicc, "Resources(in/out size): rdy=%d req=%d/%d rsp=%d/%d dat=%d/%d snp=%d/%d\n", + reqRdy.getSize(curTick()), + reqIn.getSize(curTick()), reqOut.getSize(curTick()), + rspIn.getSize(curTick()), rspOut.getSize(curTick()), + datIn.getSize(curTick()), datOut.getSize(curTick()), + snpIn.getSize(curTick()), snpOut.getSize(curTick())); + } + + //////////////////////////////////////////////////////////////////////////// + // Input/output port definitions + //////////////////////////////////////////////////////////////////////////// + + // Outbound port definitions + + out_port(reqOutPort, CHIRequestMsg, reqOut); + out_port(snpOutPort, CHIRequestMsg, snpOut); + out_port(rspOutPort, CHIResponseMsg, rspOut); + out_port(datOutPort, CHIDataMsg, datOut); + out_port(triggerOutPort, TriggerMsg, triggerQueue); + out_port(memQueue_out, MemoryMsg, requestToMemory); + out_port(reqRdyOutPort, CHIRequestMsg, reqRdy); + + // Inbound port definitions + + // Response + in_port(rspInPort, CHIResponseMsg, rspIn, rank=6) { + if (rspInPort.isReady(clockEdge())) { + printResources(); + peek(rspInPort, 
CHIResponseMsg) { + error("Unexpected message"); + } + } + } + + // Data + in_port(datInPort, CHIDataMsg, datIn, rank=5) { + if (datInPort.isReady(clockEdge())) { + printResources(); + peek(datInPort, CHIDataMsg) { + int received := in_msg.bitMask.count(); + assert((received <= data_channel_size) && (received > 0)); + trigger(dataToEvent(in_msg.type), in_msg.addr, TBEs[in_msg.addr]); + } + } + } + + // Data/Ack from memory + + in_port(memQueue_in, MemoryMsg, responseFromMemory, rank=4) { + if (memQueue_in.isReady(clockEdge())) { + printResources(); + peek(memQueue_in, MemoryMsg) { + Addr addr := makeLineAddress(in_msg.addr); + if (in_msg.Type == MemoryRequestType:MEMORY_READ) { + trigger(Event:MemoryData, addr, TBEs[addr]); + } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) { + trigger(Event:MemoryAck, addr, TBEs[addr]); + } else { + error("Invalid message"); + } + } + } + } + + // Trigger + in_port(triggerInPort, TriggerMsg, triggerQueue, rank=3) { + if (triggerInPort.isReady(clockEdge())) { + printResources(); + peek(triggerInPort, TriggerMsg) { + trigger(in_msg.event, in_msg.addr, TBEs[in_msg.addr]); + } + } + } + + // Snoops + in_port(snpInPort, CHIRequestMsg, snpIn, rank=2) { + if (snpInPort.isReady(clockEdge())) { + printResources(); + peek(snpInPort, CHIRequestMsg) { + error("Unexpected message"); + } + } + } + + // Requests + in_port(reqRdyInPort, CHIRequestMsg, reqRdy, rank=1) { + if (reqRdyInPort.isReady(clockEdge())) { + printResources(); + peek(reqRdyInPort, CHIRequestMsg) { + trigger(reqToEvent(in_msg.type), in_msg.addr, TBEs[in_msg.addr]); + } + } + } + + in_port(reqInPort, CHIRequestMsg, reqIn, rank=0) { + if (reqInPort.isReady(clockEdge())) { + printResources(); + peek(reqInPort, CHIRequestMsg) { + if (in_msg.allowRetry) { + trigger(Event:CheckAllocTBE, in_msg.addr, TBEs[in_msg.addr]); + } else { + // The only requests expected to not allow retry are the ones + // being retried after receiving a credit + trigger(Event:CheckAllocTBE_WithCredit, + in_msg.addr, TBEs[in_msg.addr]); + } + } + } + } + + + + //////////////////////////////////////////////////////////////////////////// + // Actions + //////////////////////////////////////////////////////////////////////////// + + action(checkAllocateTBE, desc="") { + // Move to reqRdy if resources are available, otherwise send a retry + if (storTBEs.areNSlotsAvailable(1)) { + // reserve a slot for this request + storTBEs.incrementReserved(); + + peek(reqInPort, CHIRequestMsg) { + enqueue(reqRdyOutPort, CHIRequestMsg, 0) { + out_msg := in_msg; + } + } + + } else { + peek(reqInPort, CHIRequestMsg) { + assert(in_msg.allowRetry); + enqueue(triggerOutPort, TriggerMsg, 0) { + out_msg.addr := in_msg.addr; + out_msg.event := Event:Trigger_SendRetry; + out_msg.retryDest := in_msg.requestor; + retryQueue.emplace(in_msg.addr, in_msg.requestor); + } + } + } + reqInPort.dequeue(clockEdge()); + } + + action(checkAllocateTBE_withCredit, desc="") { + // We must have reserved resources for this request + peek(reqInPort, CHIRequestMsg) { + assert(in_msg.allowRetry == false); + enqueue(reqRdyOutPort, CHIRequestMsg, 0) { + out_msg := in_msg; + } + } + reqInPort.dequeue(clockEdge()); + } + + action(allocateTBE, "atbe", desc="Allocate TBEs for a miss") { + // We must have reserved resources for this allocation + storTBEs.decrementReserved(); + assert(storTBEs.areNSlotsAvailable(1)); + + TBEs.allocate(address); + set_tbe(TBEs[address]); + tbe.storSlot := storTBEs.addEntryToNewSlot(); + tbe.addr := address; + tbe.rxtxBytes := 0; + tbe.useDataSepResp 
:= false; + } + + action(initializeFromReqTBE, "itbe", desc="Initialize TBE fields") { + peek(reqRdyInPort, CHIRequestMsg) { + tbe.requestor := in_msg.requestor; + if (in_msg.dataToFwdRequestor) { + tbe.destination := in_msg.fwdRequestor; + } else { + tbe.destination := in_msg.requestor; + } + tbe.accAddr := in_msg.accAddr; + tbe.accSize := in_msg.accSize; + } + } + + action(decWritePending, "dwp", desc="Decrement pending writes") { + assert(pendingWrites >= 1); + pendingWrites := pendingWrites - 1; + } + + action(deallocateTBE, "dtbe", desc="Deallocate TBEs") { + assert(is_valid(tbe)); + storTBEs.removeEntryFromSlot(tbe.storSlot); + TBEs.deallocate(address); + unset_tbe(); + // send a credit if a requestor is waiting for one + if (retryQueue.empty() == false) { + assert(storTBEs.areNSlotsAvailable(1)); + storTBEs.incrementReserved(); + RetryQueueEntry e := retryQueue.next(); + retryQueue.pop(); + enqueue(triggerOutPort, TriggerMsg, 0) { + out_msg.addr := e.addr; + out_msg.retryDest := e.retryDest; + out_msg.event := Event:Trigger_SendPCrdGrant; + } + } + } + + action(sendReadReceipt, "sRR", desc="Send receipt to requestor") { + assert(is_valid(tbe)); + enqueue(rspOutPort, CHIResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.type := CHIResponseType:ReadReceipt; + out_msg.responder := machineID; + out_msg.Destination.add(tbe.requestor); + } + // the data, once ready, will also be sent as a different type (DataSepResp) + tbe.useDataSepResp := true; + } + + action(sendCompDBIDResp, "sCbid", desc="Send ack to requestor") { + assert(is_valid(tbe)); + enqueue(rspOutPort, CHIResponseMsg, response_latency) { + out_msg.addr := address; + out_msg.type := CHIResponseType:CompDBIDResp; + out_msg.responder := machineID; + out_msg.Destination.add(tbe.requestor); + } + } + + action(sendMemoryRead, "smr", desc="Send request to memory") { + assert(is_valid(tbe)); + enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) { + out_msg.addr := address; + out_msg.Type := MemoryRequestType:MEMORY_READ; + out_msg.Sender := tbe.requestor; + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.Len := 0; + } + } + + action(sendMemoryWrite, "smw", desc="Send request to memory") { + assert(is_valid(tbe)); + enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) { + out_msg.addr := tbe.accAddr; + out_msg.Type := MemoryRequestType:MEMORY_WB; + out_msg.Sender := tbe.requestor; + out_msg.MessageSize := MessageSizeType:Writeback_Data; + out_msg.DataBlk := tbe.dataBlk; + out_msg.Len := tbe.accSize; + } + tbe.dataBlkValid.clear(); + pendingWrites := pendingWrites + 1; + } + + action(prepareSend, "ps", desc="Copies received memory data to TBE") { + assert(is_valid(tbe)); + peek(memQueue_in, MemoryMsg) { + tbe.dataBlk := in_msg.DataBlk; + } + tbe.rxtxBytes := 0; + tbe.dataBlkValid.setMask(addressOffset(tbe.accAddr, tbe.addr), tbe.accSize); + } + + action(copyWriteDataToTBE, "cpWDat", desc="Copies received net data to TBE") { + peek(datInPort, CHIDataMsg) { + assert(is_valid(tbe)); + tbe.dataBlk.copyPartial(in_msg.dataBlk, in_msg.bitMask); + tbe.dataBlkValid.orMask(in_msg.bitMask); + tbe.rxtxBytes := tbe.rxtxBytes + in_msg.bitMask.count(); + } + } + + action(sendDataAndCheck, "sd", desc="Send received data to requestor") { + assert(is_valid(tbe)); + assert(tbe.rxtxBytes < blockSize); + enqueue(datOutPort, CHIDataMsg, data_latency) { + out_msg.addr := tbe.addr; + if (tbe.useDataSepResp) { + out_msg.type := CHIDataType:DataSepResp_UC; + } else { + out_msg.type := CHIDataType:CompData_UC; + } + out_msg.dataBlk := 
tbe.dataBlk; + // Called in order for the whole block, so rxtxBytes can be used as the offset + out_msg.bitMask.setMask(tbe.rxtxBytes, data_channel_size); + out_msg.Destination.add(tbe.destination); + } + + //DPRINTF(RubySlicc, "rxtxBytes=%d\n", tbe.rxtxBytes); + + tbe.rxtxBytes := tbe.rxtxBytes + data_channel_size; + + // end, or send the next chunk next cycle + Event next := Event:Trigger_SendDone; + Cycles delay := intToCycles(0); + if (tbe.rxtxBytes < blockSize) { + next := Event:Trigger_Send; + delay := intToCycles(1); + } + enqueue(triggerOutPort, TriggerMsg, delay) { + out_msg.addr := address; + out_msg.event := next; + } + } + + action(checkForReceiveCompletion, "cWc", desc="Check if all data is received") { + assert(is_valid(tbe)); + DPRINTF(RubySlicc, "rxtxBytes=%d\n", tbe.rxtxBytes); + assert((tbe.rxtxBytes <= tbe.accSize) && (tbe.rxtxBytes > 0)); + if (tbe.rxtxBytes == tbe.accSize) { + enqueue(triggerOutPort, TriggerMsg, 0) { + out_msg.addr := address; + out_msg.event := Event:Trigger_ReceiveDone; + } + tbe.rxtxBytes := 0; + assert(tbe.dataBlkValid.getMask(addressOffset(tbe.accAddr, tbe.addr), tbe.accSize)); + } + } + + action(popReqInQueue, "preq", desc="Pop request queue.") { + reqRdyInPort.dequeue(clockEdge()); + } + + action(popDataInQueue, "pdata", desc="Pop data queue.") { + datInPort.dequeue(clockEdge()); + } + + action(popTriggerQueue, "ptrigger", desc="Pop trigger queue.") { + triggerInPort.dequeue(clockEdge()); + } + + action(popMemoryQueue, "pmem", desc="Pop memory queue.") { + memQueue_in.dequeue(clockEdge()); + } + + // Stall/wake-up is only used for requests that arrive while we are in the + // WAITING_NET_DATA state. In all other cases the line is either + // ready or the requests can overlap + action(stallRequestQueue, "str", desc="Stall and wait on the address") { + peek(reqRdyInPort, CHIRequestMsg) { + stall_and_wait(reqRdyInPort, address); + } + } + action(wakeUpStalled, "wa", desc="Wake up any requests waiting for this address") { + wakeUpAllBuffers(address); + } + + action(sendRetryAck, desc="") { + peek(triggerInPort, TriggerMsg) { + enqueue(rspOutPort, CHIResponseMsg, response_latency) { + out_msg.addr := in_msg.addr; + out_msg.type := CHIResponseType:RetryAck; + out_msg.responder := machineID; + out_msg.Destination.add(in_msg.retryDest); + } + } + } + + action(sendPCrdGrant, desc="") { + peek(triggerInPort, TriggerMsg) { + enqueue(rspOutPort, CHIResponseMsg, response_latency) { + out_msg.addr := in_msg.addr; + out_msg.type := CHIResponseType:PCrdGrant; + out_msg.responder := machineID; + out_msg.Destination.add(in_msg.retryDest); + } + } + } + + //////////////////////////////////////////////////////////////////////////// + // Transitions + //////////////////////////////////////////////////////////////////////////// + + transition(READY, ReadNoSnp, READING_MEM) { + allocateTBE; + initializeFromReqTBE; + sendMemoryRead; + popReqInQueue; + } + + transition(READY, ReadNoSnpSep, READING_MEM) { + allocateTBE; + initializeFromReqTBE; + sendMemoryRead; + sendReadReceipt; + popReqInQueue; + } + + transition(READING_MEM, MemoryData, SENDING_NET_DATA) { + prepareSend; + sendDataAndCheck; + popMemoryQueue; + } + + transition(SENDING_NET_DATA, Trigger_Send) { + sendDataAndCheck; + popTriggerQueue; + } + + transition(READY, WriteNoSnpPtl, WAITING_NET_DATA) { + allocateTBE; + initializeFromReqTBE; + sendCompDBIDResp; + popReqInQueue; + } + + transition(READY, WriteNoSnp, WAITING_NET_DATA) { + allocateTBE; + initializeFromReqTBE; + sendCompDBIDResp; + popReqInQueue; + } + + 
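+  // Write flow: for WriteNoSnp/WriteNoSnpPtl the payload arrives over the + // data channel in chunks of up to data_channel_size bytes; + // copyWriteDataToTBE accumulates the chunks in the TBE, and + // checkForReceiveCompletion fires Trigger_ReceiveDone once accSize bytes + // have been received, which starts the memory write below. + 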
transition(WAITING_NET_DATA, WriteData) { + copyWriteDataToTBE; + checkForReceiveCompletion; + popDataInQueue; + } + + transition(WAITING_NET_DATA, Trigger_ReceiveDone, READY) { + sendMemoryWrite; + deallocateTBE; + wakeUpStalled; + popTriggerQueue; + } + + transition(SENDING_NET_DATA, Trigger_SendDone, READY) { + deallocateTBE; + wakeUpStalled; + popTriggerQueue; + } + + // Just a sanity check against the counter of pending acks + transition({READING_MEM,WAITING_NET_DATA,SENDING_NET_DATA,READY}, + MemoryAck) { + decWritePending; + popMemoryQueue; + } + + // Note that we only stall here; wakeUp is called when leaving these states + transition({READING_MEM,WAITING_NET_DATA,SENDING_NET_DATA}, + {ReadNoSnp, ReadNoSnpSep, WriteNoSnpPtl}) { + stallRequestQueue; + } + + transition({READING_MEM,WAITING_NET_DATA,SENDING_NET_DATA,READY}, + Trigger_SendRetry) { + sendRetryAck; + popTriggerQueue; + } + + transition({READING_MEM,WAITING_NET_DATA,SENDING_NET_DATA,READY}, + Trigger_SendPCrdGrant) { + sendPCrdGrant; + popTriggerQueue; + } + + transition({READING_MEM,WAITING_NET_DATA,SENDING_NET_DATA,READY}, + CheckAllocTBE) { + checkAllocateTBE; + } + + transition({READING_MEM,WAITING_NET_DATA,SENDING_NET_DATA,READY}, + CheckAllocTBE_WithCredit) { + checkAllocateTBE_withCredit; + } + +} diff --git a/src/mem/ruby/protocol/chi/CHI-msg.sm b/src/mem/ruby/protocol/chi/CHI-msg.sm new file mode 100644 index 0000000000..d51fb76ca9 --- /dev/null +++ b/src/mem/ruby/protocol/chi/CHI-msg.sm @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2021 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// All CHI request and response types match the name style in the standard doc. +// For a description of a specific message type, refer to Arm's AMBA 5 +// CHI specification (issue D): +// https://static.docs.arm.com/ihi0050/d/ +// IHI0050D_amba_5_chi_architecture_spec.pdf + +enumeration(CHIRequestType, desc="") { + // Incoming requests generated by the sequencer + Load; + Store; + StoreLine; + + // CHI request types + ReadShared; + ReadNotSharedDirty; + ReadUnique; + ReadOnce; + CleanUnique; + + Evict; + + WriteBackFull; + WriteCleanFull; + WriteEvictFull; + WriteUniquePtl; + WriteUniqueFull; + + SnpSharedFwd; + SnpNotSharedDirtyFwd; + SnpUniqueFwd; + SnpOnceFwd; + SnpOnce; + SnpShared; + SnpUnique; + SnpCleanInvalid; + + WriteNoSnpPtl; + WriteNoSnp; + ReadNoSnp; + ReadNoSnpSep; + + null; +} + +structure(CHIRequestMsg, desc="", interface="Message") { + Addr addr, desc="Request line address"; + Addr accAddr, desc="Original access address. Set for Write*Ptl and requests from the sequencer"; + int accSize, desc="Access size. Set for Write*Ptl and requests from the sequencer"; + CHIRequestType type, desc="Request type"; + MachineID requestor, desc="Requestor ID"; + MachineID fwdRequestor, desc="Where to send data for DMT/DCT requests"; + bool dataToFwdRequestor, desc="Data has to be forwarded to fwdRequestor"; + bool retToSrc, desc="Affects whether or not a snoop resp returns data"; + bool allowRetry, desc="This request can be retried"; + NetDest Destination, desc="Message destination"; + + RequestPtr seqReq, default="nullptr", desc="Pointer to original request from CPU/sequencer (nullptr if not valid)"; + bool isSeqReqValid, default="false", desc="Set if seqReq is valid (not nullptr)"; + + bool is_local_pf, desc="Request generated by a local prefetcher"; + bool is_remote_pf, desc="Request generated by a prefetcher in another cache"; + + MessageSizeType MessageSize, default="MessageSizeType_Control"; + + // No data for functional access + bool functionalRead(Packet *pkt) { return false; } + bool functionalRead(Packet *pkt, WriteMask &mask) { return false; } + bool functionalWrite(Packet *pkt) { return false; } +} + +enumeration(CHIResponseType, desc="...") { + // CHI response types + Comp_I; + Comp_UC; + Comp_SC; + CompAck; + CompDBIDResp; + DBIDResp; + Comp; + ReadReceipt; + RespSepData; + + SnpResp_I; + SnpResp_I_Fwded_UC; + SnpResp_I_Fwded_UD_PD; + SnpResp_SC; + SnpResp_SC_Fwded_SC; + SnpResp_SC_Fwded_SD_PD; + SnpResp_UC_Fwded_I; + SnpResp_UD_Fwded_I; + SnpResp_SC_Fwded_I; + SnpResp_SD_Fwded_I; + + RetryAck; + PCrdGrant; + + null; +} + +structure(CHIResponseMsg, desc="", interface="Message") { + Addr addr, desc="Line address"; + CHIResponseType type, desc="Response type"; + MachineID responder, desc="Responder ID"; + NetDest Destination, desc="Response destination"; + bool stale, desc="Response to a stale request"; + // NOTE: not in CHI; for debugging only + + MessageSizeType MessageSize, default="MessageSizeType_Control"; + + // No data for 
functional access + bool functionalRead(Packet *pkt) { return false; } + bool functionalRead(Packet *pkt, WriteMask &mask) { return false; } + bool functionalWrite(Packet *pkt) { return false; } +} + +enumeration(CHIDataType, desc="...") { + // CHI data response types + CompData_I; + CompData_UC; + CompData_SC; + CompData_UD_PD; + CompData_SD_PD; + DataSepResp_UC; + CBWrData_UC; + CBWrData_SC; + CBWrData_UD_PD; + CBWrData_SD_PD; + CBWrData_I; + NCBWrData; + SnpRespData_I; + SnpRespData_I_PD; + SnpRespData_SC; + SnpRespData_SC_PD; + SnpRespData_SD; + SnpRespData_UC; + SnpRespData_UD; + SnpRespData_SC_Fwded_SC; + SnpRespData_SC_Fwded_SD_PD; + SnpRespData_SC_PD_Fwded_SC; + SnpRespData_I_Fwded_SD_PD; + SnpRespData_I_PD_Fwded_SC; + SnpRespData_I_Fwded_SC; + null; +} + +structure(CHIDataMsg, desc="", interface="Message") { + Addr addr, desc="Line address"; + CHIDataType type, desc="Response type"; + MachineID responder, desc="Responder ID"; + NetDest Destination, desc="Response destination"; + DataBlock dataBlk, desc="Line data"; + WriteMask bitMask, desc="Which bytes in the data block are valid"; + + + MessageSizeType MessageSize, default="MessageSizeType_Data"; + + bool functionalRead(Packet *pkt) { + if(bitMask.isFull()) { + return testAndRead(addr, dataBlk, pkt); + } else { + return false; + } + } + + bool functionalRead(Packet *pkt, WriteMask &mask) { + // read if bitmask has bytes not in mask or if data is dirty + bool is_dirty := (type == CHIDataType:CompData_UD_PD) || + (type == CHIDataType:CompData_SD_PD) || + (type == CHIDataType:CBWrData_UD_PD) || + (type == CHIDataType:CBWrData_SD_PD) || + (type == CHIDataType:NCBWrData) || + (type == CHIDataType:SnpRespData_I_PD) || + (type == CHIDataType:SnpRespData_SC_PD) || + (type == CHIDataType:SnpRespData_SD) || + (type == CHIDataType:SnpRespData_UD) || + (type == CHIDataType:SnpRespData_SC_Fwded_SD_PD) || + (type == CHIDataType:SnpRespData_SC_PD_Fwded_SC) || + (type == CHIDataType:SnpRespData_I_Fwded_SD_PD) || + (type == CHIDataType:SnpRespData_I_PD_Fwded_SC); + assert(bitMask.isEmpty() == false); + WriteMask test_mask := mask; + test_mask.orMask(bitMask); + if ((test_mask.cmpMask(mask) == false) || is_dirty) { + if (testAndReadMask(addr, dataBlk, bitMask, pkt)) { + mask.orMask(bitMask); + return true; + } + } + return false; + } + + bool functionalWrite(Packet *pkt) { + return testAndWrite(addr, dataBlk, pkt); + } +} + + diff --git a/src/mem/ruby/protocol/chi/CHI.slicc b/src/mem/ruby/protocol/chi/CHI.slicc new file mode 100644 index 0000000000..27724bb582 --- /dev/null +++ b/src/mem/ruby/protocol/chi/CHI.slicc @@ -0,0 +1,6 @@ +protocol "CHI"; + +include "RubySlicc_interfaces.slicc"; +include "CHI-msg.sm"; +include "CHI-cache.sm"; +include "CHI-mem.sm"; diff --git a/src/mem/ruby/protocol/chi/SConsopts b/src/mem/ruby/protocol/chi/SConsopts new file mode 100644 index 0000000000..91c10d22d3 --- /dev/null +++ b/src/mem/ruby/protocol/chi/SConsopts @@ -0,0 +1,47 @@ +# -*- mode:python -*- + +# Copyright (c) 2021 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. 
You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Import('*') + +# Register this protocol with gem5/SCons + +all_protocols.append('CHI') + +# CHI requires Ruby's interface to support partial functional reads + +need_partial_func_reads.append('CHI') + +protocol_dirs.append(Dir('.').abspath) From 68d612c3304a923923bf883f9e334c5305639673 Mon Sep 17 00:00:00 2001 From: "Daniel R. Carvalho" Date: Sat, 13 Mar 2021 18:43:50 -0300 Subject: [PATCH 21/25] base-stats: Fix Watt Unit Watt had two implementations. Since having the unit printed as "Watt" is more relevant than as "Joule/Second", keep the class. Change-Id: Ic9ae755115e2eca94492f3d5b11245db9fe42bb6 Signed-off-by: Daniel R. Carvalho Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/43006 Reviewed-by: Bobby R. Bruce Maintainer: Bobby R. Bruce Tested-by: kokoro --- src/base/stats/units.hh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/base/stats/units.hh b/src/base/stats/units.hh index 5b785b4262..b2dfefa19e 100644 --- a/src/base/stats/units.hh +++ b/src/base/stats/units.hh @@ -47,10 +47,9 @@ #define UNIT_RATE(T1, T2) Stats::Units::Rate::get() #define UNIT_RATIO Stats::Units::Ratio::get() #define UNIT_COUNT Stats::Units::Count::get() +#define UNIT_WATT Stats::Units::Watt::get() #define UNIT_UNSPECIFIED Stats::Units::Unspecified::get() -#define UNIT_WATT UNIT_RATE(Stats::Units::Joule, Stats::Units::Second) - namespace Stats { /** From 9ea38f7147c9516976878318e08fcd77d7798aac Mon Sep 17 00:00:00 2001 From: Andreas Sandberg Date: Tue, 19 Jan 2021 16:05:08 +0000 Subject: [PATCH 22/25] python: Fix incorrect prefixes in m5.util.convert The conversion functions incorrectly assumed that the prefix for kibibytes is 'kiB' rather than the correct 'KiB'. 
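As a minimal illustrative sketch (not part of this change itself; it assumes the toMemorySize helper in m5.util.convert, which parses memory-size strings using binary_prefixes), the corrected table accepts the standard IEC spelling:

    from m5.util.convert import toMemorySize
    # 'KiB' (the IEC prefix for kibibytes) is now recognized:
    assert toMemorySize('4KiB') == 4096
    # a bare 'k' still maps to kibi for memory sizes:
    assert toMemorySize('4kB') == 4096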
Cherry-picked from: https://gem5-review.googlesource.com/c/public/gem5/+/39375 Change-Id: Ia9409218c37284514fc4fabdabf327641db8cefc Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/43146 Reviewed-by: Jason Lowe-Power Reviewed-by: Andreas Sandberg Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/python/m5/util/convert.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/m5/util/convert.py b/src/python/m5/util/convert.py index 077b6b4dc5..ae667b3eb2 100644 --- a/src/python/m5/util/convert.py +++ b/src/python/m5/util/convert.py @@ -62,7 +62,7 @@ metric_prefixes = { 'Gi': gibi, 'G': giga, 'M': mega, - 'ki': kibi, + 'Ki': kibi, 'k': kilo, 'Mi': mebi, 'm': milli, @@ -84,7 +84,7 @@ binary_prefixes = { 'G' : gibi, 'Mi': mebi, 'M' : mebi, - 'ki': kibi, + 'Ki': kibi, 'k' : kibi, } From 1479ad9ef05d7ac6443c3aa03f5e02fa88179ab4 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Wed, 3 Mar 2021 11:38:06 +0000 Subject: [PATCH 23/25] arch-arm: Fix atomics permission checks in TLB For stage 2 translations, atomic accesses were not checking the access permission bits in the page table descriptors, and were instead wrongly using the nature of the request itself (r/w booleans). Cherry-picked from: https://gem5-review.googlesource.com/c/public/gem5/+/42073 Change-Id: I919a08b690287b03426d9124a61887e521f47823 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/43143 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/arch/arm/tlb.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/arch/arm/tlb.cc b/src/arch/arm/tlb.cc index 413a13e78c..a09c9538fe 100644 --- a/src/arch/arm/tlb.cc +++ b/src/arch/arm/tlb.cc @@ -772,8 +772,7 @@ TLB::checkPermissions64(TlbEntry *te, const RequestPtr &req, Mode mode, // sctlr.wxn overrides the xn bit grant = !wxn && !xn; } else if (is_atomic) { - grant = r && w; - grant_read = r; + grant = hap; } else if (is_write) { grant = hap & 0x2; } else { // is_read From eb3554e0e98efce8da9094b8e7e19ee78657be8b Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Fri, 12 Mar 2021 12:50:49 +0000 Subject: [PATCH 24/25] configs: Use integer division in MESI_Three_Level_HTM.py num_cpus_per_cluster and num_l2caches_per_cluster need to be integers, as we iterate over those variables Cherry-picked from: https://gem5-review.googlesource.com/c/public/gem5/+/42883 Change-Id: Ifc1f9cf06b36044289a0ba5e54666f1af2587fca Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/43144 Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair Tested-by: kokoro --- configs/ruby/MESI_Three_Level_HTM.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/ruby/MESI_Three_Level_HTM.py b/configs/ruby/MESI_Three_Level_HTM.py index 89ca93c61d..b6b1c7f637 100644 --- a/configs/ruby/MESI_Three_Level_HTM.py +++ b/configs/ruby/MESI_Three_Level_HTM.py @@ -78,10 +78,10 @@ def create_system(options, full_system, system, dma_ports, bootmem, dma_cntrl_nodes = [] assert (options.num_cpus % options.num_clusters == 0) - num_cpus_per_cluster = options.num_cpus / options.num_clusters + num_cpus_per_cluster = options.num_cpus // options.num_clusters assert (options.num_l2caches % options.num_clusters == 0) - num_l2caches_per_cluster = options.num_l2caches / options.num_clusters + num_l2caches_per_cluster = options.num_l2caches // options.num_clusters l2_bits = int(math.log(num_l2caches_per_cluster, 2)) block_size_bits = int(math.log(options.cacheline_size, 2)) From 
31cd81fdec46bae4b48d4f3788776936389dbdec Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Tue, 16 Mar 2021 15:24:37 -0700 Subject: [PATCH 25/25] misc: Updated the RELEASE-NOTES and version number Updated the RELEASE-NOTES.md and version number for the v20.1.0.5 hotfix release. Change-Id: I137a12325137799b9b1f98fe67ac55bfab49cd91 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/43145 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- RELEASE-NOTES.md | 8 ++++++++ src/Doxyfile | 2 +- src/base/version.cc | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 3f1709123d..7c3472e39a 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,3 +1,11 @@ +# Version 20.1.0.5 + +**[HOTFIX]** This hotfix release fixes three known bugs: + +* `src/python/m5/util/convert.py` incorrectly stated the prefix for kibibytes as 'kiB' instead of 'KiB'. This has been fixed. +* For Arm stage 2 translations, atomic accesses were not checking the access permission bits in the page table descriptors; they were incorrectly using the nature of the request itself. This is now fixed. +* `num_l2caches_per_cluster` and `num_cpus_per_cluster` were computed with floating-point division in `configs/ruby/MESI_Three_Level_HTM.py`, which caused errors when iterating over them. They are now computed with integer division. + # Version 20.1.0.4 **[HOTFIX]** [gem5 was failing to build with SCons 4.0.1 and 4.1.0](https://gem5.atlassian.net/browse/GEM5-916). diff --git a/src/Doxyfile b/src/Doxyfile index 4ad0ea537b..c9f1ed458c 100644 --- a/src/Doxyfile +++ b/src/Doxyfile @@ -31,7 +31,7 @@ PROJECT_NAME = gem5 # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = v20.1.0.4 +PROJECT_NUMBER = v20.1.0.5 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. diff --git a/src/base/version.cc b/src/base/version.cc index 0a34488e5f..3e7aa35f37 100644 --- a/src/base/version.cc +++ b/src/base/version.cc @@ -29,4 +29,4 @@ /** * @ingroup api_base_utils */ -const char *gem5Version = "20.1.0.4"; +const char *gem5Version = "20.1.0.5";