mem-ruby: Always pass on GPU atomics to dir in write-through TCC (#367)

Added checks to ensure that atomics are not performed in the TCC when it
is configured as a write-through cache. Also added SLC bit overwrite to
ensure directory performs atomics when there is a write-through TCC.

Change-Id: I4514e6c8022aeb7785f2c59871cd9acec8161ed8
This commit is contained in:
Daniel Kouchekinia
2023-10-14 08:39:50 -05:00
committed by GitHub
parent a3c51ca38c
commit 4931fb0010
3 changed files with 24 additions and 18 deletions

View File

@@ -555,6 +555,7 @@ def construct_dirs(options, system, ruby_system, network):
dir_cntrl.create(options, dir_ranges, ruby_system, system)
dir_cntrl.number_of_TBEs = options.num_tbes
dir_cntrl.useL3OnWT = options.use_L3_on_WT
dir_cntrl.L2isWB = options.WB_L2
# the number_of_TBEs is inclusive of TBEs below
# Connect the Directory controller to the ruby network

View File

@@ -300,16 +300,22 @@ machine(MachineType:TCC, "TCC Cache")
// checked when the read response is received.
if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
} else if (is_slc_set) {
// If the SLC bit is set, the response needs to bypass the cache
// and should not be allocated an entry.
} else if(in_msg.Type == CoherenceResponseType:NBSysResp) {
// If the SLC bit is set or the cache is write-through and
// we're receiving modified data (such as from an atomic),
// the response needs to bypass the cache and should not be
// allocated an entry.
if(is_slc_set || (!WB && in_msg.State == CoherenceState:Modified)) {
trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
} else if (in_msg.Type == CoherenceResponseType:NBSysResp) {
if(presentOrAvail(in_msg.addr)) {
trigger(Event:Data, in_msg.addr, cache_entry, tbe);
} else {
Addr victim := L2cache.cacheProbe(in_msg.addr);
trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
if(presentOrAvail(in_msg.addr)) {
// Responses with atomic data will only reach here if the
// SLC bit isn't set and the cache is WB
trigger(Event:Data, in_msg.addr, cache_entry, tbe);
} else {
Addr victim := L2cache.cacheProbe(in_msg.addr);
trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
}
}
} else {
error("Unexpected Response Message to Core");
@@ -358,13 +364,13 @@ machine(MachineType:TCC, "TCC Cache")
trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
}
} else if (in_msg.Type == CoherenceRequestType:Atomic) {
// If the request is system-level, or if the address isn't in the cache,
// then send the request to the directory. Since non-SLC atomics won't be
// performed by the directory, TCC will perform the atomic on the return path
// on Event:Data.
// If the request is system-level, if the address isn't in the cache,
// or if this cache is write-through, then send the request to the
// directory. Since non-SLC atomics won't be performed by the directory,
// TCC will perform the atomic on the return path on Event:Data.
// The action will invalidate the cache line if SLC is set and the address is
// in the cache.
if(in_msg.isSLCSet || !presentOrAvail(in_msg.addr)) {
if(in_msg.isSLCSet || !WB || !presentOrAvail(in_msg.addr)) {
trigger(Event:AtomicPassOn, in_msg.addr, cache_entry, tbe);
} else {
trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);

View File

@@ -39,6 +39,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
bool GPUonly := "False";
int TCC_select_num_bits;
bool useL3OnWT := "False";
bool L2isWB;
Cycles to_memory_controller_latency := 1;
// DMA
@@ -1039,11 +1040,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
action(wd_writeBackData, "wd", desc="Write back data if needed") {
if (tbe.wtData || tbe.atomicData || tbe.Dirty == false) {
// If SLC is not set, the atomic is handled in the L2
// Atomic needs to be done at the L3 only if this is
// not the case
if (tbe.atomicData && tbe.isSLCSet) {
// Only perform atomics in the directory if the SLC bit is set, or
// if the L2 is WT
if (tbe.atomicData && (tbe.isSLCSet || !L2isWB)) {
tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask);
}
enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {