diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py
index b8757c273d..fae9f1ba9c 100644
--- a/configs/ruby/GPU_VIPER.py
+++ b/configs/ruby/GPU_VIPER.py
@@ -555,6 +555,7 @@ def construct_dirs(options, system, ruby_system, network):
         dir_cntrl.create(options, dir_ranges, ruby_system, system)
         dir_cntrl.number_of_TBEs = options.num_tbes
         dir_cntrl.useL3OnWT = options.use_L3_on_WT
+        dir_cntrl.L2isWB = options.WB_L2
         # the number_of_TBEs is inclusive of TBEs below
 
         # Connect the Directory controller to the ruby network
diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index be1243aaa5..d1905c3b96 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -300,16 +300,22 @@ machine(MachineType:TCC, "TCC Cache")
         // checked when the read response is received.
         if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
           trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
-        } else if (is_slc_set) {
-            // If the SLC bit is set, the response needs to bypass the cache
-            // and should not be allocated an entry.
+        } else if(in_msg.Type == CoherenceResponseType:NBSysResp) {
+          // If the SLC bit is set, or if the cache is write-through and
+          // we're receiving modified data (such as from an atomic),
+          // the response needs to bypass the cache and should not be
+          // allocated an entry.
+          if(is_slc_set || (!WB && in_msg.State == CoherenceState:Modified)) {
             trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
-        } else if (in_msg.Type == CoherenceResponseType:NBSysResp) {
-          if(presentOrAvail(in_msg.addr)) {
-            trigger(Event:Data, in_msg.addr, cache_entry, tbe);
           } else {
-            Addr victim :=  L2cache.cacheProbe(in_msg.addr);
-            trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+            if(presentOrAvail(in_msg.addr)) {
+              // Responses with atomic data will only reach here if the
+              // SLC bit isn't set and the cache is WB
+              trigger(Event:Data, in_msg.addr, cache_entry, tbe);
+            } else {
+              Addr victim :=  L2cache.cacheProbe(in_msg.addr);
+              trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
+            }
           }
         } else {
           error("Unexpected Response Message to Core");
@@ -358,13 +364,13 @@ machine(MachineType:TCC, "TCC Cache")
                 trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
             }
         } else if (in_msg.Type == CoherenceRequestType:Atomic) {
-          // If the request is system-level, or if the address isn't in the cache,
-          // then send the request to the directory. Since non-SLC atomics won't be
-          // performed by the directory, TCC will perform the atomic on the return path
-          // on Event:Data.
+          // If the request is system-level, if the address isn't in the cache,
+          // or if this cache is write-through, then send the request to the
+          // directory. The directory performs the atomic itself only if SLC is set or
+          // the L2 is write-through; otherwise TCC performs it on Event:Data.
           // The action will invalidate the cache line if SLC is set and the address is
           // in the cache.
-          if(in_msg.isSLCSet || !presentOrAvail(in_msg.addr)) {
+          if(in_msg.isSLCSet || !WB || !presentOrAvail(in_msg.addr)) {
             trigger(Event:AtomicPassOn, in_msg.addr, cache_entry, tbe);
           } else {
             trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
index eed750832f..c3bbfa1950 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
@@ -39,6 +39,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
   bool GPUonly := "False";
   int TCC_select_num_bits;
   bool useL3OnWT := "False";
+  bool L2isWB;
   Cycles to_memory_controller_latency := 1;
 
   // DMA
@@ -1039,11 +1040,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
 
   action(wd_writeBackData, "wd", desc="Write back data if needed") {
     if (tbe.wtData || tbe.atomicData || tbe.Dirty == false) {
-    // If SLC is not set, the atomic is handled in the L2
-    // Atomic needs to be done at the L3 only if this is
-    // not the case
-
-      if (tbe.atomicData && tbe.isSLCSet) {
+      // Only perform atomics in the directory if the SLC bit is set, or
+      // if the L2 is WT
+      if (tbe.atomicData && (tbe.isSLCSet || !L2isWB)) {
         tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask);
       }
       enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {