diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm index a59589870d..df3aa1ebca 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm @@ -81,6 +81,7 @@ machine(MachineType:TCC, "TCC Cache") I, AccessPermission:Invalid, desc="Invalid"; IV, AccessPermission:Busy, desc="Waiting for Data"; WI, AccessPermission:Busy, desc="Waiting on Writethrough Ack"; + WIB, AccessPermission:Busy, desc="Waiting on Writethrough Ack; Will be Bypassed"; A, AccessPermission:Busy, desc="Invalid waiting on atomici Data"; } @@ -289,7 +290,14 @@ machine(MachineType:TCC, "TCC Cache") is_slc_set := tbe.isSLCSet; } - if (is_slc_set) { + // Whether the SLC bit is set or not, WB acks should invoke the + // WBAck event. For cases where a read response will follow a + // WBAck (A read bypass evict on a dirty line), the line's TBE + // will not be deallocated on WBAck, and the SLC bit will be + // checked when the read response is received. + if (in_msg.Type == CoherenceResponseType:NBSysWBAck) { + trigger(Event:WBAck, in_msg.addr, cache_entry, tbe); + } else if (is_slc_set) { // If the SLC bit is set, the response needs to bypass the cache // and should not be allocated an entry. 
trigger(Event:Bypass, in_msg.addr, cache_entry, tbe); @@ -300,8 +308,6 @@ machine(MachineType:TCC, "TCC Cache") Addr victim := L2cache.cacheProbe(in_msg.addr); trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); } - } else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) { - trigger(Event:WBAck, in_msg.addr, cache_entry, tbe); } else { error("Unexpected Response Message to Core"); } @@ -699,6 +705,12 @@ machine(MachineType:TCC, "TCC Cache") // woken up st_stallAndWaitRequest; } + transition(WIB, {RdBlk, WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} { + // by putting the stalled requests in a buffer, we reduce resource contention + // since they won't try again every cycle and will instead only try again once + // woken up + st_stallAndWaitRequest; + } transition(A, {RdBlk, WrVicBlk, WrVicBlkBack}) { //TagArrayRead} { // by putting the stalled requests in a buffer, we reduce resource contention // since they won't try again every cycle and will instead only try again once @@ -751,7 +763,7 @@ machine(MachineType:TCC, "TCC Cache") // Transition to be called when a read request with SLC flag set arrives at // entry in state W. It evicts and invalidates the cache entry before // forwarding the request to global memory - transition(W, RdBypassEvict, I) {TagArrayRead} { + transition(W, RdBypassEvict, WIB) {TagArrayRead} { p_profileMiss; t_allocateTBE; wb_writeBack; @@ -763,7 +775,7 @@ machine(MachineType:TCC, "TCC Cache") // Transition to be called when a read request with SLC flag set arrives at // entry in state M. It evicts and invalidates the cache entry before // forwarding the request to global memory to main memory - transition(M, RdBypassEvict, I) {TagArrayRead} { + transition(M, RdBypassEvict, WIB) {TagArrayRead} { p_profileMiss; t_allocateTBE; wb_writeBack; @@ -785,7 +797,7 @@ machine(MachineType:TCC, "TCC Cache") // Transition to be called when a read request with SLC flag arrives at entry // in transient state. 
The request stalls until the pending transition is complete. - transition({WI, IV}, RdBypassEvict) { + transition({WI, WIB, IV}, RdBypassEvict) { st_stallAndWaitRequest; } @@ -900,7 +912,7 @@ transition(I, Atomic, A) {TagArrayRead} { i_invL2; } - transition({A, IV, WI}, L2_Repl) { + transition({A, IV, WI, WIB}, L2_Repl) { i_invL2; } @@ -919,7 +931,7 @@ transition(I, Atomic, A) {TagArrayRead} { pp_popProbeQueue; } - transition({A, IV, WI}, PrbInv) { + transition({A, IV, WI, WIB}, PrbInv) { pi_sendProbeResponseInv; pp_popProbeQueue; } @@ -974,4 +986,8 @@ transition(I, Atomic, A) {TagArrayRead} { wada_wakeUpAllDependentsAddr; pr_popResponseQueue; } + + transition(WIB, WBAck,I) { + pr_popResponseQueue; + } }