diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py index b8757c273d..fae9f1ba9c 100644 --- a/configs/ruby/GPU_VIPER.py +++ b/configs/ruby/GPU_VIPER.py @@ -555,6 +555,7 @@ def construct_dirs(options, system, ruby_system, network): dir_cntrl.create(options, dir_ranges, ruby_system, system) dir_cntrl.number_of_TBEs = options.num_tbes dir_cntrl.useL3OnWT = options.use_L3_on_WT + dir_cntrl.L2isWB = options.WB_L2 # the number_of_TBEs is inclusive of TBEs below # Connect the Directory controller to the ruby network diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm index be1243aaa5..d1905c3b96 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm @@ -300,16 +300,22 @@ machine(MachineType:TCC, "TCC Cache") // checked when the read response is received. if (in_msg.Type == CoherenceResponseType:NBSysWBAck) { trigger(Event:WBAck, in_msg.addr, cache_entry, tbe); - } else if (is_slc_set) { - // If the SLC bit is set, the response needs to bypass the cache - // and should not be allocated an entry. + } else if(in_msg.Type == CoherenceResponseType:NBSysResp) { + // If the SLC bit is set or the cache is write-through and + // we're receiving modified data (such as from an atomic), + // the response needs to bypass the cache and should not be + // allocated an entry. 
+ if(is_slc_set || (!WB && in_msg.State == CoherenceState:Modified)) { trigger(Event:Bypass, in_msg.addr, cache_entry, tbe); - } else if (in_msg.Type == CoherenceResponseType:NBSysResp) { - if(presentOrAvail(in_msg.addr)) { - trigger(Event:Data, in_msg.addr, cache_entry, tbe); } else { - Addr victim := L2cache.cacheProbe(in_msg.addr); - trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + if(presentOrAvail(in_msg.addr)) { + // Responses with atomic data will only reach here if the + // SLC bit isn't set and the cache is WB + trigger(Event:Data, in_msg.addr, cache_entry, tbe); + } else { + Addr victim := L2cache.cacheProbe(in_msg.addr); + trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim)); + } } } else { error("Unexpected Response Message to Core"); @@ -358,13 +364,13 @@ machine(MachineType:TCC, "TCC Cache") trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe); } } else if (in_msg.Type == CoherenceRequestType:Atomic) { - // If the request is system-level, or if the address isn't in the cache, - // then send the request to the directory. Since non-SLC atomics won't be - // performed by the directory, TCC will perform the atomic on the return path - // on Event:Data. + // If the request is system-level, if the address isn't in the cache, + // or if this cache is write-through, then send the request to the + // directory. Non-SLC atomics from a write-back TCC won't be performed by + // the directory, so TCC performs them on the return path on Event:Data. // The action will invalidate the cache line if SLC is set and the address is // in the cache. 
- if(in_msg.isSLCSet || !presentOrAvail(in_msg.addr)) { + if(in_msg.isSLCSet || !WB || !presentOrAvail(in_msg.addr)) { trigger(Event:AtomicPassOn, in_msg.addr, cache_entry, tbe); } else { trigger(Event:Atomic, in_msg.addr, cache_entry, tbe); diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm index eed750832f..c3bbfa1950 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm @@ -39,6 +39,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") bool GPUonly := "False"; int TCC_select_num_bits; bool useL3OnWT := "False"; + bool L2isWB; Cycles to_memory_controller_latency := 1; // DMA @@ -1039,11 +1040,9 @@ machine(MachineType:Directory, "AMD Baseline protocol") action(wd_writeBackData, "wd", desc="Write back data if needed") { if (tbe.wtData || tbe.atomicData || tbe.Dirty == false) { - // If SLC is not set, the atomic is handled in the L2 - // Atomic needs to be done at the L3 only if this is - // not the case - - if (tbe.atomicData && tbe.isSLCSet) { + // Only perform atomics in the directory if the SLC bit is set, or + // if the L2 is WT + if (tbe.atomicData && (tbe.isSLCSet || !L2isWB)) { tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask); } enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {