mem-ruby: Always pass on GPU atomics to dir in write-through TCC (#367)

Added checks to ensure that atomics are not performed in the TCC when it
is configured as a write-through cache. Also added SLC bit overwrite to
ensure directory performs atomics when there is a write-through TCC.

Change-Id: I4514e6c8022aeb7785f2c59871cd9acec8161ed8
This commit is contained in:
Daniel Kouchekinia
2023-10-14 08:39:50 -05:00
committed by GitHub
parent a3c51ca38c
commit 4931fb0010
3 changed files with 24 additions and 18 deletions

View File

@@ -555,6 +555,7 @@ def construct_dirs(options, system, ruby_system, network):
dir_cntrl.create(options, dir_ranges, ruby_system, system)
dir_cntrl.number_of_TBEs = options.num_tbes
dir_cntrl.useL3OnWT = options.use_L3_on_WT
dir_cntrl.L2isWB = options.WB_L2
# the number_of_TBEs is inclusive of TBEs below
# Connect the Directory controller to the ruby network

View File

@@ -300,16 +300,22 @@ machine(MachineType:TCC, "TCC Cache")
// checked when the read response is received.
if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
} else if (is_slc_set) {
// If the SLC bit is set, the response needs to bypass the cache
// and should not be allocated an entry.
} else if(in_msg.Type == CoherenceResponseType:NBSysResp) {
// If the SLC bit is set or the cache is write-through and
// we're receiving modified data (such as from an atomic),
// the response needs to bypass the cache and should not be
// allocated an entry.
if(is_slc_set || (!WB && in_msg.State == CoherenceState:Modified)) {
trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
} else if (in_msg.Type == CoherenceResponseType:NBSysResp) {
if(presentOrAvail(in_msg.addr)) {
trigger(Event:Data, in_msg.addr, cache_entry, tbe);
} else {
Addr victim := L2cache.cacheProbe(in_msg.addr);
trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
if(presentOrAvail(in_msg.addr)) {
// Responses with atomic data will only reach here if the
// SLC bit isn't set and the cache is WB
trigger(Event:Data, in_msg.addr, cache_entry, tbe);
} else {
Addr victim := L2cache.cacheProbe(in_msg.addr);
trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
}
}
} else {
error("Unexpected Response Message to Core");
@@ -358,13 +364,13 @@ machine(MachineType:TCC, "TCC Cache")
trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
}
} else if (in_msg.Type == CoherenceRequestType:Atomic) {
// If the request is system-level, or if the address isn't in the cache,
// then send the request to the directory. Since non-SLC atomics won't be
// performed by the directory, TCC will perform the atomic on the return path
// on Event:Data.
// If the request is system-level, if the address isn't in the cache,
// or if this cache is write-through, then send the request to the
// directory. Since non-SLC atomics won't be performed by the directory,
// TCC will perform the atomic on the return path on Event:Data.
// The action will invalidate the cache line if SLC is set and the address is
// in the cache.
if(in_msg.isSLCSet || !presentOrAvail(in_msg.addr)) {
if(in_msg.isSLCSet || !WB || !presentOrAvail(in_msg.addr)) {
trigger(Event:AtomicPassOn, in_msg.addr, cache_entry, tbe);
} else {
trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);

View File

@@ -39,6 +39,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
bool GPUonly := "False";
int TCC_select_num_bits;
bool useL3OnWT := "False";
bool L2isWB;
Cycles to_memory_controller_latency := 1;
// DMA
@@ -1039,11 +1040,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
action(wd_writeBackData, "wd", desc="Write back data if needed") {
if (tbe.wtData || tbe.atomicData || tbe.Dirty == false) {
// If SLC is not set, the atomic is handled in the L2
// Atomic needs to be done at the L3 only if this is
// not the case
if (tbe.atomicData && tbe.isSLCSet) {
// Only perform atomics in the directory if the SLC bit is set, or
// if the L2 is WT
if (tbe.atomicData && (tbe.isSLCSet || !L2isWB)) {
tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask);
}
enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {