mem-ruby: Remove DirectoryMemory storage in MOESI_AMD_BASE-dir

This protocol is using an old style where read/writes to memory were
being done by writing to a DataBlock in a DirectoryMemory entry. This
results in having multiple copies of memory, leads to stale copies in at
least one memory (usually DRAM), and require --access-backing-store in
most cases to work properly. This changeset removes all references to
getDirectoryEntry(...).DataBlk and instead forwards those reads and
writes to DRAM always.

This results in new transient states BL_WM, BDW_WM, and B_WM which are
blocked states waiting on memory acks indicating a write request is
complete. The appropriate transitions are updates to move to these new
states and stall states are updated to include them. DMA write ACK is
also moved to when the request is sent to memory, rather than when the
request is received.

Change-Id: Ic5bd6a8a8881d7df782e0f7eed8be9d873610e04
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/56446
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Matthew Poremba
2022-02-04 14:25:32 -06:00
parent 54fc137945
commit 7018c2b34e

View File

@@ -61,26 +61,29 @@ machine(MachineType:Directory, "AMD Baseline protocol")
{
// STATES
state_declaration(State, desc="Directory states", default="Directory_State_U") {
U, AccessPermission:Backing_Store, desc="unblocked";
BL, AccessPermission:Busy, desc="got L3 WB request";
U, AccessPermission:Backing_Store, desc="unblocked";
// BL is Busy because it's possible for the data only to be in the network
// in the WB, L3 has sent it and gone on with its business in possibly I
// state.
BDR_M, AccessPermission:Backing_Store, desc="DMA read, blocked waiting for memory";
BS_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
BM_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
B_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
BP, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory";
BDR_PM, AccessPermission:Backing_Store, desc="DMA read, blocked waiting for probes and memory";
BS_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
BM_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
B_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
BDW_P, AccessPermission:Backing_Store, desc="DMA write, blocked waiting for probes, no need for memory";
BDR_Pm, AccessPermission:Backing_Store, desc="DMA read, blocked waiting for probes, already got memory";
BS_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
BM_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
B_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
B, AccessPermission:Backing_Store, desc="sent response, Blocked til ack";
BL, AccessPermission:Busy, desc="got L3 WB request";
BL_WM, AccessPermission:Busy, desc="writing L3 WB to memory, waiting for ack";
BDR_M, AccessPermission:Backing_Store, desc="DMA read, blocked waiting for memory";
BS_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
BM_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
B_M, AccessPermission:Backing_Store, desc="blocked waiting for memory";
BP, AccessPermission:Backing_Store, desc="blocked waiting for probes, no need for memory";
BDR_PM, AccessPermission:Backing_Store, desc="DMA read, blocked waiting for probes and memory";
BS_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
BM_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
B_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and Memory";
BDW_P, AccessPermission:Backing_Store, desc="DMA write, blocked waiting for probes, no need for memory";
BDW_WM, AccessPermission:Backing_Store, desc="DMA write, writing to memory, waiting for ack";
BDR_Pm, AccessPermission:Backing_Store, desc="DMA read, blocked waiting for probes, already got memory";
BS_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
BM_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
B_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
B_WM, AccessPermission:Backing_Store, desc="writing to memory, waiting for ack";
B, AccessPermission:Backing_Store, desc="sent response, Blocked til ack";
}
// Events
@@ -132,7 +135,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
// DirectoryEntry
structure(Entry, desc="...", interface="AbstractCacheEntry", main="false") {
State DirectoryState, desc="Directory state";
DataBlock DataBlk, desc="data for the block";
NetDest VicDirtyIgnore, desc="VicDirty coming from whom to ignore";
}
@@ -195,16 +197,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
return dir_entry;
}
DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
TBE tbe := TBEs.lookup(addr);
if (is_valid(tbe) && tbe.MemData) {
DPRINTF(RubySlicc, "Returning DataBlk from TBE %s:%s\n", addr, tbe);
return tbe.DataBlk;
}
DPRINTF(RubySlicc, "Returning DataBlk from Dir %s:%s\n", addr, getDirectoryEntry(addr));
return getDirectoryEntry(addr).DataBlk;
}
State getState(TBE tbe, CacheEntry entry, Addr addr) {
return getDirectoryEntry(addr).DirectoryState;
}
@@ -379,7 +371,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
trigger(Event:MemData, in_msg.addr, entry, tbe);
DPRINTF(RubySlicc, "%s\n", in_msg);
} else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
trigger(Event:WBAck, in_msg.addr, entry, tbe); // ignore WBAcks, don't care about them.
trigger(Event:WBAck, in_msg.addr, entry, tbe);
} else {
DPRINTF(RubySlicc, "%s\n", in_msg.Type);
error("Invalid message");
@@ -920,7 +912,13 @@ machine(MachineType:Directory, "AMD Baseline protocol")
action(d_writeDataToMemory, "d", desc="Write data to memory") {
peek(responseNetwork_in, ResponseMsg) {
getDirectoryEntry(address).DataBlk := in_msg.DataBlk;
enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
out_msg.addr := address;
out_msg.Type := MemoryRequestType:MEMORY_WB;
out_msg.Sender := machineID;
out_msg.MessageSize := MessageSizeType:Writeback_Data;
out_msg.DataBlk := in_msg.DataBlk;
}
if (tbe.Dirty == false) {
// have to update the TBE, too, because of how this
// directory deals with functional writes
@@ -966,7 +964,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
tbe.WTRequestor := in_msg.WTRequestor;
tbe.LastSender := in_msg.Requestor;
}
tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
tbe.Dirty := false;
if (in_msg.Type == CoherenceRequestType:WriteThrough) {
tbe.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask);
@@ -980,30 +977,37 @@ machine(MachineType:Directory, "AMD Baseline protocol")
}
action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") {
if (tbe.Dirty == false) {
getDirectoryEntry(address).DataBlk := tbe.DataBlk;
}
TBEs.deallocate(address);
unset_tbe();
}
action(wd_writeBackData, "wd", desc="Write back data if needed") {
if (tbe.wtData) {
getDirectoryEntry(address).DataBlk.copyPartial(tbe.DataBlk, tbe.writeMask);
} else if (tbe.atomicData) {
tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask);
getDirectoryEntry(address).DataBlk := tbe.DataBlk;
} else if (tbe.Dirty == false) {
getDirectoryEntry(address).DataBlk := tbe.DataBlk;
if (tbe.wtData || tbe.atomicData || tbe.Dirty == false) {
if (tbe.atomicData) {
tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask);
}
enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
out_msg.addr := address;
out_msg.Type := MemoryRequestType:MEMORY_WB;
out_msg.Sender := machineID;
out_msg.MessageSize := MessageSizeType:Writeback_Data;
out_msg.DataBlk := tbe.DataBlk;
out_msg.Len := tbe.Len;
DPRINTF(ProtocolTrace, "%s\n", out_msg);
}
}
}
action(mt_writeMemDataToTBE, "mt", desc="write Mem data to TBE") {
peek(memQueue_in, MemoryMsg) {
if (tbe.wtData == true) {
// do nothing
// Keep the write-through data based on mask, but use the memory block
// for the masked-off data.
DataBlock tmpBlk := in_msg.DataBlk;
tmpBlk.copyPartial(tbe.DataBlk, tbe.writeMask);
tbe.DataBlk := tmpBlk;
} else if (tbe.Dirty == false) {
tbe.DataBlk := getDirectoryEntry(address).DataBlk;
tbe.DataBlk := in_msg.DataBlk;
}
tbe.MemData := true;
}
@@ -1183,6 +1187,10 @@ machine(MachineType:Directory, "AMD Baseline protocol")
stall_and_wait(dmaRequestQueue_in, address);
}
action(su_stallAndWaitRequest, "su", desc="Stall and wait on the address") {
stall_and_wait(unblockNetwork_in, address);
}
action(wad_wakeUpDependents, "wad", desc="Wake up any requests waiting for this address") {
wakeUpBuffers(address);
}
@@ -1199,18 +1207,18 @@ machine(MachineType:Directory, "AMD Baseline protocol")
}
// TRANSITIONS
transition({BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) {
transition({BL, BL_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B, B_WM}, {RdBlkS, RdBlkM, RdBlk, CtoD}) {
st_stallAndWaitRequest;
}
// It may be possible to save multiple invalidations here!
transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B}, {Atomic, WriteThrough}) {
transition({BL, BL_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B, B_WM}, {Atomic, WriteThrough}) {
st_stallAndWaitRequest;
}
// The exit state is always going to be U, so wakeUpDependents logic should be covered in all the
// transitions which are flowing into U.
transition({BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, {DmaRead,DmaWrite}){
transition({BL, BL_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B, B_WM}, {DmaRead,DmaWrite}){
sd_stallAndWaitRequest;
}
@@ -1233,7 +1241,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
transition(U, DmaWrite, BDW_P) {L3TagArrayRead} {
atd_allocateTBEforDMA;
da_sendResponseDmaAck;
icd_probeInvCoreDataForDMA;
pd_popDmaRequestQueue;
}
@@ -1293,12 +1300,10 @@ machine(MachineType:Directory, "AMD Baseline protocol")
zz_recycleRequestQueue;
}
transition(BL, CPUData, U) {L3TagArrayWrite, L3DataArrayWrite} {
transition(BL, CPUData, BL_WM) {L3TagArrayWrite, L3DataArrayWrite} {
d_writeDataToMemory;
al_allocateL3Block;
pr_profileL3HitMiss; //Must come after al_allocateL3Block and before dt_deallocateTBE
wad_wakeUpDependents;
dt_deallocateTBE;
pr_popResponseQueue;
}
@@ -1308,7 +1313,13 @@ machine(MachineType:Directory, "AMD Baseline protocol")
pr_popResponseQueue;
}
transition({B, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) {
transition(BL_WM, WBAck, U) {
wad_wakeUpDependents;
dt_deallocateTBE;
pm_popMemQueue;
}
transition({B, B_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm}, {VicDirty, VicClean}) {
z_stall;
}
@@ -1316,7 +1327,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
pm_popMemQueue;
}
transition({U, BL, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B}, StaleVicDirty) {
transition({U, BL, BL_WM, BDR_M, BS_M, BM_M, B_M, BP, BDR_PM, BDW_P, BDW_WM, BS_PM, BM_PM, B_PM, BDR_Pm, BS_Pm, BM_Pm, B_Pm, B, B_WM}, StaleVicDirty) {
rv_removeVicDirtyIgnore;
w_sendResponseWBAck;
p_popRequestQueue;
@@ -1376,7 +1387,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
pm_popMemQueue;
}
transition(BS_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
transition(BS_M, MemData, B_WM){L3TagArrayWrite, L3DataArrayWrite} {
mt_writeMemDataToTBE;
s_sendResponseS;
wd_writeBackData;
@@ -1385,7 +1396,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
pm_popMemQueue;
}
transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
transition(BM_M, MemData, B_WM){L3TagArrayWrite, L3DataArrayWrite} {
mt_writeMemDataToTBE;
m_sendResponseM;
wd_writeBackData;
@@ -1394,7 +1405,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
pm_popMemQueue;
}
transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} {
transition(B_M, MemData, B_WM){L3TagArrayWrite, L3DataArrayWrite} {
mt_writeMemDataToTBE;
es_sendResponseES;
wd_writeBackData;
@@ -1403,7 +1414,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
pm_popMemQueue;
}
transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} {
transition(BS_M, L3Hit, B_WM) {L3TagArrayWrite, L3DataArrayWrite} {
s_sendResponseS;
wd_writeBackData;
alwt_allocateL3BlockOnWT;
@@ -1411,7 +1422,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
ptl_popTriggerQueue;
}
transition(BM_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
transition(BM_M, L3Hit, B_WM) {L3DataArrayWrite, L3TagArrayWrite} {
m_sendResponseM;
wd_writeBackData;
alwt_allocateL3BlockOnWT;
@@ -1419,7 +1430,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
ptl_popTriggerQueue;
}
transition(B_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} {
transition(B_M, L3Hit, B_WM) {L3DataArrayWrite, L3TagArrayWrite} {
es_sendResponseES;
wd_writeBackData;
alwt_allocateL3BlockOnWT;
@@ -1453,12 +1464,18 @@ machine(MachineType:Directory, "AMD Baseline protocol")
pt_popTriggerQueue;
}
transition(BDW_P, ProbeAcksComplete, U) {
transition(BDW_P, ProbeAcksComplete, BDW_WM) {
wd_writeBackData;
da_sendResponseDmaAck;
pt_popTriggerQueue;
}
transition(BDW_WM, WBAck, U) {
// Check for pending requests from the core we put to sleep while waiting
// for a response
wada_wakeUpAllDependentsAddr;
dt_deallocateTBE;
pt_popTriggerQueue;
pm_popMemQueue;
}
transition(BDR_Pm, ProbeAcksComplete, U) {
@@ -1470,7 +1487,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
pt_popTriggerQueue;
}
transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
transition(BS_Pm, ProbeAcksComplete, B_WM){L3DataArrayWrite, L3TagArrayWrite} {
sf_setForwardReqTime;
s_sendResponseS;
wd_writeBackData;
@@ -1479,7 +1496,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
pt_popTriggerQueue;
}
transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
transition(BM_Pm, ProbeAcksComplete, B_WM){L3DataArrayWrite, L3TagArrayWrite} {
sf_setForwardReqTime;
m_sendResponseM;
wd_writeBackData;
@@ -1488,7 +1505,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
pt_popTriggerQueue;
}
transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} {
transition(B_Pm, ProbeAcksComplete, B_WM){L3DataArrayWrite, L3TagArrayWrite} {
sf_setForwardReqTime;
es_sendResponseES;
wd_writeBackData;
@@ -1497,7 +1514,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
pt_popTriggerQueue;
}
transition(BP, ProbeAcksComplete, B){L3TagArrayWrite, L3TagArrayWrite} {
transition(BP, ProbeAcksComplete, B_WM){L3TagArrayWrite, L3TagArrayWrite} {
sf_setForwardReqTime;
c_sendResponseCtoD;
wd_writeBackData;
@@ -1505,4 +1522,17 @@ machine(MachineType:Directory, "AMD Baseline protocol")
dt_deallocateTBE;
pt_popTriggerQueue;
}
transition(B_WM, WBAck, B) {
wada_wakeUpAllDependentsAddr;
pm_popMemQueue;
}
transition(B_WM, UnblockWriteThrough) {
z_stall;
}
transition(B_WM, CoreUnblock) {
su_stallAndWaitRequest;
}
}