mem-ruby: Implement Atomic No Alloc Policy

Add an alternative implementation of far atomics for when the flag
alloc_on_atomic is false. The implementation fetches the data, performs the
atomic and writes the cache line back to main memory.

Co-authored-by: Fabian Schätzle <f.schaetzle@fz-juelich.de>
Change-Id: I8797fbc68448e1866a292f4afeedd3613113dddd
This commit is contained in:
Víctor Soria Pardos
2024-04-04 09:34:34 +02:00
parent 5a6a3be6da
commit 98358da968
4 changed files with 163 additions and 28 deletions

View File

@@ -1058,13 +1058,29 @@ action(Initiate_AtomicReturn_LocalWrite, desc="") {
tbe.actions.push(Event:SendDBIDResp_AR);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.push(Event:SendCompData_AR);
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:DelayAtomic);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
}
// Queues the TBE action sequence for an AtomicReturn that is served without
// filling the local cache: after the atomic, the line is sent out with
// SendWriteNoSnp/SendWUData and the local data is flagged to be invalidated.
// Selected by the {RSC,RSD,RUSC,RUSD,RU} + AtomicReturn_PoC transition.
action(Initiate_AtomicReturn_WriteBack, desc="") {
// Sharers are recorded in the directory: snoop them before anything else.
// SendSnpUnique is chosen when the data may be dirty upstream.
if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) {
tbe.actions.push(Event:SendSnpUnique);
} else if (tbe.dir_sharers.count() > 0){
// no one will send us data unless we explicitly ask
tbe.actions.push(Event:SendSnpUniqueRetToSrc);
}
// Requester-facing responses for the AtomicReturn.
tbe.actions.push(Event:SendDBIDResp_AR);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.push(Event:SendCompData_AR);
// NOTE(review): DelayAtomic presumably models the latency of performing the
// atomic operation - confirm against the event handler.
tbe.actions.push(Event:DelayAtomic);
// Write the line out downstream; no CheckCacheFill is queued on this path.
tbe.actions.push(Event:SendWriteNoSnp);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:SendWUData);
// Mark the TBE data as to-be-invalid before the final tag array update.
tbe.dataToBeInvalid := true;
tbe.actions.pushNB(Event:TagArrayWrite);
}
action(Initiate_AtomicNoReturn_LocalWrite, desc="") {
if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) {
@@ -1081,21 +1097,42 @@ action(Initiate_AtomicNoReturn_LocalWrite, desc="") {
tbe.actions.push(Event:SendCompDBIDResp_ANR);
tbe.actions.pushNB(Event:WriteFEPipe);
}
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:DelayAtomic);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
}
// Queues the TBE action sequence for an AtomicNoReturn that is served without
// filling the local cache: after the atomic, the line is sent out with
// SendWriteNoSnp/SendWUData and the local data is flagged to be invalidated.
// Selected by the {RSC,RSD,RUSC,RUSD,RU} + AtomicNoReturn_PoC transition.
action(Initiate_AtomicNoReturn_WriteBack, desc="") {
// Sharers are recorded in the directory: snoop them before anything else.
// SendSnpUnique is chosen when the data may be dirty upstream.
if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) {
tbe.actions.push(Event:SendSnpUnique);
} else if (tbe.dir_sharers.count() > 0){
// no one will send us data unless we explicitly ask
tbe.actions.push(Event:SendSnpUniqueRetToSrc);
}
// comp_anr selects between two separate events (SendDBIDResp_ANR followed by
// SendComp_ANR) and the single combined SendCompDBIDResp_ANR event.
if (comp_anr) {
tbe.actions.push(Event:SendDBIDResp_ANR);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.pushNB(Event:SendComp_ANR);
} else {
tbe.actions.push(Event:SendCompDBIDResp_ANR);
tbe.actions.pushNB(Event:WriteFEPipe);
}
// NOTE(review): DelayAtomic presumably models the latency of performing the
// atomic operation - confirm against the event handler.
tbe.actions.push(Event:DelayAtomic);
// Write the line out downstream; no CheckCacheFill is queued on this path.
tbe.actions.push(Event:SendWriteNoSnp);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:SendWUData);
// Mark the TBE data as to-be-invalid before the final tag array update.
tbe.dataToBeInvalid := true;
tbe.actions.pushNB(Event:TagArrayWrite);
}
action(Initiate_AtomicReturn_Forward, desc="") {
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.push(Event:SendAtomicReturn);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.push(Event:SendDBIDResp_AR);
tbe.actions.push(Event:SendARData);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.push(Event:SendCompData_AR);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.pushNB(Event:TagArrayWrite);
tbe.dataToBeInvalid := true;
@@ -1104,10 +1141,12 @@ action(Initiate_AtomicReturn_Forward, desc="") {
action(Initiate_AtomicNoReturn_Forward, desc="") {
if (comp_anr) {
tbe.actions.push(Event:SendAtomicNoReturn);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.push(Event:SendDBIDResp_ANR);
tbe.actions.pushNB(Event:SendComp_ANR);
} else {
tbe.actions.push(Event:SendAtomicNoReturn);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.push(Event:SendCompDBIDResp_ANR);
}
tbe.actions.push(Event:WriteBEPipe);
@@ -1117,19 +1156,48 @@ action(Initiate_AtomicNoReturn_Forward, desc="") {
tbe.dataToBeInvalid := true;
}
// Queues the TBE action sequence for an AtomicReturn that misses and is
// allowed to allocate: used by the (I, AtomicReturn_PoC_Alloc) transition.
// The sequence ends with CheckCacheFill and TagArrayWrite rather than with a
// SendWriteNoSnp/SendWUData write-back.
action(Initiate_AtomicReturn_Miss_Alloc, desc="") {
// Request the line first (ReadNoSnp), since there is no local copy in I.
tbe.actions.push(Event:SendReadNoSnp);
tbe.actions.pushNB(Event:WriteFEPipe);
// Requester-facing responses for the AtomicReturn.
tbe.actions.push(Event:SendDBIDResp_AR);
tbe.actions.push(Event:SendCompData_AR);
// NOTE(review): DelayAtomic presumably models the latency of performing the
// atomic operation - confirm against the event handler.
tbe.actions.push(Event:DelayAtomic);
// Fill check is queued after the atomic on this path.
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
}
action(Initiate_AtomicReturn_Miss, desc="") {
tbe.actions.push(Event:SendReadNoSnp);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.push(Event:SendDBIDResp_AR);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.push(Event:SendCompData_AR);
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:DelayAtomic);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:SendWriteNoSnp);
tbe.actions.push(Event:SendWUData);
tbe.dataToBeInvalid := true;
tbe.actions.pushNB(Event:TagArrayWrite);
}
// Queues the TBE action sequence for an AtomicNoReturn that misses and is
// allowed to allocate: used by the (I, AtomicNoReturn_PoC_Alloc) transition.
// The sequence ends with CheckCacheFill and TagArrayWrite rather than with a
// SendWriteNoSnp/SendWUData write-back.
action(Initiate_AtomicNoReturn_Miss_Alloc, desc="") {
// Only valid on a home node.
assert(is_HN);
// Request the line first (ReadNoSnp), since there is no local copy in I.
tbe.actions.push(Event:SendReadNoSnp);
// comp_anr selects between two separate events (SendDBIDResp_ANR followed by
// SendComp_ANR) and the single combined SendCompDBIDResp_ANR event.
if (comp_anr) {
tbe.actions.push(Event:SendDBIDResp_ANR);
tbe.actions.pushNB(Event:WriteFEPipe);
tbe.actions.pushNB(Event:SendComp_ANR);
} else {
tbe.actions.push(Event:SendCompDBIDResp_ANR);
tbe.actions.pushNB(Event:WriteFEPipe);
}
// NOTE(review): DelayAtomic presumably models the latency of performing the
// atomic operation - confirm against the event handler.
tbe.actions.push(Event:DelayAtomic);
// Fill check is queued after the atomic on this path.
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
}
action(Initiate_AtomicNoReturn_Miss, desc="") {
assert(is_HN);
tbe.actions.push(Event:SendReadNoSnp);
@@ -1141,12 +1209,12 @@ action(Initiate_AtomicNoReturn_Miss, desc="") {
tbe.actions.push(Event:SendCompDBIDResp_ANR);
tbe.actions.pushNB(Event:WriteFEPipe);
}
tbe.actions.push(Event:WriteFEPipe);
tbe.actions.push(Event:CheckCacheFill);
tbe.actions.push(Event:DelayAtomic);
tbe.actions.push(Event:WriteBEPipe);
tbe.actions.push(Event:TagArrayWrite);
tbe.actions.push(Event:SendWriteNoSnp);
tbe.actions.push(Event:SendWUData);
tbe.dataToBeInvalid := true;
tbe.actions.pushNB(Event:TagArrayWrite);
}
action(Initiate_CopyBack, desc="") {
@@ -1615,6 +1683,9 @@ action(Send_WriteNoSnp, desc="") {
// so addExpectedCount
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompDBIDResp);
tbe.expected_req_resp.addExpectedCount(1);
// If we are WB after AtomicReturn/NoReturn
tbe.atomic_to_be_wb := true;
}
action(Send_WriteNoSnp_Partial, desc="") {
@@ -2948,7 +3019,8 @@ action(Send_Data, desc="") {
}
tbe.snd_pendBytes.setMask(offset, range, false);
if (tbe.reqType == CHIRequestType:AtomicReturn){
if ((tbe.reqType == CHIRequestType:AtomicReturn) &&
(tbe.atomic_to_be_wb == false)){
out_msg.dataBlk := tbe.oldDataBlk;
} else {
out_msg.dataBlk := tbe.dataBlk;

View File

@@ -449,6 +449,8 @@ TBE allocateRequestTBE(Addr addr, CHIRequestMsg in_msg), return_by_pointer="yes"
tbe.atomic_op.clear();
tbe.atomic_op.orMask(in_msg.atomic_op);
tbe.atomic_to_be_done := false;
tbe.atomic_to_be_wb := false;
tbe.use_DMT := false;
tbe.use_DCT := false;
@@ -814,7 +816,7 @@ bool needCacheEntry(CHIRequestType req_type,
(req_type == CHIRequestType:AtomicStore))) ||
(alloc_on_seq_line_write && (req_type == CHIRequestType:StoreLine)) ||
(alloc_on_atomic && ((req_type == CHIRequestType:AtomicReturn) ||
(req_type == CHIRequestType:AtomicNoReturn)));
(req_type == CHIRequestType:AtomicNoReturn)));
}
}
@@ -1274,13 +1276,17 @@ Event reqToEvent(CHIRequestType type, bool is_prefetch) {
} else if (type == CHIRequestType:DvmSync_Initiate) {
return Event:DvmSync_Initiate;
} else if (type == CHIRequestType:AtomicReturn){
if (is_HN) {
if (is_HN && alloc_on_atomic) {
return Event:AtomicReturn_PoC_Alloc;
} else if (is_HN){
return Event:AtomicReturn_PoC;
} else {
return Event:AtomicReturn;
}
} else if (type == CHIRequestType:AtomicNoReturn){
if (is_HN) {
if (is_HN && alloc_on_atomic) {
return Event:AtomicNoReturn_PoC_Alloc;
} else if (is_HN){
return Event:AtomicNoReturn_PoC;
} else {
return Event:AtomicNoReturn;

View File

@@ -448,7 +448,7 @@ transition({RSC,RSD,RUSD,RUSC,RU,I}, WriteUnique, BUSY_BLKD) {
// AtomicReturn and AtomicNoReturn
transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,RUSC,
UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicReturn, BUSY_BLKD) {
Initiate_Request;
Initiate_AtomicReturn_Forward;
@@ -457,7 +457,7 @@ transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
ProcessNextState;
}
transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,RUSC,
UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicNoReturn, BUSY_BLKD) {
Initiate_Request;
Initiate_AtomicNoReturn_Forward;
@@ -467,7 +467,7 @@ transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
}
transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
AtomicReturn_PoC, BUSY_BLKD) {
{AtomicReturn_PoC_Alloc, AtomicReturn_PoC}, BUSY_BLKD) {
Initiate_Request;
Initiate_AtomicReturn_LocalWrite;
Profile_Hit;
@@ -476,7 +476,7 @@ transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
}
transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
AtomicNoReturn_PoC, BUSY_BLKD) {
{AtomicNoReturn_PoC_Alloc, AtomicNoReturn_PoC}, BUSY_BLKD) {
Initiate_Request;
Initiate_AtomicNoReturn_LocalWrite;
Profile_Hit;
@@ -484,8 +484,8 @@ transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
ProcessNextState;
}
transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
AtomicReturn_PoC, BUSY_BLKD) {
transition({SD, SD_RSD, SD_RSC, SC, SC_RSC},
{AtomicReturn_PoC_Alloc, AtomicReturn_PoC}, BUSY_BLKD) {
Initiate_Request;
Initiate_AtomicReturn_LocalWrite;
Profile_Miss;
@@ -493,8 +493,26 @@ transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
ProcessNextState;
}
transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
AtomicNoReturn_PoC, BUSY_BLKD) {
// AtomicReturn at the PoC with allocation enabled (reqToEvent yields
// AtomicReturn_PoC_Alloc when is_HN && alloc_on_atomic), arriving in one of
// the {RSC,RSD,RUSC,RUSD,RU} states: handled with the LocalWrite action list
// and profiled as a miss.
transition({RSC, RSD, RUSC, RUSD, RU},
AtomicReturn_PoC_Alloc, BUSY_BLKD) {
Initiate_Request;
Initiate_AtomicReturn_LocalWrite;
Profile_Miss;
Pop_ReqRdyQueue;
ProcessNextState;
}
// AtomicReturn at the PoC without allocation (reqToEvent yields
// AtomicReturn_PoC when is_HN but alloc_on_atomic is false), arriving in one
// of the {RSC,RSD,RUSC,RUSD,RU} states: handled with the WriteBack action
// list instead of LocalWrite, and profiled as a miss.
transition({RSC, RSD, RUSC, RUSD, RU},
AtomicReturn_PoC, BUSY_BLKD) {
Initiate_Request;
Initiate_AtomicReturn_WriteBack;
Profile_Miss;
Pop_ReqRdyQueue;
ProcessNextState;
}
transition({SD, SD_RSD, SD_RSC, SC, SC_RSC},
{AtomicNoReturn_PoC_Alloc, AtomicNoReturn_PoC}, BUSY_BLKD) {
Initiate_Request;
Initiate_AtomicNoReturn_LocalWrite;
Profile_Miss;
@@ -502,6 +520,43 @@ transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
ProcessNextState;
}
// AtomicNoReturn at the PoC with allocation enabled (reqToEvent yields
// AtomicNoReturn_PoC_Alloc when is_HN && alloc_on_atomic), arriving in one of
// the {RSC,RSD,RUSC,RUSD,RU} states: handled with the LocalWrite action list
// and profiled as a miss.
transition({RSC, RSD, RUSC, RUSD, RU},
AtomicNoReturn_PoC_Alloc, BUSY_BLKD) {
Initiate_Request;
Initiate_AtomicNoReturn_LocalWrite;
Profile_Miss;
Pop_ReqRdyQueue;
ProcessNextState;
}
// AtomicNoReturn at the PoC without allocation (reqToEvent yields
// AtomicNoReturn_PoC when is_HN but alloc_on_atomic is false), arriving in
// one of the {RSC,RSD,RUSC,RUSD,RU} states: handled with the WriteBack action
// list instead of LocalWrite, and profiled as a miss.
transition({RSC, RSD, RUSC, RUSD, RU},
AtomicNoReturn_PoC, BUSY_BLKD) {
Initiate_Request;
Initiate_AtomicNoReturn_WriteBack;
Profile_Miss;
Pop_ReqRdyQueue;
ProcessNextState;
}
// AtomicReturn at the PoC with allocation enabled, arriving in state I:
// uses the Miss_Alloc action list, allocates a directory entry and is
// profiled as a miss.
transition(I, AtomicReturn_PoC_Alloc, BUSY_BLKD) {
Initiate_Request;
Initiate_AtomicReturn_Miss_Alloc;
Allocate_DirEntry;
Profile_Miss;
Pop_ReqRdyQueue;
ProcessNextState;
}
// AtomicNoReturn at the PoC with allocation enabled, arriving in state I:
// uses the Miss_Alloc action list, allocates a directory entry and is
// profiled as a miss.
transition(I, AtomicNoReturn_PoC_Alloc, BUSY_BLKD) {
Initiate_Request;
Initiate_AtomicNoReturn_Miss_Alloc;
Allocate_DirEntry;
Profile_Miss;
Pop_ReqRdyQueue;
ProcessNextState;
}
transition(I, AtomicReturn_PoC, BUSY_BLKD) {
Initiate_Request;
Initiate_AtomicReturn_Miss;
@@ -520,7 +575,6 @@ transition(I, AtomicNoReturn_PoC, BUSY_BLKD) {
ProcessNextState;
}
// Load / Store / Atomic from sequencer & Prefetch from prefetcher
transition({UD,UD_T,SD,UC,SC}, Load, BUSY_BLKD) {
@@ -880,8 +934,8 @@ transition({BUSY_BLKD,BUSY_INTR},
WriteUnique,WriteUniquePtl_PoC,
WriteUniqueFull_PoC,WriteUniqueFull_PoC_Alloc,
WriteUniqueZero,
AtomicReturn,AtomicReturn_PoC,
AtomicNoReturn,AtomicNoReturn_PoC,
AtomicReturn,AtomicReturn_PoC, AtomicReturn_PoC_Alloc
AtomicNoReturn,AtomicNoReturn_PoC, AtomicNoReturn_PoC_Alloc
StashOnceShared,StashOnceUnique}) {
StallRequest;
}

View File

@@ -325,7 +325,9 @@ machine(MachineType:Cache, "Cache coherency protocol") :
AtomicReturn, desc="", in_trans="yes";
AtomicNoReturn, desc="", in_trans="yes";
AtomicReturn_PoC, desc="", in_trans="yes";
AtomicReturn_PoC_Alloc, desc="", in_trans="yes";
AtomicNoReturn_PoC, desc="", in_trans="yes";
AtomicNoReturn_PoC_Alloc, desc="", in_trans="yes";
SnpCleanInvalid, desc="", in_trans="yes";
SnpShared, desc="", in_trans="yes";
SnpSharedFwd, desc="", in_trans="yes";
@@ -661,6 +663,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
// Atomic info associated with the transaction
WriteMask atomic_op, desc="Atomic Operation Wrapper";
bool atomic_to_be_done, desc="We have yet to perform the atomic";
bool atomic_to_be_wb, desc="We are writebacking the atomic";
// NOTE: seqReq is a smart pointer pointing to original CPU request object
// that triggers transactions associated with this TBE. seqReq carries some