mem-ruby: Implement Atomic No Alloc Policy
Add alternative implementation to far atomics when the flag alloc_on_atomic is false. The implementation fetches the data, performs the atomic, and writes back the cache line to main memory. Co-authored-by: Fabian Schätzle <f.schaetzle@fz-juelich.de> Change-Id: I8797fbc68448e1866a292f4afeedd3613113dddd
This commit is contained in:
@@ -1058,13 +1058,29 @@ action(Initiate_AtomicReturn_LocalWrite, desc="") {
|
||||
tbe.actions.push(Event:SendDBIDResp_AR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:SendCompData_AR);
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
}
|
||||
|
||||
action(Initiate_AtomicReturn_WriteBack, desc="") {
|
||||
if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) {
|
||||
tbe.actions.push(Event:SendSnpUnique);
|
||||
} else if (tbe.dir_sharers.count() > 0){
|
||||
// no one will send us data unless we explicitly ask
|
||||
tbe.actions.push(Event:SendSnpUniqueRetToSrc);
|
||||
}
|
||||
tbe.actions.push(Event:SendDBIDResp_AR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:SendCompData_AR);
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
tbe.actions.push(Event:SendWriteNoSnp);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:SendWUData);
|
||||
tbe.dataToBeInvalid := true;
|
||||
tbe.actions.pushNB(Event:TagArrayWrite);
|
||||
}
|
||||
|
||||
action(Initiate_AtomicNoReturn_LocalWrite, desc="") {
|
||||
if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) {
|
||||
@@ -1081,21 +1097,42 @@ action(Initiate_AtomicNoReturn_LocalWrite, desc="") {
|
||||
tbe.actions.push(Event:SendCompDBIDResp_ANR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
}
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
}
|
||||
|
||||
// Initiate_AtomicNoReturn_WriteBack: queues, in order, the TBE events for an
// AtomicNoReturn whose result is written back downstream instead of being
// kept in this cache. Sequence: optional snoop of upstream sharers,
// completion/DBID response(s), the delayed atomic, then WriteNoSnp + data.
// NOTE(review): push vs. pushNB semantics are defined outside this view
// (presumably blocking vs. non-blocking queue entries) - confirm.
action(Initiate_AtomicNoReturn_WriteBack, desc="") {
|
||||
// Sharers recorded and data possibly dirty upstream: snoop with SnpUnique.
if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) {
|
||||
tbe.actions.push(Event:SendSnpUnique);
|
||||
} else if (tbe.dir_sharers.count() > 0){
|
||||
// no one will send us data unless we explicitly ask
|
||||
tbe.actions.push(Event:SendSnpUniqueRetToSrc);
|
||||
}
|
||||
// comp_anr selects separate SendDBIDResp_ANR + SendComp_ANR events;
// otherwise a single SendCompDBIDResp_ANR event is queued.
if (comp_anr) {
|
||||
tbe.actions.push(Event:SendDBIDResp_ANR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.pushNB(Event:SendComp_ANR);
|
||||
} else {
|
||||
tbe.actions.push(Event:SendCompDBIDResp_ANR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
}
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
// Write-back phase: WriteNoSnp request followed by the write data.
tbe.actions.push(Event:SendWriteNoSnp);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:SendWUData);
|
||||
// Flag set before the final tag write; presumably the line is not retained
// locally after the write-back - confirm against the TagArrayWrite handling.
tbe.dataToBeInvalid := true;
|
||||
tbe.actions.pushNB(Event:TagArrayWrite);
|
||||
}
|
||||
|
||||
action(Initiate_AtomicReturn_Forward, desc="") {
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:SendAtomicReturn);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:SendDBIDResp_AR);
|
||||
tbe.actions.push(Event:SendARData);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:SendCompData_AR);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.pushNB(Event:TagArrayWrite);
|
||||
|
||||
tbe.dataToBeInvalid := true;
|
||||
@@ -1104,10 +1141,12 @@ action(Initiate_AtomicReturn_Forward, desc="") {
|
||||
action(Initiate_AtomicNoReturn_Forward, desc="") {
|
||||
if (comp_anr) {
|
||||
tbe.actions.push(Event:SendAtomicNoReturn);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:SendDBIDResp_ANR);
|
||||
tbe.actions.pushNB(Event:SendComp_ANR);
|
||||
} else {
|
||||
tbe.actions.push(Event:SendAtomicNoReturn);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:SendCompDBIDResp_ANR);
|
||||
}
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
@@ -1117,19 +1156,48 @@ action(Initiate_AtomicNoReturn_Forward, desc="") {
|
||||
tbe.dataToBeInvalid := true;
|
||||
}
|
||||
|
||||
// Initiate_AtomicReturn_Miss_Alloc: queues the TBE events for an AtomicReturn
// that misses (used by the I-state AtomicReturn_PoC_Alloc transition).
// Sequence: ReadNoSnp to fetch the data, DBID and CompData responses, the
// delayed atomic, then CheckCacheFill and the tag write. Unlike the
// write-back variants, no SendWriteNoSnp/SendWUData is queued and
// tbe.dataToBeInvalid is not set here.
action(Initiate_AtomicReturn_Miss_Alloc, desc="") {
|
||||
tbe.actions.push(Event:SendReadNoSnp);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:SendDBIDResp_AR);
|
||||
tbe.actions.push(Event:SendCompData_AR);
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
}
|
||||
|
||||
action(Initiate_AtomicReturn_Miss, desc="") {
|
||||
tbe.actions.push(Event:SendReadNoSnp);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:SendDBIDResp_AR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:SendCompData_AR);
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:SendWriteNoSnp);
|
||||
tbe.actions.push(Event:SendWUData);
|
||||
tbe.dataToBeInvalid := true;
|
||||
tbe.actions.pushNB(Event:TagArrayWrite);
|
||||
}
|
||||
|
||||
// Initiate_AtomicNoReturn_Miss_Alloc: queues the TBE events for an
// AtomicNoReturn that misses (used by the I-state AtomicNoReturn_PoC_Alloc
// transition). Sequence: ReadNoSnp to fetch the data, completion/DBID
// response(s), the delayed atomic, then CheckCacheFill and the tag write.
// No SendWriteNoSnp/SendWUData is queued and tbe.dataToBeInvalid is not set.
action(Initiate_AtomicNoReturn_Miss_Alloc, desc="") {
|
||||
// Asserts that is_HN holds for this controller.
assert(is_HN);
|
||||
tbe.actions.push(Event:SendReadNoSnp);
|
||||
// comp_anr selects separate SendDBIDResp_ANR + SendComp_ANR events;
// otherwise a single SendCompDBIDResp_ANR event is queued.
if (comp_anr) {
|
||||
tbe.actions.push(Event:SendDBIDResp_ANR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
tbe.actions.pushNB(Event:SendComp_ANR);
|
||||
} else {
|
||||
tbe.actions.push(Event:SendCompDBIDResp_ANR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
}
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
}
|
||||
|
||||
|
||||
action(Initiate_AtomicNoReturn_Miss, desc="") {
|
||||
assert(is_HN);
|
||||
tbe.actions.push(Event:SendReadNoSnp);
|
||||
@@ -1141,12 +1209,12 @@ action(Initiate_AtomicNoReturn_Miss, desc="") {
|
||||
tbe.actions.push(Event:SendCompDBIDResp_ANR);
|
||||
tbe.actions.pushNB(Event:WriteFEPipe);
|
||||
}
|
||||
|
||||
tbe.actions.push(Event:WriteFEPipe);
|
||||
tbe.actions.push(Event:CheckCacheFill);
|
||||
tbe.actions.push(Event:DelayAtomic);
|
||||
tbe.actions.push(Event:WriteBEPipe);
|
||||
tbe.actions.push(Event:TagArrayWrite);
|
||||
tbe.actions.push(Event:SendWriteNoSnp);
|
||||
tbe.actions.push(Event:SendWUData);
|
||||
tbe.dataToBeInvalid := true;
|
||||
tbe.actions.pushNB(Event:TagArrayWrite);
|
||||
}
|
||||
|
||||
action(Initiate_CopyBack, desc="") {
|
||||
@@ -1615,6 +1683,9 @@ action(Send_WriteNoSnp, desc="") {
|
||||
// so addExpectedCount
|
||||
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompDBIDResp);
|
||||
tbe.expected_req_resp.addExpectedCount(1);
|
||||
|
||||
// If we are WB after AtomicReturn/NoReturn
|
||||
tbe.atomic_to_be_wb := true;
|
||||
}
|
||||
|
||||
action(Send_WriteNoSnp_Partial, desc="") {
|
||||
@@ -2948,7 +3019,8 @@ action(Send_Data, desc="") {
|
||||
}
|
||||
tbe.snd_pendBytes.setMask(offset, range, false);
|
||||
|
||||
if (tbe.reqType == CHIRequestType:AtomicReturn){
|
||||
if ((tbe.reqType == CHIRequestType:AtomicReturn) &&
|
||||
(tbe.atomic_to_be_wb == false)){
|
||||
out_msg.dataBlk := tbe.oldDataBlk;
|
||||
} else {
|
||||
out_msg.dataBlk := tbe.dataBlk;
|
||||
|
||||
@@ -449,6 +449,8 @@ TBE allocateRequestTBE(Addr addr, CHIRequestMsg in_msg), return_by_pointer="yes"
|
||||
|
||||
tbe.atomic_op.clear();
|
||||
tbe.atomic_op.orMask(in_msg.atomic_op);
|
||||
tbe.atomic_to_be_done := false;
|
||||
tbe.atomic_to_be_wb := false;
|
||||
|
||||
tbe.use_DMT := false;
|
||||
tbe.use_DCT := false;
|
||||
@@ -814,7 +816,7 @@ bool needCacheEntry(CHIRequestType req_type,
|
||||
(req_type == CHIRequestType:AtomicStore))) ||
|
||||
(alloc_on_seq_line_write && (req_type == CHIRequestType:StoreLine)) ||
|
||||
(alloc_on_atomic && ((req_type == CHIRequestType:AtomicReturn) ||
|
||||
(req_type == CHIRequestType:AtomicNoReturn)));
|
||||
(req_type == CHIRequestType:AtomicNoReturn)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1274,13 +1276,17 @@ Event reqToEvent(CHIRequestType type, bool is_prefetch) {
|
||||
} else if (type == CHIRequestType:DvmSync_Initiate) {
|
||||
return Event:DvmSync_Initiate;
|
||||
} else if (type == CHIRequestType:AtomicReturn){
|
||||
if (is_HN) {
|
||||
if (is_HN && alloc_on_atomic) {
|
||||
return Event:AtomicReturn_PoC_Alloc;
|
||||
} else if (is_HN){
|
||||
return Event:AtomicReturn_PoC;
|
||||
} else {
|
||||
return Event:AtomicReturn;
|
||||
}
|
||||
} else if (type == CHIRequestType:AtomicNoReturn){
|
||||
if (is_HN) {
|
||||
if (is_HN && alloc_on_atomic) {
|
||||
return Event:AtomicNoReturn_PoC_Alloc;
|
||||
} else if (is_HN){
|
||||
return Event:AtomicNoReturn_PoC;
|
||||
} else {
|
||||
return Event:AtomicNoReturn;
|
||||
|
||||
@@ -448,7 +448,7 @@ transition({RSC,RSD,RUSD,RUSC,RU,I}, WriteUnique, BUSY_BLKD) {
|
||||
|
||||
// AtomicReturn and AtomicNoReturn
|
||||
|
||||
transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
|
||||
transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,RUSC,
|
||||
UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicReturn, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_Forward;
|
||||
@@ -457,7 +457,7 @@ transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
|
||||
transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,RUSC,
|
||||
UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicNoReturn, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicNoReturn_Forward;
|
||||
@@ -467,7 +467,7 @@ transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
|
||||
}
|
||||
|
||||
transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
|
||||
AtomicReturn_PoC, BUSY_BLKD) {
|
||||
{AtomicReturn_PoC_Alloc, AtomicReturn_PoC}, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_LocalWrite;
|
||||
Profile_Hit;
|
||||
@@ -476,7 +476,7 @@ transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
|
||||
}
|
||||
|
||||
transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
|
||||
AtomicNoReturn_PoC, BUSY_BLKD) {
|
||||
{AtomicNoReturn_PoC_Alloc, AtomicNoReturn_PoC}, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicNoReturn_LocalWrite;
|
||||
Profile_Hit;
|
||||
@@ -484,8 +484,8 @@ transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
|
||||
AtomicReturn_PoC, BUSY_BLKD) {
|
||||
transition({SD, SD_RSD, SD_RSC, SC, SC_RSC},
|
||||
{AtomicReturn_PoC_Alloc, AtomicReturn_PoC}, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_LocalWrite;
|
||||
Profile_Miss;
|
||||
@@ -493,8 +493,26 @@ transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
|
||||
AtomicNoReturn_PoC, BUSY_BLKD) {
|
||||
// AtomicReturn_PoC_Alloc received in RSC/RSD/RUSC/RUSD/RU: go to BUSY_BLKD
// and use the LocalWrite action sequence; the access is profiled as a miss.
transition({RSC, RSD, RUSC, RUSD, RU},
|
||||
AtomicReturn_PoC_Alloc, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_LocalWrite;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
// AtomicReturn_PoC (the non-allocating event) received in
// RSC/RSD/RUSC/RUSD/RU: go to BUSY_BLKD and use the WriteBack action
// sequence; the access is profiled as a miss.
transition({RSC, RSD, RUSC, RUSD, RU},
|
||||
AtomicReturn_PoC, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_WriteBack;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
transition({SD, SD_RSD, SD_RSC, SC, SC_RSC},
|
||||
{AtomicNoReturn_PoC_Alloc, AtomicNoReturn_PoC}, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicNoReturn_LocalWrite;
|
||||
Profile_Miss;
|
||||
@@ -502,6 +520,43 @@ transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
// AtomicNoReturn_PoC_Alloc received in RSC/RSD/RUSC/RUSD/RU: go to BUSY_BLKD
// and use the LocalWrite action sequence; the access is profiled as a miss.
transition({RSC, RSD, RUSC, RUSD, RU},
|
||||
AtomicNoReturn_PoC_Alloc, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicNoReturn_LocalWrite;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
// AtomicNoReturn_PoC (the non-allocating event) received in
// RSC/RSD/RUSC/RUSD/RU: go to BUSY_BLKD and use the WriteBack action
// sequence; the access is profiled as a miss.
transition({RSC, RSD, RUSC, RUSD, RU},
|
||||
AtomicNoReturn_PoC, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicNoReturn_WriteBack;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
// AtomicReturn_PoC_Alloc received in state I: go to BUSY_BLKD, queue the
// Miss_Alloc action sequence and allocate a directory entry; the access is
// profiled as a miss.
transition(I, AtomicReturn_PoC_Alloc, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_Miss_Alloc;
|
||||
Allocate_DirEntry;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
// AtomicNoReturn_PoC_Alloc received in state I: go to BUSY_BLKD, queue the
// Miss_Alloc action sequence and allocate a directory entry; the access is
// profiled as a miss.
transition(I, AtomicNoReturn_PoC_Alloc, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicNoReturn_Miss_Alloc;
|
||||
Allocate_DirEntry;
|
||||
Profile_Miss;
|
||||
Pop_ReqRdyQueue;
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
|
||||
transition(I, AtomicReturn_PoC, BUSY_BLKD) {
|
||||
Initiate_Request;
|
||||
Initiate_AtomicReturn_Miss;
|
||||
@@ -520,7 +575,6 @@ transition(I, AtomicNoReturn_PoC, BUSY_BLKD) {
|
||||
ProcessNextState;
|
||||
}
|
||||
|
||||
|
||||
// Load / Store / Atomic from sequencer & Prefetch from prefetcher
|
||||
|
||||
transition({UD,UD_T,SD,UC,SC}, Load, BUSY_BLKD) {
|
||||
@@ -880,8 +934,8 @@ transition({BUSY_BLKD,BUSY_INTR},
|
||||
WriteUnique,WriteUniquePtl_PoC,
|
||||
WriteUniqueFull_PoC,WriteUniqueFull_PoC_Alloc,
|
||||
WriteUniqueZero,
|
||||
AtomicReturn,AtomicReturn_PoC,
|
||||
AtomicNoReturn,AtomicNoReturn_PoC,
|
||||
AtomicReturn,AtomicReturn_PoC, AtomicReturn_PoC_Alloc
|
||||
AtomicNoReturn,AtomicNoReturn_PoC, AtomicNoReturn_PoC_Alloc
|
||||
StashOnceShared,StashOnceUnique}) {
|
||||
StallRequest;
|
||||
}
|
||||
|
||||
@@ -325,7 +325,9 @@ machine(MachineType:Cache, "Cache coherency protocol") :
|
||||
AtomicReturn, desc="", in_trans="yes";
|
||||
AtomicNoReturn, desc="", in_trans="yes";
|
||||
AtomicReturn_PoC, desc="", in_trans="yes";
|
||||
AtomicReturn_PoC_Alloc, desc="", in_trans="yes";
|
||||
AtomicNoReturn_PoC, desc="", in_trans="yes";
|
||||
AtomicNoReturn_PoC_Alloc, desc="", in_trans="yes";
|
||||
SnpCleanInvalid, desc="", in_trans="yes";
|
||||
SnpShared, desc="", in_trans="yes";
|
||||
SnpSharedFwd, desc="", in_trans="yes";
|
||||
@@ -661,6 +663,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
|
||||
// Atomic info associated with the transaction
|
||||
WriteMask atomic_op, desc="Atomic Operation Wrapper";
|
||||
bool atomic_to_be_done, desc="We have yet to perform the atomic";
|
||||
bool atomic_to_be_wb, desc="We are writebacking the atomic";
|
||||
|
||||
// NOTE: seqReq is a smart pointer pointing to original CPU request object
|
||||
// that triggers transactions associated with this TBE. seqReq carries some
|
||||
|
||||
Reference in New Issue
Block a user