CHI: distinguish allocating and non-allocating atomics at the home node

Summary (derived from the hunks below):
  * reqToEvent() returns AtomicReturn_PoC_Alloc / AtomicNoReturn_PoC_Alloc
    when (is_HN && alloc_on_atomic); an HN without alloc_on_atomic keeps
    the existing AtomicReturn_PoC / AtomicNoReturn_PoC events.
  * New actions Initiate_Atomic{Return,NoReturn}_WriteBack and
    Initiate_Atomic{Return,NoReturn}_Miss_Alloc. The non-Alloc *_Miss
    actions no longer push CheckCacheFill; they push SendWriteNoSnp and
    SendWUData and set tbe.dataToBeInvalid instead.
  * New TBE flag atomic_to_be_wb: cleared in allocateRequestTBE(), set in
    Send_WriteNoSnp, and consulted in Send_Data so that an AtomicReturn
    sends oldDataBlk only while the flag is still false.
  * RUSC is added to the initial-state sets of the AtomicReturn /
    AtomicNoReturn forwarding transitions.

Review notes:
  * Line structure and indentation were reconstructed from a collapsed
    copy of this patch; blank context lines were placed so that every
    hunk header's line counts balance. Verify whitespace against the
    target tree before applying.
  * The two added lines in the {BUSY_BLKD,BUSY_INTR} stall transition now
    end with a comma, like every other entry in that event list.
  * The comment added in Send_WriteNoSnp was reworded: the assignment is
    unconditional, the old wording suggested otherwise.

diff --git a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm
index 5eefc3ba9b..3db54934c6 100644
--- a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm
+++ b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm
@@ -1058,13 +1058,29 @@ action(Initiate_AtomicReturn_LocalWrite, desc="") {
   tbe.actions.push(Event:SendDBIDResp_AR);
   tbe.actions.pushNB(Event:WriteFEPipe);
   tbe.actions.push(Event:SendCompData_AR);
-  tbe.actions.push(Event:WriteFEPipe);
-  tbe.actions.push(Event:CheckCacheFill);
   tbe.actions.push(Event:DelayAtomic);
+  tbe.actions.push(Event:CheckCacheFill);
   tbe.actions.push(Event:WriteBEPipe);
   tbe.actions.push(Event:TagArrayWrite);
 }
 
+action(Initiate_AtomicReturn_WriteBack, desc="") {
+  if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) {
+    tbe.actions.push(Event:SendSnpUnique);
+  } else if (tbe.dir_sharers.count() > 0){
+    // no one will send us data unless we explicitly ask
+    tbe.actions.push(Event:SendSnpUniqueRetToSrc);
+  }
+  tbe.actions.push(Event:SendDBIDResp_AR);
+  tbe.actions.pushNB(Event:WriteFEPipe);
+  tbe.actions.push(Event:SendCompData_AR);
+  tbe.actions.push(Event:DelayAtomic);
+  tbe.actions.push(Event:SendWriteNoSnp);
+  tbe.actions.push(Event:WriteBEPipe);
+  tbe.actions.push(Event:SendWUData);
+  tbe.dataToBeInvalid := true;
+  tbe.actions.pushNB(Event:TagArrayWrite);
+}
 
 action(Initiate_AtomicNoReturn_LocalWrite, desc="") {
   if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) {
@@ -1081,21 +1097,42 @@ action(Initiate_AtomicNoReturn_LocalWrite, desc="") {
     tbe.actions.push(Event:SendCompDBIDResp_ANR);
     tbe.actions.pushNB(Event:WriteFEPipe);
   }
-  tbe.actions.push(Event:WriteFEPipe);
-  tbe.actions.push(Event:CheckCacheFill);
   tbe.actions.push(Event:DelayAtomic);
+  tbe.actions.push(Event:CheckCacheFill);
   tbe.actions.push(Event:WriteBEPipe);
   tbe.actions.push(Event:TagArrayWrite);
 }
 
+action(Initiate_AtomicNoReturn_WriteBack, desc="") {
+  if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) {
+    tbe.actions.push(Event:SendSnpUnique);
+  } else if (tbe.dir_sharers.count() > 0){
+    // no one will send us data unless we explicitly ask
+    tbe.actions.push(Event:SendSnpUniqueRetToSrc);
+  }
+  if (comp_anr) {
+    tbe.actions.push(Event:SendDBIDResp_ANR);
+    tbe.actions.pushNB(Event:WriteFEPipe);
+    tbe.actions.pushNB(Event:SendComp_ANR);
+  } else {
+    tbe.actions.push(Event:SendCompDBIDResp_ANR);
+    tbe.actions.pushNB(Event:WriteFEPipe);
+  }
+  tbe.actions.push(Event:DelayAtomic);
+  tbe.actions.push(Event:SendWriteNoSnp);
+  tbe.actions.push(Event:WriteBEPipe);
+  tbe.actions.push(Event:SendWUData);
+  tbe.dataToBeInvalid := true;
+  tbe.actions.pushNB(Event:TagArrayWrite);
+}
 
 action(Initiate_AtomicReturn_Forward, desc="") {
-  tbe.actions.pushNB(Event:WriteFEPipe);
   tbe.actions.push(Event:SendAtomicReturn);
+  tbe.actions.pushNB(Event:WriteFEPipe);
   tbe.actions.push(Event:SendDBIDResp_AR);
   tbe.actions.push(Event:SendARData);
-  tbe.actions.pushNB(Event:WriteFEPipe);
   tbe.actions.push(Event:SendCompData_AR);
+  tbe.actions.push(Event:WriteBEPipe);
   tbe.actions.pushNB(Event:TagArrayWrite);
 
   tbe.dataToBeInvalid := true;
@@ -1104,10 +1141,12 @@ action(Initiate_AtomicReturn_Forward, desc="") {
 action(Initiate_AtomicNoReturn_Forward, desc="") {
   if (comp_anr) {
     tbe.actions.push(Event:SendAtomicNoReturn);
+    tbe.actions.pushNB(Event:WriteFEPipe);
     tbe.actions.push(Event:SendDBIDResp_ANR);
     tbe.actions.pushNB(Event:SendComp_ANR);
   } else {
     tbe.actions.push(Event:SendAtomicNoReturn);
+    tbe.actions.pushNB(Event:WriteFEPipe);
     tbe.actions.push(Event:SendCompDBIDResp_ANR);
   }
   tbe.actions.push(Event:WriteBEPipe);
@@ -1117,19 +1156,48 @@ action(Initiate_AtomicNoReturn_Forward, desc="") {
   tbe.dataToBeInvalid := true;
 }
 
+action(Initiate_AtomicReturn_Miss_Alloc, desc="") {
+  tbe.actions.push(Event:SendReadNoSnp);
+  tbe.actions.pushNB(Event:WriteFEPipe);
+  tbe.actions.push(Event:SendDBIDResp_AR);
+  tbe.actions.push(Event:SendCompData_AR);
+  tbe.actions.push(Event:DelayAtomic);
+  tbe.actions.push(Event:CheckCacheFill);
+  tbe.actions.push(Event:WriteBEPipe);
+  tbe.actions.push(Event:TagArrayWrite);
+}
+
 action(Initiate_AtomicReturn_Miss, desc="") {
   tbe.actions.push(Event:SendReadNoSnp);
   tbe.actions.pushNB(Event:WriteFEPipe);
   tbe.actions.push(Event:SendDBIDResp_AR);
-  tbe.actions.pushNB(Event:WriteFEPipe);
   tbe.actions.push(Event:SendCompData_AR);
-  tbe.actions.push(Event:WriteFEPipe);
-  tbe.actions.push(Event:CheckCacheFill);
   tbe.actions.push(Event:DelayAtomic);
   tbe.actions.push(Event:WriteBEPipe);
+  tbe.actions.push(Event:SendWriteNoSnp);
+  tbe.actions.push(Event:SendWUData);
+  tbe.dataToBeInvalid := true;
+  tbe.actions.pushNB(Event:TagArrayWrite);
+}
+
+action(Initiate_AtomicNoReturn_Miss_Alloc, desc="") {
+  assert(is_HN);
+  tbe.actions.push(Event:SendReadNoSnp);
+  if (comp_anr) {
+    tbe.actions.push(Event:SendDBIDResp_ANR);
+    tbe.actions.pushNB(Event:WriteFEPipe);
+    tbe.actions.pushNB(Event:SendComp_ANR);
+  } else {
+    tbe.actions.push(Event:SendCompDBIDResp_ANR);
+    tbe.actions.pushNB(Event:WriteFEPipe);
+  }
+  tbe.actions.push(Event:DelayAtomic);
+  tbe.actions.push(Event:CheckCacheFill);
+  tbe.actions.push(Event:WriteBEPipe);
   tbe.actions.push(Event:TagArrayWrite);
 }
 
+
 action(Initiate_AtomicNoReturn_Miss, desc="") {
   assert(is_HN);
   tbe.actions.push(Event:SendReadNoSnp);
@@ -1141,12 +1209,12 @@ action(Initiate_AtomicNoReturn_Miss, desc="") {
     tbe.actions.push(Event:SendCompDBIDResp_ANR);
     tbe.actions.pushNB(Event:WriteFEPipe);
   }
-
-  tbe.actions.push(Event:WriteFEPipe);
-  tbe.actions.push(Event:CheckCacheFill);
   tbe.actions.push(Event:DelayAtomic);
   tbe.actions.push(Event:WriteBEPipe);
-  tbe.actions.push(Event:TagArrayWrite);
+  tbe.actions.push(Event:SendWriteNoSnp);
+  tbe.actions.push(Event:SendWUData);
+  tbe.dataToBeInvalid := true;
+  tbe.actions.pushNB(Event:TagArrayWrite);
 }
 
 action(Initiate_CopyBack, desc="") {
@@ -1615,6 +1683,9 @@ action(Send_WriteNoSnp, desc="") {
   // so addExpectedCount
   tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompDBIDResp);
   tbe.expected_req_resp.addExpectedCount(1);
+
+  // Write-back phase: Send_Data must now send dataBlk, not oldDataBlk
+  tbe.atomic_to_be_wb := true;
 }
 
 action(Send_WriteNoSnp_Partial, desc="") {
@@ -2948,7 +3019,8 @@ action(Send_Data, desc="") {
       }
       tbe.snd_pendBytes.setMask(offset, range, false);
 
-      if (tbe.reqType == CHIRequestType:AtomicReturn){
+      if ((tbe.reqType == CHIRequestType:AtomicReturn) &&
+          (tbe.atomic_to_be_wb == false)){
         out_msg.dataBlk := tbe.oldDataBlk;
       } else {
         out_msg.dataBlk := tbe.dataBlk;
diff --git a/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm b/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm
index fbafda61cd..9fda5ba052 100644
--- a/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm
+++ b/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm
@@ -449,6 +449,8 @@ TBE allocateRequestTBE(Addr addr, CHIRequestMsg in_msg), return_by_pointer="yes"
 
   tbe.atomic_op.clear();
   tbe.atomic_op.orMask(in_msg.atomic_op);
+  tbe.atomic_to_be_done := false;
+  tbe.atomic_to_be_wb := false;
 
   tbe.use_DMT := false;
   tbe.use_DCT := false;
@@ -814,7 +816,7 @@ bool needCacheEntry(CHIRequestType req_type,
                              (req_type == CHIRequestType:AtomicStore))) ||
            (alloc_on_seq_line_write && (req_type == CHIRequestType:StoreLine)) ||
            (alloc_on_atomic && ((req_type == CHIRequestType:AtomicReturn) ||
-           (req_type == CHIRequestType:AtomicNoReturn)));
+                                (req_type == CHIRequestType:AtomicNoReturn)));
   }
 }
 
@@ -1274,13 +1276,17 @@ Event reqToEvent(CHIRequestType type, bool is_prefetch) {
   } else if (type == CHIRequestType:DvmSync_Initiate) {
     return Event:DvmSync_Initiate;
   } else if (type == CHIRequestType:AtomicReturn){
-    if (is_HN) {
+    if (is_HN && alloc_on_atomic) {
+      return Event:AtomicReturn_PoC_Alloc;
+    } else if (is_HN){
       return Event:AtomicReturn_PoC;
     } else {
       return Event:AtomicReturn;
     }
   } else if (type == CHIRequestType:AtomicNoReturn){
-    if (is_HN) {
+    if (is_HN && alloc_on_atomic) {
+      return Event:AtomicNoReturn_PoC_Alloc;
+    } else if (is_HN){
       return Event:AtomicNoReturn_PoC;
     } else {
       return Event:AtomicNoReturn;
diff --git a/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm b/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm
index 1f6f906c0c..012e5f4ce5 100644
--- a/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm
+++ b/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm
@@ -448,7 +448,7 @@ transition({RSC,RSD,RUSD,RUSC,RU,I}, WriteUnique, BUSY_BLKD) {
 
 // AtomicReturn and AtomicNoReturn
 
-transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
+transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,RUSC,
             UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicReturn, BUSY_BLKD) {
   Initiate_Request;
   Initiate_AtomicReturn_Forward;
@@ -457,7 +457,7 @@ transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
   ProcessNextState;
 }
 
-transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
+transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,RUSC,
             UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicNoReturn, BUSY_BLKD) {
   Initiate_Request;
   Initiate_AtomicNoReturn_Forward;
@@ -467,7 +467,7 @@ transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,
 }
 
 transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
-            AtomicReturn_PoC, BUSY_BLKD) {
+            {AtomicReturn_PoC_Alloc, AtomicReturn_PoC}, BUSY_BLKD) {
   Initiate_Request;
   Initiate_AtomicReturn_LocalWrite;
   Profile_Hit;
@@ -476,7 +476,7 @@ transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
 }
 
 transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
-            AtomicNoReturn_PoC, BUSY_BLKD) {
+            {AtomicNoReturn_PoC_Alloc, AtomicNoReturn_PoC}, BUSY_BLKD) {
   Initiate_Request;
   Initiate_AtomicNoReturn_LocalWrite;
   Profile_Hit;
@@ -484,8 +484,8 @@ transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC},
   ProcessNextState;
 }
 
-transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
-            AtomicReturn_PoC, BUSY_BLKD) {
+transition({SD, SD_RSD, SD_RSC, SC, SC_RSC},
+            {AtomicReturn_PoC_Alloc, AtomicReturn_PoC}, BUSY_BLKD) {
   Initiate_Request;
   Initiate_AtomicReturn_LocalWrite;
   Profile_Miss;
@@ -493,8 +493,26 @@ transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
   ProcessNextState;
 }
 
-transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
-            AtomicNoReturn_PoC, BUSY_BLKD) {
+transition({RSC, RSD, RUSC, RUSD, RU},
+            AtomicReturn_PoC_Alloc, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicReturn_LocalWrite;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition({RSC, RSD, RUSC, RUSD, RU},
+            AtomicReturn_PoC, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicReturn_WriteBack;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition({SD, SD_RSD, SD_RSC, SC, SC_RSC},
+            {AtomicNoReturn_PoC_Alloc, AtomicNoReturn_PoC}, BUSY_BLKD) {
   Initiate_Request;
   Initiate_AtomicNoReturn_LocalWrite;
   Profile_Miss;
@@ -502,6 +520,43 @@ transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU},
   ProcessNextState;
 }
 
+transition({RSC, RSD, RUSC, RUSD, RU},
+            AtomicNoReturn_PoC_Alloc, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicNoReturn_LocalWrite;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition({RSC, RSD, RUSC, RUSD, RU},
+            AtomicNoReturn_PoC, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicNoReturn_WriteBack;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition(I, AtomicReturn_PoC_Alloc, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicReturn_Miss_Alloc;
+  Allocate_DirEntry;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+transition(I, AtomicNoReturn_PoC_Alloc, BUSY_BLKD) {
+  Initiate_Request;
+  Initiate_AtomicNoReturn_Miss_Alloc;
+  Allocate_DirEntry;
+  Profile_Miss;
+  Pop_ReqRdyQueue;
+  ProcessNextState;
+}
+
+
 transition(I, AtomicReturn_PoC, BUSY_BLKD) {
   Initiate_Request;
   Initiate_AtomicReturn_Miss;
@@ -520,7 +575,6 @@ transition(I, AtomicNoReturn_PoC, BUSY_BLKD) {
   ProcessNextState;
 }
 
-
 // Load / Store / Atomic from sequencer & Prefetch from prefetcher
 
 transition({UD,UD_T,SD,UC,SC}, Load, BUSY_BLKD) {
@@ -880,8 +934,8 @@ transition({BUSY_BLKD,BUSY_INTR},
             WriteUnique,WriteUniquePtl_PoC,
             WriteUniqueFull_PoC,WriteUniqueFull_PoC_Alloc,
             WriteUniqueZero,
-            AtomicReturn,AtomicReturn_PoC,
-            AtomicNoReturn,AtomicNoReturn_PoC,
+            AtomicReturn,AtomicReturn_PoC, AtomicReturn_PoC_Alloc,
+            AtomicNoReturn,AtomicNoReturn_PoC, AtomicNoReturn_PoC_Alloc,
             StashOnceShared,StashOnceUnique}) {
   StallRequest;
 }
diff --git a/src/mem/ruby/protocol/chi/CHI-cache.sm b/src/mem/ruby/protocol/chi/CHI-cache.sm
index 487fe60026..6a74045c23 100644
--- a/src/mem/ruby/protocol/chi/CHI-cache.sm
+++ b/src/mem/ruby/protocol/chi/CHI-cache.sm
@@ -325,7 +325,9 @@ machine(MachineType:Cache, "Cache coherency protocol") :
     AtomicReturn, desc="", in_trans="yes";
     AtomicNoReturn, desc="", in_trans="yes";
     AtomicReturn_PoC, desc="", in_trans="yes";
+    AtomicReturn_PoC_Alloc, desc="", in_trans="yes";
     AtomicNoReturn_PoC, desc="", in_trans="yes";
+    AtomicNoReturn_PoC_Alloc, desc="", in_trans="yes";
     SnpCleanInvalid, desc="", in_trans="yes";
     SnpShared, desc="", in_trans="yes";
     SnpSharedFwd, desc="", in_trans="yes";
@@ -661,6 +663,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
     // Atomic info associated with the transaction
     WriteMask atomic_op, desc="Atomic Operation Wrapper";
     bool atomic_to_be_done, desc="We have yet to perform the atomic";
+    bool atomic_to_be_wb, desc="We are writebacking the atomic";
 
     // NOTE: seqReq is a smart pointer pointing to original CPU request object
     // that triggers transactions associated with this TBE. seqReq carries some