From 7ee574b3092cddaf51944e3675c187bf9a4eb92d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Soria=20Pardos?= Date: Tue, 26 Mar 2024 16:04:23 +0100 Subject: [PATCH 1/3] mem-ruby: Remove AtomicReturn_NoWait from CHI To make Atomic transaction recursive and enable 2-level config, remove AtomicReturn_NoWait and other level-dependent code GitHub Issue: https://github.com/gem5/gem5/issues/882 Change-Id: Iac468cdb8a3b5914c8f05c5cedde866ce85f359a --- .../ruby/protocol/chi/CHI-cache-actions.sm | 54 +++++++------------ .../protocol/chi/CHI-cache-transitions.sm | 11 +--- src/mem/ruby/protocol/chi/CHI-cache.sm | 3 +- 3 files changed, 21 insertions(+), 47 deletions(-) diff --git a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm index 5d614dcc24..5fa87420d8 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm @@ -823,7 +823,8 @@ action(Initiate_AtomicReturn_I, desc="") { tbe.atomic_to_be_done := true; } else if ((policy_type == 1) || // UNIQUE NEAR (policy_type == 2)) { // PRESENT NEAR - tbe.actions.push(Event:SendAtomicReturn_NoWait); + tbe.actions.push(Event:SendAtomicReturn); + tbe.actions.push(Event:SendARData); tbe.dataToBeInvalid := true; tbe.doCacheFill := false; tbe.atomic_to_be_done := false; @@ -860,7 +861,8 @@ action(Initiate_AtomicReturn_SD, desc="") { tbe.actions.push(Event:TagArrayWrite); tbe.atomic_to_be_done := true; } else if (policy_type == 1) { // UNIQUE NEAR - tbe.actions.push(Event:SendAtomicReturn_NoWait); + tbe.actions.push(Event:SendAtomicReturn); + tbe.actions.push(Event:SendARData); tbe.dataToBeInvalid := true; tbe.doCacheFill := false; tbe.atomic_to_be_done := false; @@ -897,7 +899,8 @@ action(Initiate_AtomicReturn_SC, desc="") { tbe.actions.push(Event:TagArrayWrite); tbe.atomic_to_be_done := true; } else if (policy_type == 1) { // UNIQUE NEAR - tbe.actions.push(Event:SendAtomicReturn_NoWait); + tbe.actions.push(Event:SendAtomicReturn); 
+ tbe.actions.push(Event:SendARData); tbe.dataToBeInvalid := true; tbe.doCacheFill := false; tbe.atomic_to_be_done := false; @@ -1049,7 +1052,7 @@ action(Initiate_AtomicReturn_LocalWrite, desc="") { } tbe.actions.push(Event:SendDBIDResp_AR); tbe.actions.pushNB(Event:WriteFEPipe); - tbe.actions.pushNB(Event:SendCompData_AR); + tbe.actions.push(Event:SendCompData_AR); tbe.actions.push(Event:WriteFEPipe); tbe.actions.push(Event:CheckCacheFill); tbe.actions.push(Event:DelayAtomic); @@ -1082,11 +1085,11 @@ action(Initiate_AtomicNoReturn_LocalWrite, desc="") { action(Initiate_AtomicReturn_Forward, desc="") { - if ((tbe.dir_sharers.count() > 0) && - (tbe.dir_sharers.isElement(tbe.requestor))){ - tbe.dir_sharers.remove(tbe.requestor); - } + tbe.actions.pushNB(Event:WriteFEPipe); tbe.actions.push(Event:SendAtomicReturn); + tbe.actions.push(Event:SendDBIDResp_AR); + tbe.actions.push(Event:SendARData); + tbe.actions.pushNB(Event:WriteFEPipe); tbe.actions.push(Event:SendCompData_AR); tbe.actions.pushNB(Event:TagArrayWrite); @@ -1094,10 +1097,6 @@ action(Initiate_AtomicReturn_Forward, desc="") { } action(Initiate_AtomicNoReturn_Forward, desc="") { - if ((tbe.dir_sharers.count() > 0) && - (tbe.dir_sharers.isElement(tbe.requestor))){ - tbe.dir_sharers.remove(tbe.requestor); - } if (comp_anr) { tbe.actions.push(Event:SendAtomicNoReturn); tbe.actions.push(Event:SendDBIDResp_ANR); @@ -1118,7 +1117,7 @@ action(Initiate_AtomicReturn_Miss, desc="") { tbe.actions.pushNB(Event:WriteFEPipe); tbe.actions.push(Event:SendDBIDResp_AR); tbe.actions.pushNB(Event:WriteFEPipe); - tbe.actions.pushNB(Event:SendCompData_AR); + tbe.actions.push(Event:SendCompData_AR); tbe.actions.push(Event:WriteFEPipe); tbe.actions.push(Event:CheckCacheFill); tbe.actions.push(Event:DelayAtomic); @@ -1664,18 +1663,6 @@ action(Send_AtomicReturn, desc="") { tbe.expected_req_resp.addExpectedCount(1); } -action(Send_AtomicReturn_NoWait, desc="") { - assert(is_valid(tbe)); - - enqueue(reqOutPort, CHIRequestMsg, 
request_latency) { - prepareRequestAtomic(tbe, CHIRequestType:AtomicReturn, out_msg); - out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); - allowRequestRetry(tbe, out_msg); - } - - tbe.dataAMOValid := false; -} - action(Send_AtomicNoReturn, desc="") { assert(is_valid(tbe)); @@ -2337,6 +2324,10 @@ action(UpdateDataState_FromADataResp, desc="") { tbe.dataDirty := true; DPRINTF(RubySlicc, "Atomic after %s\n", tbe.dataBlk); + } else if ((tbe.expected_req_resp.hasReceivedData()) && + ((tbe.reqType == CHIRequestType:AtomicReturn) || + (tbe.reqType == CHIRequestType:AtomicNoReturn))){ + tbe.dataMaybeDirtyUpstream := false; } printTBEState(tbe); } @@ -2504,10 +2495,6 @@ action(Receive_ReqResp_WUNeedComp, desc="") { tbe.defer_expected_comp := true; } -action(Receive_ReqResp_AR, desc="") { - tbe.actions.pushFrontNB(Event:SendARData); -} - action(Receive_ReqResp_WUComp, desc="") { if (tbe.defer_expected_comp) { tbe.defer_expected_comp := false; @@ -2730,16 +2717,17 @@ action(Send_ANRData, desc="") { action(CheckARComp, desc="") { assert(is_valid(tbe)); + clearExpectedReqResp(tbe); tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_I); tbe.expected_req_resp.addExpectedRespType(CHIResponseType:RespSepData); - tbe.expected_req_resp.addExpectedCount(2); + tbe.expected_req_resp.setExpectedCount(2); } action(CheckANRComp, desc="") { assert(is_valid(tbe)); if (tbe.defer_expected_comp) { tbe.defer_expected_comp := false; - tbe.expected_req_resp.addExpectedCount(1); + tbe.expected_req_resp.setExpectedCount(1); tbe.expected_req_resp.addExpectedRespType(CHIResponseType:Comp); } } @@ -3108,10 +3096,6 @@ action(Send_CompData_AR, desc="") { assert(is_valid(tbe)); assert(tbe.dataValid); - if (is_HN) { - tbe.oldDataBlk := tbe.dataBlk; - } - tbe.snd_msgType := CHIDataType:CompData_I; tbe.dataMaybeDirtyUpstream := false; tbe.requestorToBeExclusiveOwner := false; diff --git a/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm 
b/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm index 9ced9d1826..1f6f906c0c 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm @@ -942,15 +942,6 @@ transition(BUSY_BLKD, SendWriteUnique, BUSY_INTR) {DestinationAvailable} { transition(BUSY_BLKD, SendAtomicReturn, BUSY_INTR) {DestinationAvailable} { Pop_TriggerQueue; Send_AtomicReturn; - CheckARComp; - Profile_OutgoingStart; - ProcessNextState_ClearPending; -} - -transition(BUSY_BLKD, SendAtomicReturn_NoWait, BUSY_INTR) { - Pop_TriggerQueue; - Send_AtomicReturn_NoWait; - CheckARComp; Profile_OutgoingStart; ProcessNextState_ClearPending; } @@ -1016,6 +1007,7 @@ transition(BUSY_BLKD, SendWUDataCB) { transition({BUSY_BLKD,BUSY_INTR}, SendARData) { Pop_TriggerQueue; Send_ARData; + CheckARComp; ProcessNextState_ClearPending; } @@ -1492,7 +1484,6 @@ transition({BUSY_INTR,BUSY_BLKD}, DBIDResp, BUSY_BLKD) { Receive_ReqResp; Receive_ReqResp_CopyDBID; Receive_ReqResp_WUNeedComp; - Receive_ReqResp_AR; Pop_RespInQueue; ProcessNextState; } diff --git a/src/mem/ruby/protocol/chi/CHI-cache.sm b/src/mem/ruby/protocol/chi/CHI-cache.sm index a5c75b167b..487fe60026 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache.sm @@ -135,7 +135,7 @@ machine(MachineType:Cache, "Cache coherency protocol") : // All Near executes all Atomics at L1 (variable set to 0; default) // Unique Near executes Atomics at HNF for states I, SC, SD (set to 1) // Present Near execites all Atomics at L1 except when state is I (set to 2) - int policy_type := 1; + int policy_type := 0; // Use separate Comp/DBIDResp responses for WriteUnique @@ -487,7 +487,6 @@ machine(MachineType:Cache, "Cache coherency protocol") : // Send an atomic request downstream. 
SendAtomicReturn, out_trans="yes", desc="Send atomic request with return"; - SendAtomicReturn_NoWait, out_trans="yes", desc="Send atomic request with return, but no DBID"; SendAtomicNoReturn, out_trans="yes", desc="Send atomic request without return"; SendARData, desc="Send atomic return request data"; SendANRData, desc="Send atomic no return request data"; From 5a6a3be6da1d921050dc7260547b63919c4704b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Soria=20Pardos?= Date: Tue, 26 Mar 2024 16:11:40 +0100 Subject: [PATCH 2/3] mem-ruby: Fix policy_type condition in CHI Fix if-else condition in CHI-cache-actions to correctly support policy_type Present Near (2) Change-Id: Ib776d847a908a8ac7693c2d10405bc0c4a9d767d --- src/mem/ruby/protocol/chi/CHI-cache-actions.sm | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm index 5fa87420d8..5eefc3ba9b 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm @@ -841,7 +841,8 @@ action(Initiate_AtomicNoReturn_I, desc="") { tbe.actions.push(Event:WriteBEPipe); tbe.actions.push(Event:TagArrayWrite); tbe.atomic_to_be_done := true; - } else if (policy_type == 1) { // UNIQUE NEAR + } else if ((policy_type == 1) || // UNIQUE NEAR + (policy_type == 2)) { // PRESENT NEAR tbe.actions.push(Event:SendAtomicNoReturn); tbe.actions.push(Event:SendANRData); tbe.dataToBeInvalid := true; @@ -853,7 +854,8 @@ action(Initiate_AtomicNoReturn_I, desc="") { } action(Initiate_AtomicReturn_SD, desc="") { - if (policy_type == 0){ // ALL NEAR + if ((policy_type == 0) || // ALL NEAR + (policy_type == 2)) { // PRESENT NEAR tbe.actions.push(Event:SendReadUnique); tbe.actions.push(Event:WriteFEPipe); tbe.actions.push(Event:CheckCacheFill); @@ -872,7 +874,8 @@ action(Initiate_AtomicReturn_SD, desc="") { } action(Initiate_AtomicNoReturn_SD, desc="") { - if (policy_type == 0){ 
// ALL NEAR + if ((policy_type == 0) || // ALL NEAR + (policy_type == 2)) { // PRESENT NEAR tbe.actions.push(Event:SendReadUnique); tbe.actions.push(Event:WriteFEPipe); tbe.actions.push(Event:CheckCacheFill); @@ -891,7 +894,8 @@ action(Initiate_AtomicNoReturn_SD, desc="") { } action(Initiate_AtomicReturn_SC, desc="") { - if (policy_type == 0){ // ALL NEAR + if ((policy_type == 0) || // ALL NEAR + (policy_type == 2)) { // PRESENT NEAR tbe.actions.push(Event:SendReadUnique); tbe.actions.push(Event:WriteFEPipe); tbe.actions.push(Event:CheckCacheFill); @@ -910,7 +914,8 @@ action(Initiate_AtomicReturn_SC, desc="") { } action(Initiate_AtomicNoReturn_SC, desc="") { - if (policy_type == 0){ // ALL NEAR + if ((policy_type == 0) || // ALL NEAR + (policy_type == 2)) { // PRESENT NEAR tbe.actions.push(Event:SendReadUnique); tbe.actions.push(Event:WriteFEPipe); tbe.actions.push(Event:CheckCacheFill); From 98358da9688aedd1a68883626f16bb5724c7a69f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Soria=20Pardos?= Date: Thu, 4 Apr 2024 09:34:34 +0200 Subject: [PATCH 3/3] mem-ruby: Implement Atomic No Alloc Policy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add alternative implementation to far atomics when the flag alloc_on_atomic is false. The implementation fetches the data, performs the atomic and writes back the cache line to main memory. 
Co-authored-by: Fabian Schätzle Change-Id: I8797fbc68448e1866a292f4afeedd3613113dddd --- .../ruby/protocol/chi/CHI-cache-actions.sm | 100 +++++++++++++++--- src/mem/ruby/protocol/chi/CHI-cache-funcs.sm | 12 ++- .../protocol/chi/CHI-cache-transitions.sm | 76 +++++++++++-- src/mem/ruby/protocol/chi/CHI-cache.sm | 3 + 4 files changed, 163 insertions(+), 28 deletions(-) diff --git a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm index 5eefc3ba9b..3db54934c6 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm @@ -1058,13 +1058,29 @@ action(Initiate_AtomicReturn_LocalWrite, desc="") { tbe.actions.push(Event:SendDBIDResp_AR); tbe.actions.pushNB(Event:WriteFEPipe); tbe.actions.push(Event:SendCompData_AR); - tbe.actions.push(Event:WriteFEPipe); - tbe.actions.push(Event:CheckCacheFill); tbe.actions.push(Event:DelayAtomic); + tbe.actions.push(Event:CheckCacheFill); tbe.actions.push(Event:WriteBEPipe); tbe.actions.push(Event:TagArrayWrite); } +action(Initiate_AtomicReturn_WriteBack, desc="") { + if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) { + tbe.actions.push(Event:SendSnpUnique); + } else if (tbe.dir_sharers.count() > 0){ + // no one will send us data unless we explicitly ask + tbe.actions.push(Event:SendSnpUniqueRetToSrc); + } + tbe.actions.push(Event:SendDBIDResp_AR); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.push(Event:SendCompData_AR); + tbe.actions.push(Event:DelayAtomic); + tbe.actions.push(Event:SendWriteNoSnp); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:SendWUData); + tbe.dataToBeInvalid := true; + tbe.actions.pushNB(Event:TagArrayWrite); +} action(Initiate_AtomicNoReturn_LocalWrite, desc="") { if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) { @@ -1081,21 +1097,42 @@ action(Initiate_AtomicNoReturn_LocalWrite, desc="") { tbe.actions.push(Event:SendCompDBIDResp_ANR); 
tbe.actions.pushNB(Event:WriteFEPipe); } - tbe.actions.push(Event:WriteFEPipe); - tbe.actions.push(Event:CheckCacheFill); tbe.actions.push(Event:DelayAtomic); + tbe.actions.push(Event:CheckCacheFill); tbe.actions.push(Event:WriteBEPipe); tbe.actions.push(Event:TagArrayWrite); } +action(Initiate_AtomicNoReturn_WriteBack, desc="") { + if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) { + tbe.actions.push(Event:SendSnpUnique); + } else if (tbe.dir_sharers.count() > 0){ + // no one will send us data unless we explicitly ask + tbe.actions.push(Event:SendSnpUniqueRetToSrc); + } + if (comp_anr) { + tbe.actions.push(Event:SendDBIDResp_ANR); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.pushNB(Event:SendComp_ANR); + } else { + tbe.actions.push(Event:SendCompDBIDResp_ANR); + tbe.actions.pushNB(Event:WriteFEPipe); + } + tbe.actions.push(Event:DelayAtomic); + tbe.actions.push(Event:SendWriteNoSnp); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:SendWUData); + tbe.dataToBeInvalid := true; + tbe.actions.pushNB(Event:TagArrayWrite); +} action(Initiate_AtomicReturn_Forward, desc="") { - tbe.actions.pushNB(Event:WriteFEPipe); tbe.actions.push(Event:SendAtomicReturn); + tbe.actions.pushNB(Event:WriteFEPipe); tbe.actions.push(Event:SendDBIDResp_AR); tbe.actions.push(Event:SendARData); - tbe.actions.pushNB(Event:WriteFEPipe); tbe.actions.push(Event:SendCompData_AR); + tbe.actions.push(Event:WriteBEPipe); tbe.actions.pushNB(Event:TagArrayWrite); tbe.dataToBeInvalid := true; @@ -1104,10 +1141,12 @@ action(Initiate_AtomicReturn_Forward, desc="") { action(Initiate_AtomicNoReturn_Forward, desc="") { if (comp_anr) { tbe.actions.push(Event:SendAtomicNoReturn); + tbe.actions.pushNB(Event:WriteFEPipe); tbe.actions.push(Event:SendDBIDResp_ANR); tbe.actions.pushNB(Event:SendComp_ANR); } else { tbe.actions.push(Event:SendAtomicNoReturn); + tbe.actions.pushNB(Event:WriteFEPipe); tbe.actions.push(Event:SendCompDBIDResp_ANR); } 
tbe.actions.push(Event:WriteBEPipe); @@ -1117,19 +1156,48 @@ action(Initiate_AtomicNoReturn_Forward, desc="") { tbe.dataToBeInvalid := true; } +action(Initiate_AtomicReturn_Miss_Alloc, desc="") { + tbe.actions.push(Event:SendReadNoSnp); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.push(Event:SendDBIDResp_AR); + tbe.actions.push(Event:SendCompData_AR); + tbe.actions.push(Event:DelayAtomic); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:TagArrayWrite); +} + action(Initiate_AtomicReturn_Miss, desc="") { tbe.actions.push(Event:SendReadNoSnp); tbe.actions.pushNB(Event:WriteFEPipe); tbe.actions.push(Event:SendDBIDResp_AR); - tbe.actions.pushNB(Event:WriteFEPipe); tbe.actions.push(Event:SendCompData_AR); - tbe.actions.push(Event:WriteFEPipe); - tbe.actions.push(Event:CheckCacheFill); tbe.actions.push(Event:DelayAtomic); tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:SendWriteNoSnp); + tbe.actions.push(Event:SendWUData); + tbe.dataToBeInvalid := true; + tbe.actions.pushNB(Event:TagArrayWrite); +} + +action(Initiate_AtomicNoReturn_Miss_Alloc, desc="") { + assert(is_HN); + tbe.actions.push(Event:SendReadNoSnp); + if (comp_anr) { + tbe.actions.push(Event:SendDBIDResp_ANR); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.pushNB(Event:SendComp_ANR); + } else { + tbe.actions.push(Event:SendCompDBIDResp_ANR); + tbe.actions.pushNB(Event:WriteFEPipe); + } + tbe.actions.push(Event:DelayAtomic); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:WriteBEPipe); tbe.actions.push(Event:TagArrayWrite); } + action(Initiate_AtomicNoReturn_Miss, desc="") { assert(is_HN); tbe.actions.push(Event:SendReadNoSnp); @@ -1141,12 +1209,12 @@ action(Initiate_AtomicNoReturn_Miss, desc="") { tbe.actions.push(Event:SendCompDBIDResp_ANR); tbe.actions.pushNB(Event:WriteFEPipe); } - - tbe.actions.push(Event:WriteFEPipe); - tbe.actions.push(Event:CheckCacheFill); tbe.actions.push(Event:DelayAtomic); 
tbe.actions.push(Event:WriteBEPipe); - tbe.actions.push(Event:TagArrayWrite); + tbe.actions.push(Event:SendWriteNoSnp); + tbe.actions.push(Event:SendWUData); + tbe.dataToBeInvalid := true; + tbe.actions.pushNB(Event:TagArrayWrite); } action(Initiate_CopyBack, desc="") { @@ -1615,6 +1683,9 @@ action(Send_WriteNoSnp, desc="") { // so addExpectedCount tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompDBIDResp); tbe.expected_req_resp.addExpectedCount(1); + + // If we are WB after AtomicReturn/NoReturn + tbe.atomic_to_be_wb := true; } action(Send_WriteNoSnp_Partial, desc="") { @@ -2948,7 +3019,8 @@ action(Send_Data, desc="") { } tbe.snd_pendBytes.setMask(offset, range, false); - if (tbe.reqType == CHIRequestType:AtomicReturn){ + if ((tbe.reqType == CHIRequestType:AtomicReturn) && + (tbe.atomic_to_be_wb == false)){ out_msg.dataBlk := tbe.oldDataBlk; } else { out_msg.dataBlk := tbe.dataBlk; diff --git a/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm b/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm index fbafda61cd..9fda5ba052 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm @@ -449,6 +449,8 @@ TBE allocateRequestTBE(Addr addr, CHIRequestMsg in_msg), return_by_pointer="yes" tbe.atomic_op.clear(); tbe.atomic_op.orMask(in_msg.atomic_op); + tbe.atomic_to_be_done := false; + tbe.atomic_to_be_wb := false; tbe.use_DMT := false; tbe.use_DCT := false; @@ -814,7 +816,7 @@ bool needCacheEntry(CHIRequestType req_type, (req_type == CHIRequestType:AtomicStore))) || (alloc_on_seq_line_write && (req_type == CHIRequestType:StoreLine)) || (alloc_on_atomic && ((req_type == CHIRequestType:AtomicReturn) || - (req_type == CHIRequestType:AtomicNoReturn))); + (req_type == CHIRequestType:AtomicNoReturn))); } } @@ -1274,13 +1276,17 @@ Event reqToEvent(CHIRequestType type, bool is_prefetch) { } else if (type == CHIRequestType:DvmSync_Initiate) { return Event:DvmSync_Initiate; } else if (type == CHIRequestType:AtomicReturn){ - if 
(is_HN) { + if (is_HN && alloc_on_atomic) { + return Event:AtomicReturn_PoC_Alloc; + } else if (is_HN){ return Event:AtomicReturn_PoC; } else { return Event:AtomicReturn; } } else if (type == CHIRequestType:AtomicNoReturn){ - if (is_HN) { + if (is_HN && alloc_on_atomic) { + return Event:AtomicNoReturn_PoC_Alloc; + } else if (is_HN){ return Event:AtomicNoReturn_PoC; } else { return Event:AtomicNoReturn; diff --git a/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm b/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm index 1f6f906c0c..012e5f4ce5 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm @@ -448,7 +448,7 @@ transition({RSC,RSD,RUSD,RUSC,RU,I}, WriteUnique, BUSY_BLKD) { // AtomicReturn and AtomicNoReturn -transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD, +transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,RUSC, UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicReturn, BUSY_BLKD) { Initiate_Request; Initiate_AtomicReturn_Forward; @@ -457,7 +457,7 @@ transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD, ProcessNextState; } -transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD, +transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD,RUSC, UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicNoReturn, BUSY_BLKD) { Initiate_Request; Initiate_AtomicNoReturn_Forward; @@ -467,7 +467,7 @@ transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD, } transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC}, - AtomicReturn_PoC, BUSY_BLKD) { + {AtomicReturn_PoC_Alloc, AtomicReturn_PoC}, BUSY_BLKD) { Initiate_Request; Initiate_AtomicReturn_LocalWrite; Profile_Hit; @@ -476,7 +476,7 @@ transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC}, } transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC}, - AtomicNoReturn_PoC, BUSY_BLKD) { + {AtomicNoReturn_PoC_Alloc, AtomicNoReturn_PoC}, BUSY_BLKD) { Initiate_Request; Initiate_AtomicNoReturn_LocalWrite; Profile_Hit; @@ -484,8 +484,8 @@ 
transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC}, ProcessNextState; } -transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU}, - AtomicReturn_PoC, BUSY_BLKD) { +transition({SD, SD_RSD, SD_RSC, SC, SC_RSC}, + {AtomicReturn_PoC_Alloc, AtomicReturn_PoC}, BUSY_BLKD) { Initiate_Request; Initiate_AtomicReturn_LocalWrite; Profile_Miss; @@ -493,8 +493,26 @@ transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU}, ProcessNextState; } -transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU}, - AtomicNoReturn_PoC, BUSY_BLKD) { +transition({RSC, RSD, RUSC, RUSD, RU}, + AtomicReturn_PoC_Alloc, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicReturn_LocalWrite; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({RSC, RSD, RUSC, RUSD, RU}, + AtomicReturn_PoC, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicReturn_WriteBack; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({SD, SD_RSD, SD_RSC, SC, SC_RSC}, + {AtomicNoReturn_PoC_Alloc, AtomicNoReturn_PoC}, BUSY_BLKD) { Initiate_Request; Initiate_AtomicNoReturn_LocalWrite; Profile_Miss; @@ -502,6 +520,43 @@ transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU}, ProcessNextState; } +transition({RSC, RSD, RUSC, RUSD, RU}, + AtomicNoReturn_PoC_Alloc, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicNoReturn_LocalWrite; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({RSC, RSD, RUSC, RUSD, RU}, + AtomicNoReturn_PoC, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicNoReturn_WriteBack; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition(I, AtomicReturn_PoC_Alloc, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicReturn_Miss_Alloc; + Allocate_DirEntry; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition(I, AtomicNoReturn_PoC_Alloc, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicNoReturn_Miss_Alloc; + Allocate_DirEntry; + Profile_Miss; + Pop_ReqRdyQueue; + 
ProcessNextState; +} + + transition(I, AtomicReturn_PoC, BUSY_BLKD) { Initiate_Request; Initiate_AtomicReturn_Miss; @@ -520,7 +575,6 @@ transition(I, AtomicNoReturn_PoC, BUSY_BLKD) { ProcessNextState; } - // Load / Store / Atomic from sequencer & Prefetch from prefetcher transition({UD,UD_T,SD,UC,SC}, Load, BUSY_BLKD) { @@ -880,8 +934,8 @@ transition({BUSY_BLKD,BUSY_INTR}, WriteUnique,WriteUniquePtl_PoC, WriteUniqueFull_PoC,WriteUniqueFull_PoC_Alloc, WriteUniqueZero, - AtomicReturn,AtomicReturn_PoC, - AtomicNoReturn,AtomicNoReturn_PoC, + AtomicReturn,AtomicReturn_PoC, AtomicReturn_PoC_Alloc + AtomicNoReturn,AtomicNoReturn_PoC, AtomicNoReturn_PoC_Alloc StashOnceShared,StashOnceUnique}) { StallRequest; } diff --git a/src/mem/ruby/protocol/chi/CHI-cache.sm b/src/mem/ruby/protocol/chi/CHI-cache.sm index 487fe60026..6a74045c23 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache.sm @@ -325,7 +325,9 @@ machine(MachineType:Cache, "Cache coherency protocol") : AtomicReturn, desc="", in_trans="yes"; AtomicNoReturn, desc="", in_trans="yes"; AtomicReturn_PoC, desc="", in_trans="yes"; + AtomicReturn_PoC_Alloc, desc="", in_trans="yes"; AtomicNoReturn_PoC, desc="", in_trans="yes"; + AtomicNoReturn_PoC_Alloc, desc="", in_trans="yes"; SnpCleanInvalid, desc="", in_trans="yes"; SnpShared, desc="", in_trans="yes"; SnpSharedFwd, desc="", in_trans="yes"; @@ -661,6 +663,7 @@ machine(MachineType:Cache, "Cache coherency protocol") : // Atomic info associated with the transaction WriteMask atomic_op, desc="Atomic Operation Wrapper"; bool atomic_to_be_done, desc="We have yet to perform the atomic"; + bool atomic_to_be_wb, desc="We are writebacking the atomic"; // NOTE: seqReq is a smart pointer pointing to original CPU request object // that triggers transactions associated with this TBE. seqReq carries some