diff --git a/configs/ruby/CHI_config.py b/configs/ruby/CHI_config.py index 4f2580c373..1288cf95d6 100644 --- a/configs/ruby/CHI_config.py +++ b/configs/ruby/CHI_config.py @@ -244,6 +244,7 @@ class CHI_L1Controller(CHI_Cache_Controller): self.alloc_on_readunique = True self.alloc_on_readonce = True self.alloc_on_writeback = True + self.alloc_on_atomic = False self.dealloc_on_unique = False self.dealloc_on_shared = False self.dealloc_backinv_unique = True @@ -280,6 +281,7 @@ class CHI_L2Controller(CHI_Cache_Controller): self.alloc_on_readunique = True self.alloc_on_readonce = True self.alloc_on_writeback = True + self.alloc_on_atomic = False self.dealloc_on_unique = False self.dealloc_on_shared = False self.dealloc_backinv_unique = True @@ -316,6 +318,7 @@ class CHI_HNFController(CHI_Cache_Controller): self.alloc_on_readunique = False self.alloc_on_readonce = True self.alloc_on_writeback = True + self.alloc_on_atomic = True self.dealloc_on_unique = True self.dealloc_on_shared = False self.dealloc_backinv_unique = False @@ -392,6 +395,7 @@ class CHI_DMAController(CHI_Cache_Controller): self.alloc_on_readunique = False self.alloc_on_readonce = False self.alloc_on_writeback = False + self.alloc_on_atomic = False self.dealloc_on_unique = False self.dealloc_on_shared = False self.dealloc_backinv_unique = False diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm index 8ba9d935ff..293c731c37 100644 --- a/src/mem/ruby/protocol/RubySlicc_Types.sm +++ b/src/mem/ruby/protocol/RubySlicc_Types.sm @@ -139,6 +139,13 @@ structure (Sequencer, external = "yes") { Cycles, Cycles, Cycles); void writeUniqueCallback(Addr, DataBlock); + void atomicCallback(Addr, DataBlock); + void atomicCallback(Addr, DataBlock, bool); + void atomicCallback(Addr, DataBlock, bool, MachineType); + void atomicCallback(Addr, DataBlock, bool, MachineType, + Cycles, Cycles, Cycles); + + void unaddressedCallback(Addr, RubyRequestType); void unaddressedCallback(Addr, 
RubyRequestType, MachineType); void unaddressedCallback(Addr, RubyRequestType, MachineType, diff --git a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm index 42e07eb46b..4c9498423c 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm @@ -148,15 +148,22 @@ action(AllocateTBE_SeqRequest, desc="") { out_msg.is_remote_pf := false; out_msg.txnId := max_outstanding_transactions; + out_msg.atomic_op.clear(); + out_msg.atomic_op.orMask(in_msg.writeMask); + if ((in_msg.Type == RubyRequestType:LD) || (in_msg.Type == RubyRequestType:IFETCH)) { out_msg.type := CHIRequestType:Load; - } else if (in_msg.Type == RubyRequestType:ST) { + } else if (in_msg.Type == RubyRequestType:ST) { if (in_msg.Size == blockSize) { out_msg.type := CHIRequestType:StoreLine; } else { out_msg.type := CHIRequestType:Store; } + } else if (in_msg.Type == RubyRequestType:ATOMIC_RETURN) { + out_msg.type := CHIRequestType:AtomicLoad; + } else if (in_msg.Type == RubyRequestType:ATOMIC_NO_RETURN){ + out_msg.type := CHIRequestType:AtomicStore; } else { error("Invalid RubyRequestType"); } @@ -769,6 +776,148 @@ action(Initiate_StoreMiss, desc="") { } } +action(Initiate_Atomic_UC, desc="") { + if ((policy_type == 0) || // ALL NEAR + (policy_type == 1) || // UNIQUE NEAR + (policy_type == 2) // PRESENT NEAR + ){ + tbe.actions.push(Event:DataArrayRead); + tbe.actions.push(Event:DelayAtomic); + tbe.actions.push(Event:AtomicHit); + tbe.actions.pushNB(Event:DataArrayWrite); + tbe.actions.pushNB(Event:TagArrayWrite); + } else { + error("Invalid policy type"); + } +} + +action(Initiate_Atomic_UD, desc="") { + if ((policy_type == 0) || // ALL NEAR + (policy_type == 1) || // UNIQUE NEAR + (policy_type == 2) // PRESENT NEAR + ){ + tbe.actions.push(Event:DataArrayRead); + tbe.actions.push(Event:DelayAtomic); + tbe.actions.push(Event:AtomicHit); + tbe.actions.pushNB(Event:DataArrayWrite); + 
tbe.actions.pushNB(Event:TagArrayWrite); + } else { + error("Invalid policy type"); + } +} + +action(Initiate_AtomicReturn_I, desc="") { + if (policy_type == 0){ // ALL NEAR + tbe.actions.push(Event:SendReadUnique); + tbe.actions.push(Event:WriteFEPipe); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:TagArrayWrite); + tbe.atomic_to_be_done := true; + } else if ((policy_type == 1) || // UNIQUE NEAR + (policy_type == 2)) { // PRESENT NEAR + tbe.actions.push(Event:SendAtomicReturn_NoWait); + tbe.dataToBeInvalid := true; + tbe.doCacheFill := false; + tbe.atomic_to_be_done := false; + } else { + error("Invalid policy type"); + } +} + +action(Initiate_AtomicNoReturn_I, desc="") { + if (policy_type == 0){ // ALL NEAR + tbe.actions.push(Event:SendReadUnique); + tbe.actions.push(Event:WriteFEPipe); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:TagArrayWrite); + tbe.atomic_to_be_done := true; + } else if (policy_type == 1) { // UNIQUE NEAR + tbe.actions.push(Event:SendAtomicNoReturn); + tbe.actions.push(Event:SendANRData); + tbe.dataToBeInvalid := true; + tbe.doCacheFill := false; + tbe.atomic_to_be_done := false; + } else { + error("Invalid policy type"); + } +} + +action(Initiate_AtomicReturn_SD, desc="") { + if (policy_type == 0){ // ALL NEAR + tbe.actions.push(Event:SendReadUnique); + tbe.actions.push(Event:WriteFEPipe); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:TagArrayWrite); + tbe.atomic_to_be_done := true; + } else if (policy_type == 1) { // UNIQUE NEAR + tbe.actions.push(Event:SendAtomicReturn_NoWait); + tbe.dataToBeInvalid := true; + tbe.doCacheFill := false; + tbe.atomic_to_be_done := false; + } else { + error("Invalid policy type"); + } +} + +action(Initiate_AtomicNoReturn_SD, desc="") { + if (policy_type == 0){ // ALL NEAR + tbe.actions.push(Event:SendReadUnique); + 
tbe.actions.push(Event:WriteFEPipe); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:TagArrayWrite); + tbe.atomic_to_be_done := true; + } else if (policy_type == 1) { // UNIQUE NEAR + tbe.actions.push(Event:SendAtomicNoReturn); + tbe.actions.push(Event:SendANRData); + tbe.dataToBeInvalid := true; + tbe.doCacheFill := false; + tbe.atomic_to_be_done := false; + } else { + error("Invalid policy type"); + } +} + +action(Initiate_AtomicReturn_SC, desc="") { + if (policy_type == 0){ // ALL NEAR + tbe.actions.push(Event:SendReadUnique); + tbe.actions.push(Event:WriteFEPipe); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:TagArrayWrite); + tbe.atomic_to_be_done := true; + } else if (policy_type == 1) { // UNIQUE NEAR + tbe.actions.push(Event:SendAtomicReturn_NoWait); + tbe.dataToBeInvalid := true; + tbe.doCacheFill := false; + tbe.atomic_to_be_done := false; + } else { + error("Invalid policy type"); + } +} + +action(Initiate_AtomicNoReturn_SC, desc="") { + if (policy_type == 0){ // ALL NEAR + tbe.actions.push(Event:SendReadUnique); + tbe.actions.push(Event:WriteFEPipe); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:TagArrayWrite); + tbe.atomic_to_be_done := true; + } else if (policy_type == 1) { // UNIQUE NEAR + tbe.actions.push(Event:SendAtomicNoReturn); + tbe.actions.push(Event:SendANRData); + tbe.dataToBeInvalid := true; + tbe.doCacheFill := false; + tbe.atomic_to_be_done := false; + } else { + error("Invalid policy type"); + } +} + action(Initiate_StoreUpgrade, desc="") { assert(tbe.dataValid); assert(is_valid(cache_entry)); @@ -865,8 +1014,111 @@ action(Initiate_WriteUnique_Forward, desc="") { tbe.actions.pushNB(Event:TagArrayWrite); } +action(Initiate_AtomicReturn_LocalWrite, desc="") { + if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) { + 
tbe.actions.push(Event:SendSnpUnique); + } else if (tbe.dir_sharers.count() > 0){ + // no one will send us data unless we explicitly ask + tbe.actions.push(Event:SendSnpUniqueRetToSrc); + } + tbe.actions.push(Event:SendDBIDResp_AR); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.pushNB(Event:SendCompData_AR); + tbe.actions.push(Event:WriteFEPipe); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:DelayAtomic); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:TagArrayWrite); +} +action(Initiate_AtomicNoReturn_LocalWrite, desc="") { + if ((tbe.dir_sharers.count() > 0) && tbe.dataMaybeDirtyUpstream) { + tbe.actions.push(Event:SendSnpUnique); + } else if (tbe.dir_sharers.count() > 0){ + // no one will send us data unless we explicitly ask + tbe.actions.push(Event:SendSnpUniqueRetToSrc); + } + if (comp_wu) { + tbe.actions.push(Event:SendDBIDResp_ANR); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.pushNB(Event:SendComp_ANR); + } else { + tbe.actions.push(Event:SendCompDBIDResp_ANR); + tbe.actions.pushNB(Event:WriteFEPipe); + } + tbe.actions.push(Event:WriteFEPipe); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:DelayAtomic); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:TagArrayWrite); +} + + +action(Initiate_AtomicReturn_Forward, desc="") { + if ((tbe.dir_sharers.count() > 0) && + (tbe.dir_sharers.isElement(tbe.requestor))){ + tbe.dir_sharers.remove(tbe.requestor); + } + tbe.actions.push(Event:SendAtomicReturn); + tbe.actions.push(Event:SendCompData_AR); + tbe.actions.pushNB(Event:TagArrayWrite); + + tbe.dataToBeInvalid := true; +} + +action(Initiate_AtomicNoReturn_Forward, desc="") { + if ((tbe.dir_sharers.count() > 0) && + (tbe.dir_sharers.isElement(tbe.requestor))){ + tbe.dir_sharers.remove(tbe.requestor); + } + if (comp_wu) { + tbe.actions.push(Event:SendAtomicNoReturn); + tbe.actions.push(Event:SendDBIDResp_ANR); + tbe.actions.pushNB(Event:SendComp_ANR); + } else { + 
tbe.actions.push(Event:SendAtomicNoReturn); + tbe.actions.push(Event:SendCompDBIDResp_ANR); + } + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:SendANRData); + tbe.actions.pushNB(Event:TagArrayWrite); + + tbe.dataToBeInvalid := true; +} + +action(Initiate_AtomicReturn_Miss, desc="") { + tbe.actions.push(Event:SendReadNoSnp); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.push(Event:SendDBIDResp_AR); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.pushNB(Event:SendCompData_AR); + tbe.actions.push(Event:WriteFEPipe); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:DelayAtomic); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:TagArrayWrite); +} + +action(Initiate_AtomicNoReturn_Miss, desc="") { + assert(is_HN); + tbe.actions.push(Event:SendReadNoSnp); + if (comp_wu) { + tbe.actions.push(Event:SendDBIDResp_ANR); + tbe.actions.pushNB(Event:WriteFEPipe); + tbe.actions.pushNB(Event:SendComp_ANR); + } else { + tbe.actions.push(Event:SendCompDBIDResp_ANR); + tbe.actions.pushNB(Event:WriteFEPipe); + } + + tbe.actions.push(Event:WriteFEPipe); + tbe.actions.push(Event:CheckCacheFill); + tbe.actions.push(Event:DelayAtomic); + tbe.actions.push(Event:WriteBEPipe); + tbe.actions.push(Event:TagArrayWrite); +} + action(Initiate_CopyBack, desc="") { // expect to receive this data after Send_CompDBIDResp if (tbe.reqType == CHIRequestType:WriteBackFull) { @@ -1157,7 +1409,9 @@ action(Send_ReadShared, desc="") { action(Send_ReadNoSnp, desc="") { assert(is_HN); - assert(tbe.use_DMT == false); + assert((tbe.use_DMT == false) || + ((tbe.reqType == CHIRequestType:AtomicReturn) || + (tbe.reqType == CHIRequestType:AtomicNoReturn))); clearExpectedReqResp(tbe); tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_UC); @@ -1368,6 +1622,45 @@ action(Send_WriteUnique, desc="") { tbe.expected_req_resp.addExpectedCount(1); } +action(Send_AtomicReturn, desc="") { + assert(is_valid(tbe)); + + enqueue(reqOutPort, CHIRequestMsg, 
request_latency) { + prepareRequestAtomic(tbe, CHIRequestType:AtomicReturn, out_msg); + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + allowRequestRetry(tbe, out_msg); + } + clearExpectedReqResp(tbe); + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:DBIDResp); + tbe.expected_req_resp.addExpectedCount(1); +} + +action(Send_AtomicReturn_NoWait, desc="") { + assert(is_valid(tbe)); + + enqueue(reqOutPort, CHIRequestMsg, request_latency) { + prepareRequestAtomic(tbe, CHIRequestType:AtomicReturn, out_msg); + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + allowRequestRetry(tbe, out_msg); + } + + tbe.dataAMOValid := false; +} + +action(Send_AtomicNoReturn, desc="") { + assert(is_valid(tbe)); + + enqueue(reqOutPort, CHIRequestMsg, request_latency) { + prepareRequestAtomic(tbe, CHIRequestType:AtomicNoReturn, out_msg); + out_msg.Destination.add(mapAddressToDownstreamMachine(tbe.addr)); + allowRequestRetry(tbe, out_msg); + } + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompDBIDResp); + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:DBIDResp); + tbe.expected_req_resp.addExpectedCount(1); +} + + action(Send_SnpCleanInvalid, desc="") { assert(is_valid(tbe)); assert(tbe.expected_snp_resp.hasExpected() == false); @@ -1636,6 +1929,20 @@ action(ExpectNCBWrData, desc="") { tbe.dataBlkValid.setMask(addressOffset(tbe.accAddr, tbe.addr), tbe.accSize, false); } +action(ExpectNCBWrData_A, desc="") { + // Expected data + int num_msgs := tbe.accSize / data_channel_size; + if ((tbe.accSize % data_channel_size) != 0) { + num_msgs := num_msgs + 1; + } + tbe.expected_req_resp.clear(num_msgs); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:NCBWrData); + tbe.expected_req_resp.setExpectedCount(1); + + // In atomic operations we do not expect real data for the current block + // Thus the mask bits do not care +} + action(ExpectCompAck, desc="") { assert(is_valid(tbe)); 
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:CompAck); @@ -1658,7 +1965,22 @@ action(Receive_ReqDataResp, desc="") { } // Copy data to tbe only if we didn't have valid data or the received // data is dirty - if ((tbe.dataBlkValid.isFull() == false) || + if ((in_msg.type == CHIDataType:NCBWrData) && + ((tbe.reqType == CHIRequestType:AtomicReturn) || + (tbe.reqType == CHIRequestType:AtomicNoReturn))){ + // DO NOTHING + } else if ((in_msg.type == CHIDataType:CompData_I) && + ((tbe.reqType == CHIRequestType:AtomicReturn) || + (tbe.reqType == CHIRequestType:AtomicLoad))) { + if(tbe.dataBlkValid.isFull()){ + tbe.dataBlkValid.clear(); + } + tbe.oldDataBlk.copyPartial(in_msg.dataBlk, in_msg.bitMask); + assert(tbe.dataBlkValid.isOverlap(in_msg.bitMask) == false); + tbe.dataBlkValid.orMask(in_msg.bitMask); + DPRINTF(RubySlicc, "Received %s\n", tbe.oldDataBlk); + DPRINTF(RubySlicc, "dataBlkValid = %s\n", tbe.dataBlkValid); + } else if ((tbe.dataBlkValid.isFull() == false) || (in_msg.type == CHIDataType:CompData_UD_PD) || (in_msg.type == CHIDataType:CompData_SD_PD) || (in_msg.type == CHIDataType:CBWrData_UD_PD) || @@ -1683,7 +2005,8 @@ action(Receive_RespSepDataFromCompData, desc="") { if (tbe.expected_req_resp.receiveResp(CHIResponseType:RespSepData) == false) { error("Received unexpected message"); } - if (is_HN == false) { + if ((is_HN == false) && (tbe.reqType != CHIRequestType:AtomicReturn) && + ((tbe.reqType != CHIRequestType:AtomicLoad) || (tbe.atomic_to_be_done == true))){ // must now ack the responder tbe.actions.pushFrontNB(Event:SendCompAck); } @@ -1905,6 +2228,7 @@ action(UpdateDataState_FromReqDataResp, desc="") { } else if (in_msg.type == CHIDataType:CompData_I) { tbe.dataValid := true; + tbe.dataAMOValid := true; tbe.dataToBeInvalid := true; assert(tbe.dataMaybeDirtyUpstream == false); @@ -1946,7 +2270,9 @@ action(UpdateDataState_FromReqDataResp, desc="") { action(UpdateDataState_FromWUDataResp, desc="") { assert(is_valid(tbe)); - if 
(tbe.expected_req_resp.hasReceivedData()) { + if (tbe.expected_req_resp.hasReceivedData() && + (tbe.reqType != CHIRequestType:AtomicReturn) && + (tbe.reqType != CHIRequestType:AtomicNoReturn)) { assert(tbe.dataBlkValid.test(addressOffset(tbe.accAddr, tbe.addr))); assert(tbe.dataBlkValid.test(addressOffset(tbe.accAddr, tbe.addr) + tbe.accSize - 1)); @@ -1964,6 +2290,22 @@ action(UpdateDataState_FromWUDataResp, desc="") { printTBEState(tbe); } +action(UpdateDataState_FromADataResp, desc="") { + assert(is_valid(tbe)); + if (is_HN && (tbe.expected_req_resp.hasReceivedData()) && + ((tbe.reqType == CHIRequestType:AtomicReturn) || + (tbe.reqType == CHIRequestType:AtomicNoReturn))) { + DPRINTF(RubySlicc, "Atomic before %s\n", tbe.dataBlk); + + tbe.oldDataBlk := tbe.dataBlk; + tbe.dataBlk.atomicPartial(tbe.dataBlk, tbe.atomic_op); + tbe.dataDirty := true; + + DPRINTF(RubySlicc, "Atomic after %s\n", tbe.dataBlk); + } + printTBEState(tbe); +} + action(UpdateDataState_FromCUResp, desc="") { assert(is_valid(tbe)); peek(rspInPort, CHIResponseMsg) { @@ -2127,6 +2469,10 @@ action(Receive_ReqResp_WUNeedComp, desc="") { tbe.defer_expected_comp := true; } +action(Receive_ReqResp_AR, desc="") { + tbe.actions.pushFrontNB(Event:SendARData); +} + action(Receive_ReqResp_WUComp, desc="") { if (tbe.defer_expected_comp) { tbe.defer_expected_comp := false; @@ -2320,6 +2666,36 @@ action(CheckWUComp, desc="") { } } +action(Send_ARData, desc="") { + assert(is_valid(tbe)); + tbe.snd_msgType := CHIDataType:NCBWrData; + tbe.snd_destination := mapAddressToDownstreamMachine(tbe.addr); + setupPendingAtomicSend(tbe); +} + +action(Send_ANRData, desc="") { + assert(is_valid(tbe)); + tbe.snd_msgType := CHIDataType:NCBWrData; + tbe.snd_destination := mapAddressToDownstreamMachine(tbe.addr); + setupPendingAtomicSend(tbe); +} + +action(CheckARComp, desc="") { + assert(is_valid(tbe)); + tbe.expected_req_resp.addExpectedDataType(CHIDataType:CompData_I); + 
tbe.expected_req_resp.addExpectedRespType(CHIResponseType:RespSepData); + tbe.expected_req_resp.addExpectedCount(2); +} + +action(CheckANRComp, desc="") { + assert(is_valid(tbe)); + if (tbe.defer_expected_comp) { + tbe.defer_expected_comp := false; + tbe.expected_req_resp.addExpectedCount(1); + tbe.expected_req_resp.addExpectedRespType(CHIResponseType:Comp); + } +} + action(Send_SnpRespData, desc="") { assert(is_HN == false); assert(is_valid(tbe)); @@ -2531,7 +2907,12 @@ action(Send_Data, desc="") { } tbe.snd_pendBytes.setMask(offset, range, false); - out_msg.dataBlk := tbe.dataBlk; + if (tbe.reqType == CHIRequestType:AtomicReturn){ + out_msg.dataBlk := tbe.oldDataBlk; + } else { + out_msg.dataBlk := tbe.dataBlk; + } + out_msg.bitMask.setMask(offset, range); out_msg.responder := machineID; @@ -2673,6 +3054,36 @@ action(Send_Comp_WU, desc="") { } } + +action(Send_CompData_AR, desc="") { + assert(is_valid(tbe)); + assert(tbe.dataValid); + + if (is_HN) { + tbe.oldDataBlk := tbe.dataBlk; + } + + tbe.snd_msgType := CHIDataType:CompData_I; + tbe.dataMaybeDirtyUpstream := false; + tbe.requestorToBeExclusiveOwner := false; + tbe.requestorToBeOwner := false; + tbe.snd_destination := tbe.requestor; + setupPendingSend(tbe); + printTBEState(tbe); + +} + +action(Send_Comp_ANR, desc="") { + assert(is_valid(tbe)); + enqueue(rspOutPort, CHIResponseMsg, comp_anr_latency + response_latency) { + out_msg.addr := address; + out_msg.type := CHIResponseType:Comp; + out_msg.responder := machineID; + out_msg.Destination.add(tbe.requestor); + } +} + + action(Send_SnpRespI, desc="") { enqueue(rspOutPort, CHIResponseMsg, response_latency) { out_msg.addr := address; @@ -3003,6 +3414,22 @@ action(Callback_StoreHit, desc="") { } } +action(Callback_AtomicHit, desc="") { + assert(is_valid(tbe)); + assert(tbe.dataValid); + assert((tbe.reqType == CHIRequestType:AtomicLoad) || + (tbe.reqType == CHIRequestType:AtomicStore)); + DPRINTF(RubySlicc, "Atomic before %s\n", tbe.dataBlk); + + DataBlock 
oldDataBlk; + oldDataBlk := tbe.dataBlk; + tbe.dataBlk.atomicPartial(tbe.dataBlk, tbe.atomic_op); + + sequencer.atomicCallback(tbe.addr, oldDataBlk, false); + DPRINTF(RubySlicc, "Atomic after %s\n", tbe.dataBlk); + tbe.dataDirty := true; +} + action(Callback_ExpressPrefetchHit, desc="") { // have not allocated TBE, but must clear the reservation assert(is_invalid(tbe)); @@ -3051,6 +3478,25 @@ action(Callback_Miss, desc="") { // also decay the timeout scLockDecayLatency(); } + } else if (tbe.dataValid && tbe.atomic_to_be_done && + ((tbe.reqType == CHIRequestType:AtomicLoad) || + (tbe.reqType == CHIRequestType:AtomicStore))){ + assert(is_valid(tbe)); + assert(tbe.dataValid); + assert((tbe.reqType == CHIRequestType:AtomicLoad) || + (tbe.reqType == CHIRequestType:AtomicStore)); + DPRINTF(RubySlicc, "Atomic before %s\n", tbe.dataBlk); + + DataBlock oldDataBlk; + oldDataBlk := tbe.dataBlk; + tbe.dataBlk.atomicPartial(tbe.dataBlk, tbe.atomic_op); + + sequencer.atomicCallback(tbe.addr, oldDataBlk, false); + DPRINTF(RubySlicc, "Atomic after %s\n", tbe.dataBlk); + tbe.dataDirty := true; + } else if (tbe.dataValid && tbe.dataAMOValid && (tbe.reqType == CHIRequestType:AtomicLoad)) { + DPRINTF(RubySlicc, "Atomic before %s\n", tbe.oldDataBlk); + sequencer.atomicCallback(tbe.addr, tbe.oldDataBlk, false); } } @@ -3070,6 +3516,18 @@ action(Unset_Timeout_Cache, desc="") { wakeup_port(snpRdyPort, address); } +action(Callback_AtomicNoReturn, desc="") { + assert(is_valid(tbe)); + assert((tbe.is_local_pf || tbe.is_remote_pf) == false); + assert((tbe.reqType == CHIRequestType:AtomicNoReturn) || + (tbe.reqType == CHIRequestType:AtomicStore)); + + if(tbe.reqType == CHIRequestType:AtomicStore){ + sequencer.atomicCallback(tbe.addr, tbe.dataBlk); + DPRINTF(RubySlicc, "AtomicNoReturn %s\n", tbe.dataBlk); + } +} + action(Callback_WriteUnique, desc="") { assert(is_valid(tbe)); assert((tbe.is_local_pf || tbe.is_remote_pf) == false); @@ -3183,7 +3641,7 @@ action(Profile_OutgoingEnd_DatalessResp, 
desc="") { action(TagArrayRead, desc="") { assert(is_valid(tbe)); tbe.delayNextAction := curTick() + cyclesToTicks( - tagLatency(fromSequencer(tbe.reqType))); + tagLatency(fromSequencer(tbe.reqType))); } action(TagArrayWrite, desc="") { @@ -3235,6 +3693,11 @@ action(FillPipe, desc="") { tbe.delayNextAction := curTick() + cyclesToTicks(fill_latency); } +action(DelayAtomic, desc="") { + assert(is_valid(tbe)); + tbe.delayNextAction := curTick() + cyclesToTicks(atomic_op_latency); +} + action(SnpSharedPipe, desc="") { assert(is_valid(tbe)); tbe.delayNextAction := curTick() + cyclesToTicks(snp_latency); diff --git a/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm b/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm index 4d8c35053c..371ad05109 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache-funcs.sm @@ -302,7 +302,9 @@ Cycles dataLatency() { bool fromSequencer(CHIRequestType reqType) { return reqType == CHIRequestType:Load || reqType == CHIRequestType:Store || - reqType == CHIRequestType:StoreLine; + reqType == CHIRequestType:StoreLine || + reqType == CHIRequestType:AtomicLoad || + reqType == CHIRequestType:AtomicStore; } bool inCache(Addr addr) { @@ -434,6 +436,9 @@ TBE allocateRequestTBE(Addr addr, CHIRequestMsg in_msg), return_by_pointer="yes" tbe.is_local_pf := in_msg.is_local_pf; tbe.is_remote_pf := in_msg.is_remote_pf; + tbe.atomic_op.clear(); + tbe.atomic_op.orMask(in_msg.atomic_op); + tbe.use_DMT := false; tbe.use_DCT := false; @@ -622,6 +627,13 @@ void setupPendingPartialSend(TBE tbe) { scheduleSendData(tbe, 0); } +void setupPendingAtomicSend(TBE tbe) { + assert(blockSize >= data_channel_size); + assert((blockSize % data_channel_size) == 0); + tbe.snd_pendBytes.setMask(0,tbe.accSize,true); + scheduleSendData(tbe, 0); +} + // common code for downstream requests void prepareRequest(TBE tbe, CHIRequestType type, CHIRequestMsg & out_msg) { out_msg.addr := tbe.addr; @@ -644,6 +656,17 @@ void prepareRequest(TBE tbe, 
CHIRequestType type, CHIRequestMsg & out_msg) { assert(tbe.txnId != static_cast(Addr, "value", -1)); } +void prepareRequestAtomic(TBE tbe, CHIRequestType type, + CHIRequestMsg & out_msg) { + assert((type == CHIRequestType:AtomicReturn) || + (type == CHIRequestType:AtomicNoReturn)); + prepareRequest(tbe, type, out_msg); + out_msg.accAddr := tbe.accAddr; + out_msg.accSize := tbe.accSize; + out_msg.atomic_op.clear(); + out_msg.atomic_op.orMask(tbe.atomic_op); +} + void allowRequestRetry(TBE tbe, CHIRequestMsg & out_msg) { out_msg.allowRetry := true; tbe.pendReqAllowRetry := true; @@ -672,6 +695,8 @@ void prepareRequestRetry(TBE tbe, CHIRequestMsg & out_msg) { out_msg.seqReq := tbe.seqReq; out_msg.is_local_pf := false; out_msg.is_remote_pf := tbe.is_local_pf || tbe.is_remote_pf; + out_msg.atomic_op.clear(); + out_msg.atomic_op.orMask(tbe.atomic_op); } void prepareRequestRetryDVM(TBE tbe, CHIRequestMsg & out_msg) { @@ -773,8 +798,12 @@ bool needCacheEntry(CHIRequestType req_type, (req_type == CHIRequestType:WriteEvictFull) || (is_HN && (req_type == CHIRequestType:WriteUniqueFull)))) || (alloc_on_seq_acc && ((req_type == CHIRequestType:Load) || - (req_type == CHIRequestType:Store))) || - (alloc_on_seq_line_write && (req_type == CHIRequestType:StoreLine)); + (req_type == CHIRequestType:Store) || + (req_type == CHIRequestType:AtomicLoad) || + (req_type == CHIRequestType:AtomicStore))) || + (alloc_on_seq_line_write && (req_type == CHIRequestType:StoreLine)) || + (alloc_on_atomic && ((req_type == CHIRequestType:AtomicReturn) || + (req_type == CHIRequestType:AtomicNoReturn))); } } @@ -1174,6 +1203,10 @@ Event reqToEvent(CHIRequestType type, bool is_prefetch) { return Event:Store; } else if (type == CHIRequestType:StoreLine) { return Event:Store; + } else if (type == CHIRequestType:AtomicLoad) { + return Event:AtomicLoad; + } else if (type == CHIRequestType:AtomicStore){ + return Event:AtomicStore; } else if (type == CHIRequestType:ReadShared) { return Event:ReadShared; } else 
if (type == CHIRequestType:ReadNotSharedDirty) { @@ -1214,6 +1247,18 @@ Event reqToEvent(CHIRequestType type, bool is_prefetch) { return Event:DvmTlbi_Initiate; } else if (type == CHIRequestType:DvmSync_Initiate) { return Event:DvmSync_Initiate; + } else if (type == CHIRequestType:AtomicReturn){ + if (is_HN) { + return Event:AtomicReturn_PoC; + } else { + return Event:AtomicReturn; + } + } else if (type == CHIRequestType:AtomicNoReturn){ + if (is_HN) { + return Event:AtomicNoReturn_PoC; + } else { + return Event:AtomicNoReturn; + } } else { error("Invalid CHIRequestType"); } diff --git a/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm b/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm index cb9ffa567a..0e8c6ec0e3 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache-transitions.sm @@ -155,6 +155,12 @@ transition({BUSY_INTR,BUSY_BLKD}, FillPipe) { ProcessNextState_ClearPending; } +transition({BUSY_INTR,BUSY_BLKD}, DelayAtomic) { + Pop_TriggerQueue; + DelayAtomic; + ProcessNextState_ClearPending; +} + transition({BUSY_INTR,BUSY_BLKD}, SnpSharedPipe) { Pop_TriggerQueue; SnpSharedPipe; @@ -418,8 +424,82 @@ transition({RSC,RSD,RUSD,RUSC,RU,I}, WriteUnique, BUSY_BLKD) { ProcessNextState; } +// AtomicReturn and AtomicNoReturn -// Load / Store from sequencer & Prefetch from prefetcher +transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD, + UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicReturn, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicReturn_Forward; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({I,SC,SC_RSC,SD,SD_RSD,SD_RSC,RSD,RUSD, + UD,UD_RSC,UD_RSD,UD_RU,UC,UC_RSC,UC_RU,RSC,RU}, AtomicNoReturn, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicNoReturn_Forward; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC}, + AtomicReturn_PoC, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicReturn_LocalWrite; + 
Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD,UD_RU,UD_RSD,UD_RSC,UC,UC_RU,UC_RSC}, + AtomicNoReturn_PoC, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicNoReturn_LocalWrite; + Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU}, + AtomicReturn_PoC, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicReturn_LocalWrite; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({SD, SD_RSD, SD_RSC, SC, SC_RSC, RSC, RSD, RUSC, RUSD, RU}, + AtomicNoReturn_PoC, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicNoReturn_LocalWrite; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition(I, AtomicReturn_PoC, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicReturn_Miss; + Allocate_DirEntry; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition(I, AtomicNoReturn_PoC, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicNoReturn_Miss; + Allocate_DirEntry; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + + +// Load / Store / Atomic from sequencer & Prefetch from prefetcher transition({UD,UD_T,SD,UC,SC}, Load, BUSY_BLKD) { Initiate_Request; @@ -460,6 +540,28 @@ transition(BUSY_BLKD, StoreHit) { ProcessNextState_ClearPending; } +transition(UC, {AtomicLoad,AtomicStore}, BUSY_BLKD) { + Initiate_Request; + Initiate_Atomic_UC; + Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition({UD,UD_T}, {AtomicLoad,AtomicStore}, BUSY_BLKD) { + Initiate_Request; + Initiate_Atomic_UD; + Profile_Hit; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition(BUSY_BLKD, AtomicHit) { + Pop_TriggerQueue; + Callback_AtomicHit; + ProcessNextState_ClearPending; +} + transition(I, {Load,Prefetch}, BUSY_BLKD) { Initiate_Request; Initiate_LoadMiss; @@ -494,6 +596,55 @@ transition({BUSY_BLKD,BUSY_INTR}, UseTimeout) { Unset_Timeout_TBE; } +transition(I, AtomicLoad, BUSY_BLKD){ + Initiate_Request; + Initiate_AtomicReturn_I; + 
Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition(I, AtomicStore, BUSY_BLKD){ + Initiate_Request; + Initiate_AtomicNoReturn_I; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition(SD, AtomicLoad, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicReturn_SD; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition(SC, AtomicLoad, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicReturn_SC; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition(SD, AtomicStore, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicNoReturn_SD; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + +transition(SC, AtomicStore, BUSY_BLKD) { + Initiate_Request; + Initiate_AtomicNoReturn_SC; + Profile_Miss; + Pop_ReqRdyQueue; + ProcessNextState; +} + + // Evict from Upstream transition({UD_RSC,SD_RSC,UC_RSC,SC_RSC,RSC,RSD,RUSD,RUSC,UD_RSD,SD_RSD}, Evict, BUSY_BLKD) { @@ -691,13 +842,15 @@ transition(BUSY_INTR, {SnpOnce,SnpOnceFwd}, BUSY_BLKD) { transition({BUSY_BLKD,BUSY_INTR}, {ReadShared, ReadNotSharedDirty, ReadUnique, ReadUnique_PoC, ReadOnce, CleanUnique, CleanUnique_Stale, - Load, Store, Prefetch, + Load, Store, AtomicLoad, AtomicStore, Prefetch, WriteBackFull, WriteBackFull_Stale, WriteEvictFull, WriteEvictFull_Stale, WriteCleanFull, WriteCleanFull_Stale, Evict, Evict_Stale, WriteUnique,WriteUniquePtl_PoC, - WriteUniqueFull_PoC,WriteUniqueFull_PoC_Alloc}) { + WriteUniqueFull_PoC,WriteUniqueFull_PoC_Alloc, + AtomicReturn,AtomicReturn_PoC, + AtomicNoReturn,AtomicNoReturn_PoC}) { StallRequest; } @@ -754,6 +907,30 @@ transition(BUSY_BLKD, SendWriteUnique, BUSY_INTR) {DestinationAvailable} { ProcessNextState_ClearPending; } +transition(BUSY_BLKD, SendAtomicReturn, BUSY_INTR) {DestinationAvailable} { + Pop_TriggerQueue; + Send_AtomicReturn; + CheckARComp; + Profile_OutgoingStart; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendAtomicReturn_NoWait, BUSY_INTR) { + Pop_TriggerQueue; + 
Send_AtomicReturn_NoWait; + CheckARComp; + Profile_OutgoingStart; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendAtomicNoReturn, BUSY_INTR) {DestinationAvailable} { + Pop_TriggerQueue; + Send_AtomicNoReturn; + Profile_OutgoingStart; + ProcessNextState_ClearPending; +} + + transition(BUSY_BLKD, SendWriteNoSnp, BUSY_INTR) {DestinationAvailable} { Pop_TriggerQueue; Send_WriteNoSnp; @@ -804,6 +981,20 @@ transition(BUSY_BLKD, SendWUDataCB) { ProcessNextState_ClearPending; } +transition({BUSY_BLKD,BUSY_INTR}, SendARData) { + Pop_TriggerQueue; + Send_ARData; + ProcessNextState_ClearPending; +} + +transition({BUSY_BLKD,BUSY_INTR}, SendANRData) { + Pop_TriggerQueue; + Callback_AtomicNoReturn; + Send_ANRData; + CheckANRComp; + ProcessNextState_ClearPending; +} + transition(BUSY_BLKD, SendInvSnpResp) { Pop_TriggerQueue; Send_InvSnpResp; @@ -1025,6 +1216,26 @@ transition({BUSY_BLKD,BUSY_INTR}, SendComp_WU) { ProcessNextState_ClearPending; } +transition(BUSY_BLKD, SendCompDBIDResp_ANR) { + Pop_TriggerQueue; + ExpectNCBWrData_A; + Send_CompDBIDResp; + ProcessNextState_ClearPending; +} + +transition(BUSY_BLKD, SendDBIDResp_AR) { + Pop_TriggerQueue; + ExpectNCBWrData_A; + Send_DBIDResp; + ProcessNextState_ClearPending; +} + +transition({BUSY_BLKD,BUSY_INTR}, SendCompData_AR) { + Pop_TriggerQueue; + Send_CompData_AR; + ProcessNextState_ClearPending; +} + transition(BUSY_BLKD, SendCompDBIDRespStale) { Pop_TriggerQueue; Send_CompDBIDResp_Stale; @@ -1085,6 +1296,7 @@ transition(BUSY_BLKD, transition({BUSY_BLKD,BUSY_INTR}, NCBWrData) { Receive_ReqDataResp; UpdateDataState_FromWUDataResp; + UpdateDataState_FromADataResp; Pop_DataInQueue; ProcessNextState; } @@ -1238,10 +1450,11 @@ transition(BUSY_INTR, CompDBIDResp, BUSY_BLKD) { } // alternative flow for WU with separate Comp -transition(BUSY_INTR, DBIDResp, BUSY_BLKD) { +transition({BUSY_INTR,BUSY_BLKD}, DBIDResp, BUSY_BLKD) { Receive_ReqResp; Receive_ReqResp_CopyDBID; Receive_ReqResp_WUNeedComp; + Receive_ReqResp_AR; 
Pop_RespInQueue; ProcessNextState; } diff --git a/src/mem/ruby/protocol/chi/CHI-cache.sm b/src/mem/ruby/protocol/chi/CHI-cache.sm index e40989df47..f806488b45 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache.sm @@ -51,6 +51,7 @@ machine(MachineType:Cache, "Cache coherency protocol") : // sending necessary snoops. Cycles read_hit_latency := 0; Cycles read_miss_latency := 0; + Cycles atomic_op_latency := 0; Cycles write_fe_latency := 0; // Front-end: Rcv req -> Snd req Cycles write_be_latency := 0; // Back-end: Rcv ack -> Snd data Cycles fill_latency := 0; // Fill latency @@ -126,11 +127,24 @@ machine(MachineType:Cache, "Cache coherency protocol") : // possible. bool enable_DCT; + // Atomic Operation Policy + // All Near executes all Atomics at L1 (variable set to 0; default) + // Unique Near executes Atomics at HNF for states I, SC, SD (set to 1) + // Present Near executes all Atomics at L1 except when state is I (set to 2) + int policy_type := 1; + + // Use separate Comp/DBIDResp responses for WriteUnique bool comp_wu := "False"; // additional latency for the WU Comp response Cycles comp_wu_latency := 0; + + // Use separate Comp/DBIDResp responses for AtomicNoReturn + bool comp_anr := "False"; + // additional latency for the ANR Comp response + Cycles comp_anr_latency := 0; + // Controls cache clusivity for different request types. // set all alloc_on* to false to completelly disable caching bool alloc_on_readshared; @@ -139,6 +153,7 @@ machine(MachineType:Cache, "Cache coherency protocol") : bool alloc_on_writeback; bool alloc_on_seq_acc; bool alloc_on_seq_line_write; + bool alloc_on_atomic; // Controls if the clusivity is strict. 
bool dealloc_on_unique; bool dealloc_on_shared; @@ -285,6 +300,8 @@ machine(MachineType:Cache, "Cache coherency protocol") : // See CHIRequestType in CHi-msg.sm for descriptions Load, desc="", in_trans="yes"; Store, desc="", in_trans="yes"; + AtomicLoad, desc="", in_trans="yes"; + AtomicStore, desc="", in_trans="yes"; Prefetch, desc="", in_trans="yes"; ReadShared, desc="", in_trans="yes"; ReadNotSharedDirty, desc="", in_trans="yes"; @@ -300,6 +317,10 @@ machine(MachineType:Cache, "Cache coherency protocol") : WriteUniquePtl_PoC, desc="", in_trans="yes"; WriteUniqueFull_PoC, desc="", in_trans="yes"; WriteUniqueFull_PoC_Alloc, desc="", in_trans="yes"; + AtomicReturn, desc="", in_trans="yes"; + AtomicNoReturn, desc="", in_trans="yes"; + AtomicReturn_PoC, desc="", in_trans="yes"; + AtomicNoReturn_PoC, desc="", in_trans="yes"; SnpCleanInvalid, desc="", in_trans="yes"; SnpShared, desc="", in_trans="yes"; SnpSharedFwd, desc="", in_trans="yes"; @@ -418,11 +439,12 @@ machine(MachineType:Cache, "Cache coherency protocol") : DataArrayWriteOnFill, desc="Write the cache data array (cache fill)"; // Events for modeling the pipeline latency - ReadHitPipe, desc="Latency of reads served from local cache"; - ReadMissPipe, desc="Latency of reads not served from local cache"; - WriteFEPipe, desc="Front-end latency of write requests"; - WriteBEPipe, desc="Back-end latency of write requests"; - FillPipe, desc="Cache fill latency"; + ReadHitPipe, desc="Latency of reads served from local cache"; + ReadMissPipe, desc="Latency of reads not served from local cache"; + WriteFEPipe, desc="Front-end latency of write requests"; + WriteBEPipe, desc="Back-end latency of write requests"; + FillPipe, desc="Cache fill latency"; + DelayAtomic, desc="Atomic operation latency"; SnpSharedPipe, desc="Latency for SnpShared requests"; SnpInvPipe, desc="Latency for SnpUnique and SnpCleanInv requests"; SnpOncePipe, desc="Latency for SnpOnce requests"; @@ -435,9 +457,9 @@ machine(MachineType:Cache, "Cache 
coherency protocol") : SendReadUnique, out_trans="yes", desc="Send a ReadUnique"; SendCompAck, desc="Send CompAck"; // Read handling at the completer - SendCompData, desc="Send CompData"; - WaitCompAck, desc="Expect to receive CompAck"; - SendRespSepData, desc="Send RespSepData for a DMT request"; + SendCompData, desc="Send CompData"; + WaitCompAck, desc="Expect to receive CompAck"; + SendRespSepData, desc="Send RespSepData for a DMT request"; // Send a write request downstream. SendWriteBackOrWriteEvict, out_trans="yes", desc="Send a WriteBackFull (if line is UD or SD) or WriteEvictFull (if UC)"; @@ -449,11 +471,25 @@ machine(MachineType:Cache, "Cache coherency protocol") : SendWUData, desc="Send write unique data"; SendWUDataCB, desc="Send write unique data from a sequencer callback"; // Write handling at the completer - SendCompDBIDResp, desc="Ack WB with CompDBIDResp"; - SendCompDBIDRespStale, desc="Ack stale WB with CompDBIDResp"; - SendCompDBIDResp_WU, desc="Ack WU with CompDBIDResp and set expected data"; - SendDBIDResp_WU, desc="Ack WU with DBIDResp and set expected data"; - SendComp_WU, desc="Ack WU completion"; + SendCompDBIDResp, desc="Ack WB with CompDBIDResp"; + SendCompDBIDRespStale, desc="Ack stale WB with CompDBIDResp"; + SendCompDBIDResp_WU, desc="Ack WU with CompDBIDResp and set expected data"; + SendDBIDResp_WU, desc="Ack WU with DBIDResp and set expected data"; + SendComp_WU, desc="Ack WU completion"; + + // Send an atomic request downstream. 
+ SendAtomicReturn, out_trans="yes", desc="Send atomic request with return"; + SendAtomicReturn_NoWait, out_trans="yes", desc="Send atomic request with return, but no DBID"; + SendAtomicNoReturn, out_trans="yes", desc="Send atomic request without return"; + SendARData, desc="Send atomic return request data"; + SendANRData, desc="Send atomic no return request data"; + // Atomic handling at the completer + SendDBIDResp_AR, desc="Ack AR with DBIDResp and set expected data"; + SendCompData_AR, desc="Ack AR completion"; + SendCompDBIDResp_ANR, desc="Ack ANR with CompDBIDResp and set expected data"; + SendDBIDResp_ANR, desc="Ack ANR with DBIDResp and set expected data"; + SendComp_ANR, desc="Ack ANR completion"; + // Dataless requests SendEvict, out_trans="yes", desc="Send a Evict"; @@ -502,6 +538,7 @@ machine(MachineType:Cache, "Cache coherency protocol") : // Misc triggers LoadHit, desc="Complete a load hit"; StoreHit, desc="Complete a store hit"; + AtomicHit, desc="Complete an atomic hit"; UseTimeout, desc="Transition from UD_T -> UD"; RestoreFromHazard, desc="Restore from a snoop hazard"; TX_Data, desc="Transmit pending data messages"; @@ -613,6 +650,10 @@ machine(MachineType:Cache, "Cache coherency protocol") : bool is_local_pf, desc="Request generated by a local prefetcher"; bool is_remote_pf, desc="Request generated a prefetcher in another cache"; + // Atomic info associated with the transaction + WriteMask atomic_op, desc="Atomic Operation Wrapper"; + bool atomic_to_be_done, desc="We have yet to perform the atomic"; + // NOTE: seqReq is a smart pointer pointing to original CPU request object // that triggers transactions associated with this TBE. seqReq carries some // information (e.g., PC of requesting instruction, virtual address of this @@ -630,8 +671,10 @@ machine(MachineType:Cache, "Cache coherency protocol") : // stable state. 
bool hasUseTimeout, desc="Line is locked under store/use timeout"; DataBlock dataBlk, desc="Local copy of the line"; + DataBlock oldDataBlk, desc="Local copy of the line before executing atomic"; WriteMask dataBlkValid, desc="Marks which bytes in the DataBlock are valid"; bool dataValid, desc="Local copy is valid"; + bool dataAMOValid, desc="Local copy is valid for AMO"; bool dataDirty, desc="Local copy is dirtry"; bool dataMaybeDirtyUpstream, desc="Line maybe dirty upstream"; bool dataUnique, desc="Line is unique either locally or upsatream"; diff --git a/src/mem/ruby/protocol/chi/CHI-msg.sm b/src/mem/ruby/protocol/chi/CHI-msg.sm index f3c2d66363..b9e11d9dd9 100644 --- a/src/mem/ruby/protocol/chi/CHI-msg.sm +++ b/src/mem/ruby/protocol/chi/CHI-msg.sm @@ -46,6 +46,8 @@ enumeration(CHIRequestType, desc="") { Load; Store; StoreLine; + AtomicLoad; + AtomicStore; // Incoming DVM-related requests generated by the sequencer DvmTlbi_Initiate; DvmSync_Initiate; @@ -66,6 +68,9 @@ enumeration(CHIRequestType, desc="") { WriteUniquePtl; WriteUniqueFull; + AtomicReturn; + AtomicNoReturn; + SnpSharedFwd; SnpNotSharedDirtyFwd; SnpUniqueFwd; @@ -108,6 +113,8 @@ structure(CHIRequestMsg, desc="", interface="Message") { bool is_local_pf, desc="Request generated by a local prefetcher"; bool is_remote_pf, desc="Request generated a prefetcher in another cache"; + WriteMask atomic_op, desc="Atomic Operation Wrapper"; + bool usesTxnId, desc="True if using a Transaction ID", default="false"; Addr txnId, desc="Transaction ID", default="0"; diff --git a/src/mem/ruby/slicc_interface/RubyRequest.cc b/src/mem/ruby/slicc_interface/RubyRequest.cc index 643c1dec6f..c6faf2d76f 100644 --- a/src/mem/ruby/slicc_interface/RubyRequest.cc +++ b/src/mem/ruby/slicc_interface/RubyRequest.cc @@ -123,5 +123,14 @@ RubyRequest::functionalWrite(Packet *pkt) return cBase < cTail; } +void +RubyRequest::setWriteMask(uint32_t offset, uint32_t len, + std::vector< std::pair<int, AtomicOpFunctor*>> atomicOps) +{ + m_writeMask.setMask(offset, 
len); + m_writeMask.setAtomicOps(atomicOps); +} + + } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh index 89ce83451e..1e9674b9f5 100644 --- a/src/mem/ruby/slicc_interface/RubyRequest.hh +++ b/src/mem/ruby/slicc_interface/RubyRequest.hh @@ -226,6 +226,8 @@ class RubyRequest : public Message const PrefetchBit& getPrefetch() const { return m_Prefetch; } RequestPtr getRequestPtr() const { return m_pkt->req; } + void setWriteMask(uint32_t offset, uint32_t len, + std::vector< std::pair<int, AtomicOpFunctor*>> atomicOps); void print(std::ostream& out) const; bool functionalRead(Packet *pkt); bool functionalRead(Packet *pkt, WriteMask &mask); diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 82fc19b57c..48054febef 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -466,8 +466,12 @@ Sequencer::writeCallback(Addr address, DataBlock& data, bool ruby_request = true; while (!seq_req_list.empty()) { SequencerRequest &seq_req = seq_req_list.front(); + // Atomic Request may be executed remotely in the cache hierarchy + bool atomic_req = + ((seq_req.m_type == RubyRequestType_ATOMIC_RETURN) || + (seq_req.m_type == RubyRequestType_ATOMIC_NO_RETURN)); - if (noCoales && !ruby_request) { + if ((noCoales || atomic_req) && !ruby_request) { // Do not process follow-up requests // (e.g. 
if full line not present) // Reissue to the cache hierarchy @@ -479,6 +483,8 @@ Sequencer::writeCallback(Addr address, DataBlock& data, assert(seq_req.m_type != RubyRequestType_LD); assert(seq_req.m_type != RubyRequestType_Load_Linked); assert(seq_req.m_type != RubyRequestType_IFETCH); + assert(seq_req.m_type != RubyRequestType_ATOMIC_RETURN); + assert(seq_req.m_type != RubyRequestType_ATOMIC_NO_RETURN); } // handle write request @@ -594,6 +600,62 @@ Sequencer::readCallback(Addr address, DataBlock& data, } } +void +Sequencer::atomicCallback(Addr address, DataBlock& data, + const bool externalHit, const MachineType mach, + const Cycles initialRequestTime, + const Cycles forwardRequestTime, + const Cycles firstResponseTime) +{ + // + // Free the first request (an atomic operation) from the list. + // Then issue the next request to ruby system as we cannot + // assume the cache line is present in the cache + // (the operation could be performed remotely) + // + assert(address == makeLineAddress(address)); + assert(m_RequestTable.find(address) != m_RequestTable.end()); + auto &seq_req_list = m_RequestTable[address]; + + // Perform hitCallback only on the first cpu request that + // issued the ruby request + bool ruby_request = true; + while (!seq_req_list.empty()) { + SequencerRequest &seq_req = seq_req_list.front(); + + if (ruby_request) { + // Check that the request was an atomic memory operation + // and record the latency + assert((seq_req.m_type == RubyRequestType_ATOMIC_RETURN) || + (seq_req.m_type == RubyRequestType_ATOMIC_NO_RETURN)); + recordMissLatency(&seq_req, true, mach, externalHit, + initialRequestTime, forwardRequestTime, + firstResponseTime); + } else { + // Read, Write or Atomic request: + // reissue request to the cache hierarchy + // (we don't know if op was performed remotely) + issueRequest(seq_req.pkt, seq_req.m_second_type); + break; + } + + // Atomics clean the monitor entry + llscClearMonitor(address); + + markRemoved(); + ruby_request = false; 
+ hitCallback(&seq_req, data, true, mach, externalHit, + initialRequestTime, forwardRequestTime, + firstResponseTime, false); + seq_req_list.pop_front(); + } + + // free all outstanding requests corresponding to this address + if (seq_req_list.empty()) { + m_RequestTable.erase(address); + } +} + void Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, bool llscSuccess, @@ -637,10 +699,16 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, (type == RubyRequestType_IFETCH) || (type == RubyRequestType_RMW_Read) || (type == RubyRequestType_Locked_RMW_Read) || - (type == RubyRequestType_Load_Linked)) { + (type == RubyRequestType_Load_Linked) || + (type == RubyRequestType_ATOMIC_RETURN)) { pkt->setData( data.getData(getOffset(request_address), pkt->getSize())); - DPRINTF(RubySequencer, "read data %s\n", data); + + if (type == RubyRequestType_ATOMIC_RETURN) { + DPRINTF(RubySequencer, "ATOMIC RETURN data %s\n", data); + } else { + DPRINTF(RubySequencer, "read data %s\n", data); + } } else if (pkt->req->isSwap()) { assert(!pkt->isMaskedWrite()); std::vector<uint8_t> overwrite_val(pkt->getSize()); @@ -807,6 +875,19 @@ Sequencer::makeRequest(PacketPtr pkt) } else if (pkt->req->isTlbiCmd()) { primary_type = secondary_type = tlbiCmdToRubyRequestType(pkt); DPRINTF(RubySequencer, "Issuing TLBI\n"); +#if defined (PROTOCOL_CHI) + } else if (pkt->isAtomicOp()) { + if (pkt->req->isAtomicReturn()){ + DPRINTF(RubySequencer, "Issuing ATOMIC RETURN \n"); + primary_type = secondary_type = + RubyRequestType_ATOMIC_RETURN; + } else { + DPRINTF(RubySequencer, "Issuing ATOMIC NO RETURN\n"); + primary_type = secondary_type = + RubyRequestType_ATOMIC_NO_RETURN; + + } +#endif } else { // // To support SwapReq, we need to check isWrite() first: a SwapReq @@ -914,6 +995,18 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) RubyAccessMode_Supervisor, pkt, PrefetchBit_No, proc_id, core_id); + if (pkt->isAtomicOp() && + ((secondary_type == 
RubyRequestType_ATOMIC_RETURN) || + (secondary_type == RubyRequestType_ATOMIC_NO_RETURN))){ + // Create the blocksize, access mask and atomicops + uint32_t offset = getOffset(pkt->getAddr()); + std::vector<std::pair<int, AtomicOpFunctor*>> atomicOps; + atomicOps.push_back(std::make_pair + (offset, pkt->getAtomicOp())); + + msg->setWriteMask(offset, pkt->getSize(), atomicOps); + } + DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %#x %s\n", curTick(), m_version, "Seq", "Begin", "", "", printAddress(msg->getPhysicalAddress()), diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index 020a7d8c20..8f736da6d5 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -126,6 +126,14 @@ class Sequencer : public RubyPort const Cycles forwardRequestTime = Cycles(0), const Cycles firstResponseTime = Cycles(0)); + void atomicCallback(Addr address, + DataBlock& data, + const bool externalHit = false, + const MachineType mach = MachineType_NUM, + const Cycles initialRequestTime = Cycles(0), + const Cycles forwardRequestTime = Cycles(0), + const Cycles firstResponseTime = Cycles(0)); + void unaddressedCallback(Addr unaddressedReqId, RubyRequestType requestType, const MachineType mach = MachineType_NUM,