diff --git a/src/mem/ruby/common/DataBlock.cc b/src/mem/ruby/common/DataBlock.cc index 70d9bc332a..8f47d0026b 100644 --- a/src/mem/ruby/common/DataBlock.cc +++ b/src/mem/ruby/common/DataBlock.cc @@ -110,12 +110,13 @@ DataBlock::copyPartial(const DataBlock &dblk, const WriteMask &mask) } void -DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask) +DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask, + bool isAtomicNoReturn) { for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) { m_data[i] = dblk.m_data[i]; } - mask.performAtomic(m_data, m_atomicLog); + mask.performAtomic(m_data, m_atomicLog, isAtomicNoReturn); } void diff --git a/src/mem/ruby/common/DataBlock.hh b/src/mem/ruby/common/DataBlock.hh index aa94f56eb8..7456a25f3f 100644 --- a/src/mem/ruby/common/DataBlock.hh +++ b/src/mem/ruby/common/DataBlock.hh @@ -96,7 +96,8 @@ class DataBlock void setData(PacketPtr pkt); void copyPartial(const DataBlock &dblk, int offset, int len); void copyPartial(const DataBlock &dblk, const WriteMask &mask); - void atomicPartial(const DataBlock & dblk, const WriteMask & mask); + void atomicPartial(const DataBlock & dblk, const WriteMask & mask, + bool isAtomicNoReturn=true); bool equal(const DataBlock& obj) const; void print(std::ostream& out) const; diff --git a/src/mem/ruby/common/WriteMask.cc b/src/mem/ruby/common/WriteMask.cc index 911262b4ba..1fa03c951e 100644 --- a/src/mem/ruby/common/WriteMask.cc +++ b/src/mem/ruby/common/WriteMask.cc @@ -57,7 +57,7 @@ WriteMask::print(std::ostream& out) const void WriteMask::performAtomic(uint8_t * p, - std::deque<uint8_t*>& log) const + std::deque<uint8_t*>& log, bool isAtomicNoReturn) const { int offset; uint8_t *block_update; @@ -65,11 +65,13 @@ WriteMask::performAtomic(uint8_t * p, // vector. This is done to match the ordering of packets // that was seen when the initial coalesced request was created. 
for (int i = 0; i < mAtomicOp.size(); i++) { - // Save the old value of the data block in case a - // return value is needed - block_update = new uint8_t[mSize]; - std::memcpy(block_update, p, mSize); - log.push_back(block_update); + if (!isAtomicNoReturn) { + // Save the old value of the data block in case a + // return value is needed + block_update = new uint8_t[mSize]; + std::memcpy(block_update, p, mSize); + log.push_back(block_update); + } // Perform the atomic operation offset = mAtomicOp[i].first; AtomicOpFunctor *fnctr = mAtomicOp[i].second; diff --git a/src/mem/ruby/common/WriteMask.hh b/src/mem/ruby/common/WriteMask.hh index 47ec798500..8c6b8ce976 100644 --- a/src/mem/ruby/common/WriteMask.hh +++ b/src/mem/ruby/common/WriteMask.hh @@ -230,7 +230,8 @@ class WriteMask * specific atomic operation. */ void performAtomic(uint8_t * p, - std::deque<uint8_t*>& atomicChangeLog) const; + std::deque<uint8_t*>& atomicChangeLog, + bool isAtomicNoReturn=true) const; const AtomicOpVector& getAtomicOps() const diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm index dfab0ed29a..7cb3a00e26 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm @@ -369,7 +369,9 @@ machine(MachineType:TCC, "TCC Cache") } else { trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe); } - } else if (in_msg.Type == CoherenceRequestType:Atomic) { + } else if (in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn) { // If the request is system-level, if the address isn't in the cache, // or if this cache is write-through, then send the request to the // directory. 
Since non-SLC atomics won't be performed by the directory, @@ -612,12 +614,17 @@ machine(MachineType:TCC, "TCC Cache") } if (coreRequestNetwork_in.isReady(clockEdge())) { peek(coreRequestNetwork_in, CPURequestMsg) { - if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){ + if(in_msg.Type == CoherenceRequestType:RdBlk || + in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn){ tbe.Destination.add(in_msg.Requestor); } tbe.isGLCSet := in_msg.isGLCSet; tbe.isSLCSet := in_msg.isSLCSet; - if(in_msg.Type == CoherenceRequestType:Atomic){ + if(in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn){ tbe.atomicWriteMask.clear(); tbe.atomicWriteMask.orMask(in_msg.writeMask); } @@ -715,7 +722,7 @@ machine(MachineType:TCC, "TCC Cache") out_msg.WTRequestor := in_msg.Requestor; out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory)); out_msg.MessageSize := MessageSizeType:Data; - out_msg.Type := CoherenceRequestType:Atomic; + out_msg.Type := in_msg.Type; out_msg.Dirty := true; out_msg.writeMask.orMask(in_msg.writeMask); out_msg.isGLCSet := in_msg.isGLCSet; @@ -801,7 +808,16 @@ machine(MachineType:TCC, "TCC Cache") } action(pa_performAtomic, "pa", desc="Perform atomic") { - cache_entry.DataBlk.atomicPartial(cache_entry.DataBlk, cache_entry.writeMask); + peek(coreRequestNetwork_in, CPURequestMsg) { + if (in_msg.Type == CoherenceRequestType:AtomicReturn) { + cache_entry.DataBlk.atomicPartial(cache_entry.DataBlk, cache_entry.writeMask, false); + } else { + // Set the isAtomicNoReturn flag to ensure that logs are not + // generated erroneously + assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn); + cache_entry.DataBlk.atomicPartial(cache_entry.DataBlk, cache_entry.writeMask, true); + } + } } // END ACTIONS diff --git 
a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm index 5530e6dcb3..ee49d859a8 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm @@ -441,10 +441,15 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, TCC_select_low_bit, TCC_select_num_bits)); out_msg.MessageSize := MessageSizeType:Data; - out_msg.Type := CoherenceRequestType:Atomic; out_msg.InitialRequestTime := curCycle(); out_msg.Shared := false; peek(mandatoryQueue_in, RubyRequest) { + if (in_msg.Type == RubyRequestType:ATOMIC_RETURN) { + out_msg.Type := CoherenceRequestType:AtomicReturn; + } else { + assert(in_msg.Type == RubyRequestType:ATOMIC_NO_RETURN); + out_msg.Type := CoherenceRequestType:AtomicNoReturn; + } out_msg.instSeqNum := in_msg.instSeqNum; out_msg.isGLCSet := in_msg.isGLCSet; out_msg.isSLCSet := in_msg.isSLCSet; diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-Region-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-Region-dir.sm index ec8ffe6325..2f5103f846 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-Region-dir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-Region-dir.sm @@ -180,6 +180,9 @@ machine(MachineType:Directory, "AMD_Base-like protocol") bool MemData, desc="Got MemData?",default="false"; bool wtData, desc="Got write through data?",default="false"; bool atomicData, desc="Got Atomic op?",default="false"; + // Note, protocol invariant: atomicData = atomicDataReturn || atomicDataNoReturn; + bool atomicDataReturn, desc="Got Atomic op and need return value?",default="false"; + bool atomicDataNoReturn, desc="Got Atomic op and don't need return value?",default="false"; Cycles InitialRequestTime, desc="..."; Cycles ForwardRequestTime, desc="..."; Cycles ProbeRequestStartTime, desc="..."; @@ -436,7 +439,9 @@ machine(MachineType:Directory, "AMD_Base-like protocol") trigger(Event:RdBlkS, in_msg.addr, entry, tbe); } else if (in_msg.Type == 
CoherenceRequestType:RdBlkM) { trigger(Event:RdBlkM, in_msg.addr, entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:Atomic) { + } else if (in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn) { trigger(Event:Atomic, in_msg.addr, entry, tbe); } else if (in_msg.Type == CoherenceRequestType:WriteThrough) { trigger(Event:WriteThrough, in_msg.addr, entry, tbe); @@ -474,7 +479,9 @@ machine(MachineType:Directory, "AMD_Base-like protocol") trigger(Event:RdBlkSP, in_msg.addr, entry, tbe); } else if (in_msg.Type == CoherenceRequestType:RdBlkM) { trigger(Event:RdBlkMP, in_msg.addr, entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:Atomic) { + } else if (in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn) { trigger(Event:AtomicP, in_msg.addr, entry, tbe); } else if (in_msg.Type == CoherenceRequestType:WriteThrough) { trigger(Event:WriteThroughP, in_msg.addr, entry, tbe); @@ -670,7 +677,9 @@ machine(MachineType:Directory, "AMD_Base-like protocol") out_msg.DemandRequest := false; } } else { - assert(in_msg.Type == CoherenceRequestType:Atomic); + assert(in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn); enqueue(responseNetwork_out, ResponseMsg, response_latency) { out_msg.addr := address; out_msg.Type := CoherenceResponseType:NBSysResp; @@ -977,10 +986,18 @@ machine(MachineType:Directory, "AMD_Base-like protocol") tbe.WTRequestor := in_msg.WTRequestor; tbe.LastSender := in_msg.Requestor; } - if (in_msg.Type == CoherenceRequestType:Atomic) { + if (in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn) { tbe.writeMask.clear(); 
tbe.writeMask.orMask(in_msg.writeMask); tbe.atomicData := true; + if (in_msg.Type == CoherenceRequestType:AtomicReturn) { + tbe.atomicDataReturn := true; + } else { + assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn); + tbe.atomicDataNoReturn := true; + } tbe.WTRequestor := in_msg.WTRequestor; tbe.LastSender := in_msg.Requestor; } @@ -1012,10 +1029,18 @@ machine(MachineType:Directory, "AMD_Base-like protocol") tbe.WTRequestor := in_msg.WTRequestor; tbe.LastSender := in_msg.Requestor; } - if (in_msg.Type == CoherenceRequestType:Atomic) { + if (in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn) { tbe.writeMask.clear(); tbe.writeMask.orMask(in_msg.writeMask); tbe.atomicData := true; + if (in_msg.Type == CoherenceRequestType:AtomicReturn) { + tbe.atomicDataReturn := true; + } else { + assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn); + tbe.atomicDataNoReturn := true; + } tbe.WTRequestor := in_msg.WTRequestor; tbe.LastSender := in_msg.Requestor; } @@ -1062,8 +1087,15 @@ machine(MachineType:Directory, "AMD_Base-like protocol") tbe.DataBlkAux.copyPartial(in_msg.DataBlk,in_msg.writeMask); getDirectoryEntry(address).DataBlk := tbe.DataBlkAux; } else{ - assert(in_msg.Type == CoherenceRequestType:Atomic); - tbe.DataBlkAux.atomicPartial(getDirectoryEntry(address).DataBlk,in_msg.writeMask); + assert(in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn); + if (in_msg.Type == CoherenceRequestType:AtomicReturn) { + tbe.DataBlkAux.atomicPartial(getDirectoryEntry(address).DataBlk,in_msg.writeMask, false); + } else { + assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn); + tbe.DataBlkAux.atomicPartial(getDirectoryEntry(address).DataBlk,in_msg.writeMask, true); + } getDirectoryEntry(address).DataBlk := tbe.DataBlkAux; } } @@ -1076,7 +1108,12 @@ 
machine(MachineType:Directory, "AMD_Base-like protocol") tbe.DataBlk := tmp; getDirectoryEntry(address).DataBlk := tbe.DataBlk; } else if (tbe.atomicData) { - tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask); + if (tbe.atomicDataReturn) { + tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask, false); + } else { + assert(tbe.atomicDataNoReturn); + tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask, true); + } getDirectoryEntry(address).DataBlk := tbe.DataBlk; } else if (tbe.Dirty == true) { APPEND_TRANSITION_COMMENT(" Wrote data back "); @@ -1137,6 +1174,7 @@ machine(MachineType:Directory, "AMD_Base-like protocol") tbe.DataBlk := tmp; } else if (tbe.Dirty) { if(tbe.atomicData == false && tbe.wtData == false) { + assert(tbe.atomicDataReturn == false && tbe.atomicDataNoReturn == false); DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender); assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data } diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm index 5987d7cf76..5d85ad2fc6 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm @@ -458,7 +458,9 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol") trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == CoherenceRequestType:WriteThrough ) { trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:Atomic ) { + } else if (in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn) { trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe); } else { if (in_msg.Type == CoherenceRequestType:VicDirty || @@ -523,9 +525,11 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like 
protocol") assert(in_msg.Type == CoherenceRequestType:RdBlkM || in_msg.Type == CoherenceRequestType:RdBlkS); } APPEND_TRANSITION_COMMENT(cache_entry.NumOutstandingReqs); - if (in_msg.Type == CoherenceRequestType:RdBlkM || in_msg.Type == CoherenceRequestType:Atomic || - in_msg.Type == CoherenceRequestType:WriteThrough ) - { + if (in_msg.Type == CoherenceRequestType:RdBlkM || + in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn || + in_msg.Type == CoherenceRequestType:WriteThrough) { cache_entry.dirty := true; } if (in_msg.Type == CoherenceRequestType:VicDirty || diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm index c3bbfa1950..b9401d680a 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm @@ -159,6 +159,9 @@ machine(MachineType:Directory, "AMD Baseline protocol") bool MemData, desc="Got MemData?",default="false"; bool wtData, desc="Got write through data?",default="false"; bool atomicData, desc="Got Atomic op?",default="false"; + // Note, protocol invariant: atomicData = atomicDataReturn || atomicDataNoReturn; + bool atomicDataReturn, desc="Got Atomic op and need return value?",default="false"; + bool atomicDataNoReturn, desc="Got Atomic op and don't need return value?",default="false"; Cycles InitialRequestTime, desc="..."; Cycles ForwardRequestTime, desc="..."; Cycles ProbeRequestStartTime, desc="..."; @@ -399,7 +402,9 @@ machine(MachineType:Directory, "AMD Baseline protocol") trigger(Event:RdBlkM, in_msg.addr, entry, tbe); } else if (in_msg.Type == CoherenceRequestType:WriteThrough) { trigger(Event:WriteThrough, in_msg.addr, entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:Atomic) { + } else if (in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == 
CoherenceRequestType:AtomicNoReturn) { trigger(Event:Atomic, in_msg.addr, entry, tbe); } else if (in_msg.Type == CoherenceRequestType:VicDirty) { if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) { @@ -743,7 +748,9 @@ machine(MachineType:Directory, "AMD Baseline protocol") // CPU + GPU or GPU only system if ((in_msg.Type != CoherenceRequestType:WriteThrough && - in_msg.Type != CoherenceRequestType:Atomic) || + in_msg.Type != CoherenceRequestType:Atomic && + in_msg.Type != CoherenceRequestType:AtomicReturn && + in_msg.Type != CoherenceRequestType:AtomicNoReturn) || !in_msg.NoWriteConflict) { if (noTCCdir) { probe_dests.add(mapAddressToRange(address, MachineType:TCC, @@ -1013,10 +1020,18 @@ machine(MachineType:Directory, "AMD Baseline protocol") tbe.WTRequestor := in_msg.WTRequestor; tbe.LastSender := in_msg.Requestor; } - if (in_msg.Type == CoherenceRequestType:Atomic) { + if (in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn) { tbe.writeMask.clear(); tbe.writeMask.orMask(in_msg.writeMask); tbe.atomicData := true; + if (in_msg.Type == CoherenceRequestType:AtomicReturn) { + tbe.atomicDataReturn := true; + } else { + assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn); + tbe.atomicDataNoReturn := true; + } tbe.WTRequestor := in_msg.WTRequestor; tbe.LastSender := in_msg.Requestor; tbe.isSLCSet := in_msg.isSLCSet; @@ -1043,7 +1058,12 @@ machine(MachineType:Directory, "AMD Baseline protocol") // Only perform atomics in the directory if the SLC bit is set, or // if the L2 is WT if (tbe.atomicData && (tbe.isSLCSet || !L2isWB)) { - tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask); + if (tbe.atomicDataReturn) { + tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask, false); + } else { + assert(tbe.atomicDataNoReturn); + tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask, true); + } } enqueue(memQueue_out, MemoryMsg, 
to_memory_controller_latency) { out_msg.addr := address; @@ -1083,6 +1103,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") tbe.writeMask.fillMask(); } else if (tbe.Dirty) { if(tbe.atomicData == false && tbe.wtData == false) { + assert(tbe.atomicDataReturn == false && tbe.atomicDataNoReturn == false); DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender); assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data } diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm index bb3a013325..984362da39 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm @@ -39,6 +39,8 @@ enumeration(CoherenceRequestType, desc="Coherence Request Types") { VicClean, desc="L2 clean eviction"; VicDirty, desc="L2 dirty eviction"; Atomic, desc="Upper level atomic"; + AtomicReturn, desc="Upper level atomic"; + AtomicNoReturn, desc="Upper level atomic"; AtomicWriteBack, desc="Upper level atomic"; WriteThrough, desc="Ordered WriteThrough w/Data"; WriteThroughFifo, desc="WriteThrough with no data"; diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm index 5e815a7165..4e9e9597aa 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm @@ -170,6 +170,9 @@ machine(MachineType:Directory, "AMD Baseline protocol") bool MemData, desc="Got MemData?",default="false"; bool wtData, desc="Got write through data?",default="false"; bool atomicData, desc="Got Atomic op?",default="false"; + // Note, protocol invariant: atomicData = atomicDataReturn || atomicDataNoReturn; + bool atomicDataReturn, desc="Got Atomic op and need return value?",default="false"; + bool atomicDataNoReturn, desc="Got Atomic op and don't need return value?",default="false"; Cycles InitialRequestTime, desc="..."; Cycles ForwardRequestTime, desc="..."; Cycles 
ProbeRequestStartTime, desc="..."; @@ -451,7 +454,9 @@ machine(MachineType:Directory, "AMD Baseline protocol") trigger(Event:RdBlkM, in_msg.addr, entry, tbe); } else if (in_msg.Type == CoherenceRequestType:WriteThrough) { trigger(Event:WriteThrough, in_msg.addr, entry, tbe); - } else if (in_msg.Type == CoherenceRequestType:Atomic) { + } else if (in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn) { trigger(Event:Atomic, in_msg.addr, entry, tbe); } else if (in_msg.Type == CoherenceRequestType:VicDirty) { if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) { @@ -656,7 +661,9 @@ machine(MachineType:Directory, "AMD Baseline protocol") // add relevant TCC node to list. This replaces all TCPs and SQCs if(isGPUSharer(address)) { if ((in_msg.Type == CoherenceRequestType:WriteThrough || - in_msg.Type == CoherenceRequestType:Atomic) && + in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn) && in_msg.NoWriteConflict) { // Don't Include TCCs unless there was write-CAB conflict in the TCC } else if(noTCCdir) { @@ -814,6 +821,8 @@ machine(MachineType:Directory, "AMD Baseline protocol") tbe.writeMask.clear(); tbe.wtData := false; tbe.atomicData := false; + tbe.atomicDataReturn := false; + tbe.atomicDataNoReturn := false; tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs tbe.Dirty := false; tbe.NumPendingAcks := 0; @@ -831,10 +840,18 @@ machine(MachineType:Directory, "AMD Baseline protocol") tbe.WTRequestor := in_msg.WTRequestor; tbe.LastSender := in_msg.Requestor; } - if (in_msg.Type == CoherenceRequestType:Atomic) { + if (in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn) { tbe.writeMask.clear(); 
tbe.writeMask.orMask(in_msg.writeMask); tbe.atomicData := true; + if (in_msg.Type == CoherenceRequestType:AtomicReturn) { + tbe.atomicDataReturn := true; + } else { + assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn); + tbe.atomicDataNoReturn := true; + } tbe.WTRequestor := in_msg.WTRequestor; tbe.LastSender := in_msg.Requestor; } @@ -866,8 +883,14 @@ machine(MachineType:Directory, "AMD Baseline protocol") tbe.DataBlk := tmp; getDirectoryEntry(address).DataBlk := tbe.DataBlk; } else if (tbe.atomicData) { - tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk, - tbe.writeMask); + if (tbe.atomicDataReturn) { + tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk, + tbe.writeMask, false); + } else { + assert(tbe.atomicDataNoReturn); + tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk, + tbe.writeMask, true); + } getDirectoryEntry(address).DataBlk := tbe.DataBlk; } else if (tbe.Dirty == false) { getDirectoryEntry(address).DataBlk := tbe.DataBlk; @@ -896,6 +919,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") tbe.DataBlk := tmp; } else if (tbe.Dirty) { if(tbe.atomicData == false && tbe.wtData == false) { + assert(tbe.atomicDataReturn == false && tbe.atomicDataNoReturn == false); DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender); assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data } @@ -1050,7 +1074,9 @@ machine(MachineType:Directory, "AMD Baseline protocol") entry.pfState := ProbeFilterState:T; entry.isOnCPU := false; entry.isOnGPU := false; - } else if (in_msg.Type == CoherenceRequestType:Atomic) { + } else if (in_msg.Type == CoherenceRequestType:Atomic || + in_msg.Type == CoherenceRequestType:AtomicReturn || + in_msg.Type == CoherenceRequestType:AtomicNoReturn) { entry.pfState := ProbeFilterState:T; entry.isOnCPU := false; entry.isOnGPU := false; diff --git a/src/mem/ruby/protocol/RubySlicc_Exports.sm b/src/mem/ruby/protocol/RubySlicc_Exports.sm index 9ccafba41f..0d2bc742f9 100644 --- 
a/src/mem/ruby/protocol/RubySlicc_Exports.sm +++ b/src/mem/ruby/protocol/RubySlicc_Exports.sm @@ -77,6 +77,7 @@ structure(DataBlock, external = "yes", desc="..."){ void copyPartial(DataBlock, int, int); void copyPartial(DataBlock, WriteMask); void atomicPartial(DataBlock, WriteMask); + void atomicPartial(DataBlock, WriteMask, bool); int numAtomicLogEntries(); void clearAtomicLogEntries(); }