mem-ruby: Fix for not creating log entries on atomic no return requests (#546)

Augmenting DataBlock and WriteMask to support an optional argument that
distinguishes between return and no-return atomics. In the case of
atomic no-return requests, log entries should not be created when
performing the atomic.

Change-Id: Ic3112834742f4058a7aa155d25ccc4c014b60199a
This commit is contained in:
BujSet
2023-11-14 09:54:42 -06:00
committed by GitHub
parent be5c03ea9f
commit 65b44e6516
12 changed files with 156 additions and 38 deletions

View File

@@ -110,12 +110,13 @@ DataBlock::copyPartial(const DataBlock &dblk, const WriteMask &mask)
}
void
DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask)
DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask,
bool isAtomicNoReturn)
{
for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) {
m_data[i] = dblk.m_data[i];
}
mask.performAtomic(m_data, m_atomicLog);
mask.performAtomic(m_data, m_atomicLog, isAtomicNoReturn);
}
void

View File

@@ -96,7 +96,8 @@ class DataBlock
void setData(PacketPtr pkt);
void copyPartial(const DataBlock &dblk, int offset, int len);
void copyPartial(const DataBlock &dblk, const WriteMask &mask);
void atomicPartial(const DataBlock & dblk, const WriteMask & mask);
void atomicPartial(const DataBlock & dblk, const WriteMask & mask,
bool isAtomicNoReturn=true);
bool equal(const DataBlock& obj) const;
void print(std::ostream& out) const;

View File

@@ -57,7 +57,7 @@ WriteMask::print(std::ostream& out) const
void
WriteMask::performAtomic(uint8_t * p,
std::deque<uint8_t*>& log) const
std::deque<uint8_t*>& log, bool isAtomicNoReturn) const
{
int offset;
uint8_t *block_update;
@@ -65,11 +65,13 @@ WriteMask::performAtomic(uint8_t * p,
// vector. This is done to match the ordering of packets
// that was seen when the initial coalesced request was created.
for (int i = 0; i < mAtomicOp.size(); i++) {
// Save the old value of the data block in case a
// return value is needed
block_update = new uint8_t[mSize];
std::memcpy(block_update, p, mSize);
log.push_back(block_update);
if (!isAtomicNoReturn) {
// Save the old value of the data block in case a
// return value is needed
block_update = new uint8_t[mSize];
std::memcpy(block_update, p, mSize);
log.push_back(block_update);
}
// Perform the atomic operation
offset = mAtomicOp[i].first;
AtomicOpFunctor *fnctr = mAtomicOp[i].second;

View File

@@ -230,7 +230,8 @@ class WriteMask
* specific atomic operation.
*/
void performAtomic(uint8_t * p,
std::deque<uint8_t*>& atomicChangeLog) const;
std::deque<uint8_t*>& atomicChangeLog,
bool isAtomicNoReturn=true) const;
const AtomicOpVector&
getAtomicOps() const

View File

@@ -369,7 +369,9 @@ machine(MachineType:TCC, "TCC Cache")
} else {
trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
}
} else if (in_msg.Type == CoherenceRequestType:Atomic) {
} else if (in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
// If the request is system-level, if the address isn't in the cache,
// or if this cache is write-through, then send the request to the
// directory. Since non-SLC atomics won't be performed by the directory,
@@ -612,12 +614,17 @@ machine(MachineType:TCC, "TCC Cache")
}
if (coreRequestNetwork_in.isReady(clockEdge())) {
peek(coreRequestNetwork_in, CPURequestMsg) {
if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){
if(in_msg.Type == CoherenceRequestType:RdBlk ||
in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn){
tbe.Destination.add(in_msg.Requestor);
}
tbe.isGLCSet := in_msg.isGLCSet;
tbe.isSLCSet := in_msg.isSLCSet;
if(in_msg.Type == CoherenceRequestType:Atomic){
if(in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn){
tbe.atomicWriteMask.clear();
tbe.atomicWriteMask.orMask(in_msg.writeMask);
}
@@ -715,7 +722,7 @@ machine(MachineType:TCC, "TCC Cache")
out_msg.WTRequestor := in_msg.Requestor;
out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
out_msg.MessageSize := MessageSizeType:Data;
out_msg.Type := CoherenceRequestType:Atomic;
out_msg.Type := in_msg.Type;
out_msg.Dirty := true;
out_msg.writeMask.orMask(in_msg.writeMask);
out_msg.isGLCSet := in_msg.isGLCSet;
@@ -801,7 +808,16 @@ machine(MachineType:TCC, "TCC Cache")
}
action(pa_performAtomic, "pa", desc="Perform atomic") {
cache_entry.DataBlk.atomicPartial(cache_entry.DataBlk, cache_entry.writeMask);
peek(coreRequestNetwork_in, CPURequestMsg) {
if (in_msg.Type == CoherenceRequestType:AtomicReturn) {
cache_entry.DataBlk.atomicPartial(cache_entry.DataBlk, cache_entry.writeMask, false);
} else {
// Set the isAtomicNoReturn flag to ensure that logs are not
// generated erroneously
assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn);
cache_entry.DataBlk.atomicPartial(cache_entry.DataBlk, cache_entry.writeMask, true);
}
}
}
// END ACTIONS

View File

@@ -441,10 +441,15 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
TCC_select_low_bit, TCC_select_num_bits));
out_msg.MessageSize := MessageSizeType:Data;
out_msg.Type := CoherenceRequestType:Atomic;
out_msg.InitialRequestTime := curCycle();
out_msg.Shared := false;
peek(mandatoryQueue_in, RubyRequest) {
if (in_msg.Type == RubyRequestType:ATOMIC_RETURN) {
out_msg.Type := CoherenceRequestType:AtomicReturn;
} else {
assert(in_msg.Type == RubyRequestType:ATOMIC_NO_RETURN);
out_msg.Type := CoherenceRequestType:AtomicNoReturn;
}
out_msg.instSeqNum := in_msg.instSeqNum;
out_msg.isGLCSet := in_msg.isGLCSet;
out_msg.isSLCSet := in_msg.isSLCSet;

View File

@@ -180,6 +180,9 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
bool MemData, desc="Got MemData?",default="false";
bool wtData, desc="Got write through data?",default="false";
bool atomicData, desc="Got Atomic op?",default="false";
// Note, protocol invariant: atomicData = atomicDataReturn || atomicDataNoReturn;
bool atomicDataReturn, desc="Got Atomic op and need return value?",default="false";
bool atomicDataNoReturn, desc="Got Atomic op and don't need return value?",default="false";
Cycles InitialRequestTime, desc="...";
Cycles ForwardRequestTime, desc="...";
Cycles ProbeRequestStartTime, desc="...";
@@ -436,7 +439,9 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
trigger(Event:RdBlkS, in_msg.addr, entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
trigger(Event:RdBlkM, in_msg.addr, entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:Atomic) {
} else if (in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
trigger(Event:Atomic, in_msg.addr, entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
trigger(Event:WriteThrough, in_msg.addr, entry, tbe);
@@ -474,7 +479,9 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
trigger(Event:RdBlkSP, in_msg.addr, entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:RdBlkM) {
trigger(Event:RdBlkMP, in_msg.addr, entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:Atomic) {
} else if (in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
trigger(Event:AtomicP, in_msg.addr, entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
trigger(Event:WriteThroughP, in_msg.addr, entry, tbe);
@@ -670,7 +677,9 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
out_msg.DemandRequest := false;
}
} else {
assert(in_msg.Type == CoherenceRequestType:Atomic);
assert(in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn);
enqueue(responseNetwork_out, ResponseMsg, response_latency) {
out_msg.addr := address;
out_msg.Type := CoherenceResponseType:NBSysResp;
@@ -977,10 +986,18 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
tbe.WTRequestor := in_msg.WTRequestor;
tbe.LastSender := in_msg.Requestor;
}
if (in_msg.Type == CoherenceRequestType:Atomic) {
if (in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
tbe.writeMask.clear();
tbe.writeMask.orMask(in_msg.writeMask);
tbe.atomicData := true;
if (in_msg.Type == CoherenceRequestType:AtomicReturn) {
tbe.atomicDataReturn := true;
} else {
assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn);
tbe.atomicDataNoReturn := true;
}
tbe.WTRequestor := in_msg.WTRequestor;
tbe.LastSender := in_msg.Requestor;
}
@@ -1012,10 +1029,18 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
tbe.WTRequestor := in_msg.WTRequestor;
tbe.LastSender := in_msg.Requestor;
}
if (in_msg.Type == CoherenceRequestType:Atomic) {
if (in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
tbe.writeMask.clear();
tbe.writeMask.orMask(in_msg.writeMask);
tbe.atomicData := true;
if (in_msg.Type == CoherenceRequestType:AtomicReturn) {
tbe.atomicDataReturn := true;
} else {
assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn);
tbe.atomicDataNoReturn := true;
}
tbe.WTRequestor := in_msg.WTRequestor;
tbe.LastSender := in_msg.Requestor;
}
@@ -1062,8 +1087,15 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
tbe.DataBlkAux.copyPartial(in_msg.DataBlk,in_msg.writeMask);
getDirectoryEntry(address).DataBlk := tbe.DataBlkAux;
} else{
assert(in_msg.Type == CoherenceRequestType:Atomic);
tbe.DataBlkAux.atomicPartial(getDirectoryEntry(address).DataBlk,in_msg.writeMask);
assert(in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn);
if (in_msg.Type == CoherenceRequestType:AtomicReturn) {
tbe.DataBlkAux.atomicPartial(getDirectoryEntry(address).DataBlk,in_msg.writeMask, false);
} else {
assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn);
tbe.DataBlkAux.atomicPartial(getDirectoryEntry(address).DataBlk,in_msg.writeMask, true);
}
getDirectoryEntry(address).DataBlk := tbe.DataBlkAux;
}
}
@@ -1076,7 +1108,12 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
tbe.DataBlk := tmp;
getDirectoryEntry(address).DataBlk := tbe.DataBlk;
} else if (tbe.atomicData) {
tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask);
if (tbe.atomicDataReturn) {
tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask, false);
} else {
assert(tbe.atomicDataNoReturn);
tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,tbe.writeMask, true);
}
getDirectoryEntry(address).DataBlk := tbe.DataBlk;
} else if (tbe.Dirty == true) {
APPEND_TRANSITION_COMMENT(" Wrote data back ");
@@ -1137,6 +1174,7 @@ machine(MachineType:Directory, "AMD_Base-like protocol")
tbe.DataBlk := tmp;
} else if (tbe.Dirty) {
if(tbe.atomicData == false && tbe.wtData == false) {
assert(tbe.atomicDataReturn == false && tbe.atomicDataNoReturn == false);
DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data
}

View File

@@ -458,7 +458,9 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol")
trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:WriteThrough ) {
trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:Atomic ) {
} else if (in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
trigger(Event:CPUWrite, in_msg.addr, cache_entry, tbe);
} else {
if (in_msg.Type == CoherenceRequestType:VicDirty ||
@@ -523,9 +525,11 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol")
assert(in_msg.Type == CoherenceRequestType:RdBlkM || in_msg.Type == CoherenceRequestType:RdBlkS);
}
APPEND_TRANSITION_COMMENT(cache_entry.NumOutstandingReqs);
if (in_msg.Type == CoherenceRequestType:RdBlkM || in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:WriteThrough )
{
if (in_msg.Type == CoherenceRequestType:RdBlkM ||
in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn ||
in_msg.Type == CoherenceRequestType:WriteThrough) {
cache_entry.dirty := true;
}
if (in_msg.Type == CoherenceRequestType:VicDirty ||

View File

@@ -159,6 +159,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
bool MemData, desc="Got MemData?",default="false";
bool wtData, desc="Got write through data?",default="false";
bool atomicData, desc="Got Atomic op?",default="false";
// Note, protocol invariant: atomicData = atomicDataReturn || atomicDataNoReturn;
bool atomicDataReturn, desc="Got Atomic op and need return value?",default="false";
bool atomicDataNoReturn, desc="Got Atomic op and don't need return value?",default="false";
Cycles InitialRequestTime, desc="...";
Cycles ForwardRequestTime, desc="...";
Cycles ProbeRequestStartTime, desc="...";
@@ -399,7 +402,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
trigger(Event:RdBlkM, in_msg.addr, entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
trigger(Event:WriteThrough, in_msg.addr, entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:Atomic) {
} else if (in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
trigger(Event:Atomic, in_msg.addr, entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:VicDirty) {
if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
@@ -743,7 +748,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
// CPU + GPU or GPU only system
if ((in_msg.Type != CoherenceRequestType:WriteThrough &&
in_msg.Type != CoherenceRequestType:Atomic) ||
in_msg.Type != CoherenceRequestType:Atomic &&
in_msg.Type != CoherenceRequestType:AtomicReturn &&
in_msg.Type != CoherenceRequestType:AtomicNoReturn) ||
!in_msg.NoWriteConflict) {
if (noTCCdir) {
probe_dests.add(mapAddressToRange(address, MachineType:TCC,
@@ -1013,10 +1020,18 @@ machine(MachineType:Directory, "AMD Baseline protocol")
tbe.WTRequestor := in_msg.WTRequestor;
tbe.LastSender := in_msg.Requestor;
}
if (in_msg.Type == CoherenceRequestType:Atomic) {
if (in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
tbe.writeMask.clear();
tbe.writeMask.orMask(in_msg.writeMask);
tbe.atomicData := true;
if (in_msg.Type == CoherenceRequestType:AtomicReturn) {
tbe.atomicDataReturn := true;
} else {
assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn);
tbe.atomicDataNoReturn := true;
}
tbe.WTRequestor := in_msg.WTRequestor;
tbe.LastSender := in_msg.Requestor;
tbe.isSLCSet := in_msg.isSLCSet;
@@ -1043,7 +1058,12 @@ machine(MachineType:Directory, "AMD Baseline protocol")
// Only perform atomics in the directory if the SLC bit is set, or
// if the L2 is WT
if (tbe.atomicData && (tbe.isSLCSet || !L2isWB)) {
tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask);
if (tbe.atomicDataReturn) {
tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask, false);
} else {
assert(tbe.atomicDataNoReturn);
tbe.DataBlk.atomicPartial(tbe.DataBlk, tbe.writeMask, true);
}
}
enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
out_msg.addr := address;
@@ -1083,6 +1103,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
tbe.writeMask.fillMask();
} else if (tbe.Dirty) {
if(tbe.atomicData == false && tbe.wtData == false) {
assert(tbe.atomicDataReturn == false && tbe.atomicDataNoReturn == false);
DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data
}

View File

@@ -39,6 +39,8 @@ enumeration(CoherenceRequestType, desc="Coherence Request Types") {
VicClean, desc="L2 clean eviction";
VicDirty, desc="L2 dirty eviction";
Atomic, desc="Upper level atomic";
AtomicReturn, desc="Upper level atomic that needs the old value returned";
AtomicNoReturn, desc="Upper level atomic that discards the return value";
AtomicWriteBack, desc="Upper level atomic";
WriteThrough, desc="Ordered WriteThrough w/Data";
WriteThroughFifo, desc="WriteThrough with no data";

View File

@@ -170,6 +170,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
bool MemData, desc="Got MemData?",default="false";
bool wtData, desc="Got write through data?",default="false";
bool atomicData, desc="Got Atomic op?",default="false";
// Note, protocol invariant: atomicData = atomicDataReturn || atomicDataNoReturn;
bool atomicDataReturn, desc="Got Atomic op and need return value?",default="false";
bool atomicDataNoReturn, desc="Got Atomic op and don't need return value?",default="false";
Cycles InitialRequestTime, desc="...";
Cycles ForwardRequestTime, desc="...";
Cycles ProbeRequestStartTime, desc="...";
@@ -451,7 +454,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
trigger(Event:RdBlkM, in_msg.addr, entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:WriteThrough) {
trigger(Event:WriteThrough, in_msg.addr, entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:Atomic) {
} else if (in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
trigger(Event:Atomic, in_msg.addr, entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:VicDirty) {
if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) {
@@ -656,7 +661,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
// add relevant TCC node to list. This replaces all TCPs and SQCs
if(isGPUSharer(address)) {
if ((in_msg.Type == CoherenceRequestType:WriteThrough ||
in_msg.Type == CoherenceRequestType:Atomic) &&
in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn) &&
in_msg.NoWriteConflict) {
// Don't Include TCCs unless there was write-CAB conflict in the TCC
} else if(noTCCdir) {
@@ -814,6 +821,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
tbe.writeMask.clear();
tbe.wtData := false;
tbe.atomicData := false;
tbe.atomicDataReturn := false;
tbe.atomicDataNoReturn := false;
tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs
tbe.Dirty := false;
tbe.NumPendingAcks := 0;
@@ -831,10 +840,18 @@ machine(MachineType:Directory, "AMD Baseline protocol")
tbe.WTRequestor := in_msg.WTRequestor;
tbe.LastSender := in_msg.Requestor;
}
if (in_msg.Type == CoherenceRequestType:Atomic) {
if (in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
tbe.writeMask.clear();
tbe.writeMask.orMask(in_msg.writeMask);
tbe.atomicData := true;
if (in_msg.Type == CoherenceRequestType:AtomicReturn) {
tbe.atomicDataReturn := true;
} else {
assert(in_msg.Type == CoherenceRequestType:AtomicNoReturn);
tbe.atomicDataNoReturn := true;
}
tbe.WTRequestor := in_msg.WTRequestor;
tbe.LastSender := in_msg.Requestor;
}
@@ -866,8 +883,14 @@ machine(MachineType:Directory, "AMD Baseline protocol")
tbe.DataBlk := tmp;
getDirectoryEntry(address).DataBlk := tbe.DataBlk;
} else if (tbe.atomicData) {
tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,
tbe.writeMask);
if (tbe.atomicDataReturn) {
tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,
tbe.writeMask, false);
} else {
assert(tbe.atomicDataNoReturn);
tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk,
tbe.writeMask, true);
}
getDirectoryEntry(address).DataBlk := tbe.DataBlk;
} else if (tbe.Dirty == false) {
getDirectoryEntry(address).DataBlk := tbe.DataBlk;
@@ -896,6 +919,7 @@ machine(MachineType:Directory, "AMD Baseline protocol")
tbe.DataBlk := tmp;
} else if (tbe.Dirty) {
if(tbe.atomicData == false && tbe.wtData == false) {
assert(tbe.atomicDataReturn == false && tbe.atomicDataNoReturn == false);
DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender);
assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data
}
@@ -1050,7 +1074,9 @@ machine(MachineType:Directory, "AMD Baseline protocol")
entry.pfState := ProbeFilterState:T;
entry.isOnCPU := false;
entry.isOnGPU := false;
} else if (in_msg.Type == CoherenceRequestType:Atomic) {
} else if (in_msg.Type == CoherenceRequestType:Atomic ||
in_msg.Type == CoherenceRequestType:AtomicReturn ||
in_msg.Type == CoherenceRequestType:AtomicNoReturn) {
entry.pfState := ProbeFilterState:T;
entry.isOnCPU := false;
entry.isOnGPU := false;

View File

@@ -77,6 +77,7 @@ structure(DataBlock, external = "yes", desc="..."){
void copyPartial(DataBlock, int, int);
void copyPartial(DataBlock, WriteMask);
void atomicPartial(DataBlock, WriteMask);
void atomicPartial(DataBlock, WriteMask, bool);
int numAtomicLogEntries();
void clearAtomicLogEntries();
}