diff --git a/src/mem/ruby/common/DataBlock.cc b/src/mem/ruby/common/DataBlock.cc
index f70aa79fd4..70d9bc332a 100644
--- a/src/mem/ruby/common/DataBlock.cc
+++ b/src/mem/ruby/common/DataBlock.cc
@@ -51,9 +51,19 @@ namespace ruby
 
 DataBlock::DataBlock(const DataBlock &cp)
 {
-    m_data = new uint8_t[RubySystem::getBlockSizeBytes()];
-    memcpy(m_data, cp.m_data, RubySystem::getBlockSizeBytes());
+    uint8_t *block_update;
+    size_t block_bytes = RubySystem::getBlockSizeBytes();
+    m_data = new uint8_t[block_bytes];
+    memcpy(m_data, cp.m_data, block_bytes);
     m_alloc = true;
+    // If this data block is involved in an atomic operation, the effect
+    // of applying the atomic operations on the data block are recorded in
+    // m_atomicLog. If so, we must copy over every entry in the change log
+    for (size_t i = 0; i < cp.m_atomicLog.size(); i++) {
+        block_update = new uint8_t[block_bytes];
+        memcpy(block_update, cp.m_atomicLog[i], block_bytes);
+        m_atomicLog.push_back(block_update);
+    }
 }
 
 void
@@ -73,7 +83,20 @@ DataBlock::clear()
 bool
 DataBlock::equal(const DataBlock& obj) const
 {
-    return !memcmp(m_data, obj.m_data, RubySystem::getBlockSizeBytes());
+    size_t block_bytes = RubySystem::getBlockSizeBytes();
+    // Check that the block contents match
+    if (memcmp(m_data, obj.m_data, block_bytes)) {
+        return false;
+    }
+    if (m_atomicLog.size() != obj.m_atomicLog.size()) {
+        return false;
+    }
+    for (size_t i = 0; i < m_atomicLog.size(); i++) {
+        if (memcmp(m_atomicLog[i], obj.m_atomicLog[i], block_bytes)) {
+            return false;
+        }
+    }
+    return true;
 }
 
 void
@@ -92,7 +115,7 @@ DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask)
     for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) {
         m_data[i] = dblk.m_data[i];
     }
-    mask.performAtomic(m_data);
+    mask.performAtomic(m_data, m_atomicLog);
 }
 
 void
@@ -107,6 +130,28 @@ DataBlock::print(std::ostream& out) const
     out << std::dec << "]" << std::flush;
 }
 
+int
+DataBlock::numAtomicLogEntries() const
+{
+    return m_atomicLog.size();
+}
+uint8_t*
+DataBlock::popAtomicLogEntryFront()
+{
+    assert(m_atomicLog.size() > 0);
+    auto ret = m_atomicLog.front();
+    m_atomicLog.pop_front();
+    return ret;
+}
+void
+DataBlock::clearAtomicLogEntries()
+{
+    for (auto log : m_atomicLog) {
+        delete [] log;
+    }
+    m_atomicLog.clear();
+}
+
 const uint8_t*
 DataBlock::getData(int offset, int len) const
 {
@@ -137,7 +182,18 @@ DataBlock::setData(PacketPtr pkt)
 DataBlock &
 DataBlock::operator=(const DataBlock & obj)
 {
-    memcpy(m_data, obj.m_data, RubySystem::getBlockSizeBytes());
+    uint8_t *block_update;
+    size_t block_bytes = RubySystem::getBlockSizeBytes();
+    // Copy entire block contents from obj to current block
+    memcpy(m_data, obj.m_data, block_bytes);
+    // If this data block is involved in an atomic operation, the effect
+    // of applying the atomic operations on the data block are recorded in
+    // m_atomicLog. If so, we must copy over every entry in the change log
+    for (size_t i = 0; i < obj.m_atomicLog.size(); i++) {
+        block_update = new uint8_t[block_bytes];
+        memcpy(block_update, obj.m_atomicLog[i], block_bytes);
+        m_atomicLog.push_back(block_update);
+    }
     return *this;
 }
diff --git a/src/mem/ruby/common/DataBlock.hh b/src/mem/ruby/common/DataBlock.hh
index e147d701c5..aa94f56eb8 100644
--- a/src/mem/ruby/common/DataBlock.hh
+++ b/src/mem/ruby/common/DataBlock.hh
@@ -44,6 +44,7 @@
 #include <inttypes.h>
 
 #include <cassert>
+#include <deque>
 #include <iomanip>
 #include <iostream>
@@ -71,6 +72,12 @@ class DataBlock
     {
         if (m_alloc)
            delete [] m_data;
+
+        // If data block involved in atomic
+        // operations, free all meta data
+        for (auto log : m_atomicLog) {
+            delete [] log;
+        }
     }
 
     DataBlock& operator=(const DataBlock& obj);
@@ -80,6 +87,9 @@ class DataBlock
     void clear();
     uint8_t getByte(int whichByte) const;
     const uint8_t *getData(int offset, int len) const;
+    uint8_t* popAtomicLogEntryFront();
+    int numAtomicLogEntries() const;
+    void clearAtomicLogEntries();
     uint8_t *getDataMod(int offset);
     void setByte(int whichByte, uint8_t data);
     void setData(const uint8_t *data, int offset, int len);
@@ -94,6 +104,9 @@ class DataBlock
     void alloc();
     uint8_t *m_data;
     bool m_alloc;
+
+    // Tracks block changes when atomic ops are applied
+    std::deque<uint8_t*> m_atomicLog;
 };
 
 inline void
diff --git a/src/mem/ruby/common/WriteMask.cc b/src/mem/ruby/common/WriteMask.cc
index 4c24a64706..911262b4ba 100644
--- a/src/mem/ruby/common/WriteMask.cc
+++ b/src/mem/ruby/common/WriteMask.cc
@@ -55,5 +55,27 @@ WriteMask::print(std::ostream& out) const
         << std::flush;
 }
 
+void
+WriteMask::performAtomic(uint8_t * p,
+                         std::deque<uint8_t*>& log) const
+{
+    int offset;
+    uint8_t *block_update;
+    // Here, operations occur in FIFO order from the mAtomicOp
+    // vector. This is done to match the ordering of packets
+    // that was seen when the initial coalesced request was created.
+    for (int i = 0; i < mAtomicOp.size(); i++) {
+        // Save the old value of the data block in case a
+        // return value is needed
+        block_update = new uint8_t[mSize];
+        std::memcpy(block_update, p, mSize);
+        log.push_back(block_update);
+        // Perform the atomic operation
+        offset = mAtomicOp[i].first;
+        AtomicOpFunctor *fnctr = mAtomicOp[i].second;
+        (*fnctr)(&p[offset]);
+    }
+}
+
 } // namespace ruby
 } // namespace gem5
diff --git a/src/mem/ruby/common/WriteMask.hh b/src/mem/ruby/common/WriteMask.hh
index 2de21da79b..47ec798500 100644
--- a/src/mem/ruby/common/WriteMask.hh
+++ b/src/mem/ruby/common/WriteMask.hh
@@ -222,26 +222,15 @@ class WriteMask
 
     void print(std::ostream& out) const;
 
-    void
-    performAtomic(uint8_t * p) const
-    {
-        for (int i = 0; i < mAtomicOp.size(); i++) {
-            int offset = mAtomicOp[i].first;
-            AtomicOpFunctor *fnctr = mAtomicOp[i].second;
-            (*fnctr)(&p[offset]);
-        }
-    }
-
-    void
-    performAtomic(DataBlock & blk) const
-    {
-        for (int i = 0; i < mAtomicOp.size(); i++) {
-            int offset = mAtomicOp[i].first;
-            uint8_t *p = blk.getDataMod(offset);
-            AtomicOpFunctor *fnctr = mAtomicOp[i].second;
-            (*fnctr)(p);
-        }
-    }
+    /*
+     * Performs atomic operations on the data block pointed to by p. The
+     * atomic operations to perform are in the vector mAtomicOp. The
+     * effect of each atomic operation is pushed to the atomicChangeLog
+     * so that each individual atomic requestor may see the results of their
+     * specific atomic operation.
+     */
+    void performAtomic(uint8_t * p,
+                       std::deque<uint8_t*>& atomicChangeLog) const;
 
     const AtomicOpVector&
     getAtomicOps() const
diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index 31fc484973..20a0979af1 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -523,6 +523,7 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
+    cache_entry.DataBlk.clearAtomicLogEntries();
   }
 
   action(bar_sendBypassedAtomicResponse, "bar", desc="send bypassed Atomic Ack") {
diff --git a/src/mem/ruby/protocol/RubySlicc_Exports.sm b/src/mem/ruby/protocol/RubySlicc_Exports.sm
index a32983ada4..2e496a8221 100644
--- a/src/mem/ruby/protocol/RubySlicc_Exports.sm
+++ b/src/mem/ruby/protocol/RubySlicc_Exports.sm
@@ -77,6 +77,8 @@ structure(DataBlock, external = "yes", desc="..."){
   void copyPartial(DataBlock, int, int);
   void copyPartial(DataBlock, WriteMask);
   void atomicPartial(DataBlock, WriteMask);
+  int numAtomicLogEntries();
+  void clearAtomicLogEntries();
 }
 
 bool testAndRead(Addr addr, DataBlock datablk, Packet *pkt);
diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc
index 8bde3f7bc8..beb8da3f9c 100644
--- a/src/mem/ruby/system/GPUCoalescer.cc
+++ b/src/mem/ruby/system/GPUCoalescer.cc
@@ -554,25 +554,48 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest,
                  success, isRegion);
     // update the data
     //
-    // MUST AD DOING THIS FOR EACH REQUEST IN COALESCER
+    // MUST ADD DOING THIS FOR EACH REQUEST IN COALESCER
     std::vector<PacketPtr> pktList = crequest->getPackets();
+
+    uint8_t* log = nullptr;
     DPRINTF(GPUCoalescer, "Responding to %d packets for addr 0x%X\n",
             pktList.size(), request_line_address);
+    uint32_t offset;
+    int pkt_size;
     for (auto& pkt : pktList) {
-        request_address = pkt->getAddr();
+        offset = getOffset(pkt->getAddr());
+        pkt_size = pkt->getSize();
         if (pkt->getPtr<uint8_t>()) {
-            if ((type == RubyRequestType_LD) ||
-                (type == RubyRequestType_ATOMIC) ||
-                (type == RubyRequestType_ATOMIC_RETURN) ||
-                (type == RubyRequestType_IFETCH) ||
-                (type == RubyRequestType_RMW_Read) ||
-                (type == RubyRequestType_Locked_RMW_Read) ||
-                (type == RubyRequestType_Load_Linked)) {
-                pkt->setData(
-                    data.getData(getOffset(request_address), pkt->getSize()));
-            } else {
-                data.setData(pkt->getPtr<uint8_t>(),
-                             getOffset(request_address), pkt->getSize());
+            switch(type) {
+              // Store and AtomicNoReturns follow the same path, as the
+              // data response is not needed.
+              case RubyRequestType_ATOMIC_NO_RETURN:
+                assert(pkt->isAtomicOp());
+              case RubyRequestType_ST:
+                data.setData(pkt->getPtr<uint8_t>(), offset, pkt_size);
+                break;
+              case RubyRequestType_LD:
+                pkt->setData(data.getData(offset, pkt_size));
+                break;
+              case RubyRequestType_ATOMIC_RETURN:
+                assert(pkt->isAtomicOp());
+                // Atomic operations are performed by the WriteMask
+                // in packet order, set by the crequest. Thus, when
+                // unpacking the changes from the log, we read from
+                // the front of the log to correctly map response
+                // data into the packets.
+
+                // Log entry contains the old value before the current
+                // atomic operation occurred.
+                log = data.popAtomicLogEntryFront();
+                pkt->setData(&log[offset]);
+                delete [] log;
+                log = nullptr;
+                break;
+              default:
+                panic("Unsupported ruby packet type:%s\n",
+                      RubyRequestType_to_string(type));
+                break;
             }
         } else {
             DPRINTF(MemoryAccess,
@@ -581,6 +604,7 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest,
                     RubyRequestType_to_string(type));
         }
     }
+    assert(data.numAtomicLogEntries() == 0);
 
     m_outstanding_count--;
     assert(m_outstanding_count >= 0);