mem: Atomic ops to same address (#200)

Augmenting the DataBlock class with a change log structure to record the
effects of atomic operations on a data block and service these changes
if the atomic operations require return values.

Although the operations are atomic, the coalescer need not send unique
memory requests for each operation. Atomic operations within a wavefront
to the same address are now coalesced into a single memory request. The
response of this request carries all the necessary information to
provide the requesting lanes unique values as a result of their
individual atomic operations. This helps reduce contention for request
and response queues in simulation.

Previously, only the final value of the datablock after all atomic ops
to the same address was visible to the requesting waves. This change
corrects this behavior by allowing each wave to see the effect of its
individual atomic op if a return value is necessary.
This commit is contained in:
Bobby R. Bruce
2023-08-30 23:53:35 -07:00
committed by GitHub
7 changed files with 146 additions and 39 deletions

View File

@@ -51,9 +51,19 @@ namespace ruby
DataBlock::DataBlock(const DataBlock &cp)
{
    uint8_t *block_update;
    size_t block_bytes = RubySystem::getBlockSizeBytes();
    // Deep-copy the block contents; this copy owns its own buffer.
    m_data = new uint8_t[block_bytes];
    memcpy(m_data, cp.m_data, block_bytes);
    m_alloc = true;
    // If this data block is involved in an atomic operation, the effect
    // of applying the atomic operations on the data block are recorded in
    // m_atomicLog. If so, we must copy over every entry in the change log
    // (each entry is a separately heap-allocated block-sized snapshot).
    for (size_t i = 0; i < cp.m_atomicLog.size(); i++) {
        block_update = new uint8_t[block_bytes];
        memcpy(block_update, cp.m_atomicLog[i], block_bytes);
        m_atomicLog.push_back(block_update);
    }
}
void
@@ -73,7 +83,20 @@ DataBlock::clear()
bool
DataBlock::equal(const DataBlock& obj) const
{
    size_t block_bytes = RubySystem::getBlockSizeBytes();
    // Check that the block contents match
    if (memcmp(m_data, obj.m_data, block_bytes)) {
        return false;
    }
    // Blocks with differing numbers of pending atomic-log entries
    // cannot be equal.
    if (m_atomicLog.size() != obj.m_atomicLog.size()) {
        return false;
    }
    // Every recorded atomic snapshot must match byte-for-byte, in order.
    for (size_t i = 0; i < m_atomicLog.size(); i++) {
        if (memcmp(m_atomicLog[i], obj.m_atomicLog[i], block_bytes)) {
            return false;
        }
    }
    return true;
}
void
@@ -92,7 +115,7 @@ DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask)
for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) {
m_data[i] = dblk.m_data[i];
}
mask.performAtomic(m_data);
mask.performAtomic(m_data, m_atomicLog);
}
void
@@ -107,6 +130,28 @@ DataBlock::print(std::ostream& out) const
out << std::dec << "]" << std::flush;
}
int
DataBlock::numAtomicLogEntries() const
{
    // Number of atomic-op snapshots currently recorded for this block.
    return static_cast<int>(m_atomicLog.size());
}
uint8_t*
DataBlock::popAtomicLogEntryFront()
{
    // Remove and return the oldest atomic-op snapshot. The caller takes
    // ownership of the returned buffer and must delete[] it.
    assert(!m_atomicLog.empty());
    uint8_t *entry = m_atomicLog.front();
    m_atomicLog.pop_front();
    return entry;
}
void
DataBlock::clearAtomicLogEntries()
{
    // Free every recorded snapshot, then drop the (now dangling)
    // pointers from the log.
    while (!m_atomicLog.empty()) {
        delete [] m_atomicLog.front();
        m_atomicLog.pop_front();
    }
}
const uint8_t*
DataBlock::getData(int offset, int len) const
{
@@ -137,7 +182,18 @@ DataBlock::setData(PacketPtr pkt)
DataBlock &
DataBlock::operator=(const DataBlock & obj)
{
    // Guard against self-assignment: the log-clearing below would
    // otherwise free the very buffers we are about to copy.
    if (this == &obj) {
        return *this;
    }
    uint8_t *block_update;
    size_t block_bytes = RubySystem::getBlockSizeBytes();
    // Copy entire block contents from obj to current block
    memcpy(m_data, obj.m_data, block_bytes);
    // Release any stale change-log entries first so the log mirrors obj
    // exactly instead of leaking and accumulating across assignments.
    for (auto log : m_atomicLog) {
        delete [] log;
    }
    m_atomicLog.clear();
    // If this data block is involved in an atomic operation, the effect
    // of applying the atomic operations on the data block are recorded in
    // m_atomicLog. If so, we must copy over every entry in the change log
    for (size_t i = 0; i < obj.m_atomicLog.size(); i++) {
        block_update = new uint8_t[block_bytes];
        memcpy(block_update, obj.m_atomicLog[i], block_bytes);
        m_atomicLog.push_back(block_update);
    }
    return *this;
}

View File

@@ -44,6 +44,7 @@
#include <inttypes.h>
#include <cassert>
#include <deque>
#include <iomanip>
#include <iostream>
@@ -71,6 +72,12 @@ class DataBlock
{
if (m_alloc)
delete [] m_data;
// If data block involved in atomic
// operations, free all meta data
for (auto log : m_atomicLog) {
delete [] log;
}
}
DataBlock& operator=(const DataBlock& obj);
@@ -80,6 +87,9 @@ class DataBlock
void clear();
uint8_t getByte(int whichByte) const;
const uint8_t *getData(int offset, int len) const;
uint8_t* popAtomicLogEntryFront();
int numAtomicLogEntries() const;
void clearAtomicLogEntries();
uint8_t *getDataMod(int offset);
void setByte(int whichByte, uint8_t data);
void setData(const uint8_t *data, int offset, int len);
@@ -94,6 +104,9 @@ class DataBlock
void alloc();
uint8_t *m_data;
bool m_alloc;
// Tracks block changes when atomic ops are applied
std::deque<uint8_t*> m_atomicLog;
};
inline void

View File

@@ -55,5 +55,27 @@ WriteMask::print(std::ostream& out) const
<< std::flush;
}
void
WriteMask::performAtomic(uint8_t * p,
                         std::deque<uint8_t*>& log) const
{
    // Here, operations occur in FIFO order from the mAtomicOp
    // vector. This is done to match the ordering of packets
    // that was seen when the initial coalesced request was created.
    for (const auto &op : mAtomicOp) {
        // Save the old value of the data block in case a
        // return value is needed
        uint8_t *snapshot = new uint8_t[mSize];
        std::memcpy(snapshot, p, mSize);
        log.push_back(snapshot);
        // Perform the atomic operation at this op's byte offset
        (*op.second)(&p[op.first]);
    }
}
} // namespace ruby
} // namespace gem5

View File

@@ -222,26 +222,15 @@ class WriteMask
void print(std::ostream& out) const;
void
performAtomic(uint8_t * p) const
{
    // Apply each queued atomic op, in order, directly to the buffer p
    // at that op's byte offset.
    for (const auto &op : mAtomicOp) {
        (*op.second)(&p[op.first]);
    }
}
void
performAtomic(DataBlock & blk) const
{
    // Apply each queued atomic op, in order, to the block's data via a
    // mutable pointer obtained at that op's offset.
    for (const auto &op : mAtomicOp) {
        (*op.second)(blk.getDataMod(op.first));
    }
}
/*
* Performs atomic operations on the data block pointed to by p. The
* atomic operations to perform are in the vector mAtomicOp. The
* effect of each atomic operation is pushed to the atomicChangeLog
* so that each individual atomic requestor may see the results of their
* specific atomic operation.
*/
void performAtomic(uint8_t * p,
std::deque<uint8_t*>& atomicChangeLog) const;
const AtomicOpVector&
getAtomicOps() const

View File

@@ -523,6 +523,7 @@ machine(MachineType:TCC, "TCC Cache")
out_msg.isSLCSet := in_msg.isSLCSet;
}
}
cache_entry.DataBlk.clearAtomicLogEntries();
}
action(bar_sendBypassedAtomicResponse, "bar", desc="send bypassed Atomic Ack") {

View File

@@ -77,6 +77,8 @@ structure(DataBlock, external = "yes", desc="..."){
void copyPartial(DataBlock, int, int);
void copyPartial(DataBlock, WriteMask);
void atomicPartial(DataBlock, WriteMask);
int numAtomicLogEntries();
void clearAtomicLogEntries();
}
bool testAndRead(Addr addr, DataBlock datablk, Packet *pkt);

View File

@@ -554,25 +554,48 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest,
success, isRegion);
// update the data
//
// MUST AD DOING THIS FOR EACH REQUEST IN COALESCER
// MUST ADD DOING THIS FOR EACH REQUEST IN COALESCER
std::vector<PacketPtr> pktList = crequest->getPackets();
uint8_t* log = nullptr;
DPRINTF(GPUCoalescer, "Responding to %d packets for addr 0x%X\n",
pktList.size(), request_line_address);
uint32_t offset;
int pkt_size;
for (auto& pkt : pktList) {
request_address = pkt->getAddr();
offset = getOffset(pkt->getAddr());
pkt_size = pkt->getSize();
if (pkt->getPtr<uint8_t>()) {
if ((type == RubyRequestType_LD) ||
(type == RubyRequestType_ATOMIC) ||
(type == RubyRequestType_ATOMIC_RETURN) ||
(type == RubyRequestType_IFETCH) ||
(type == RubyRequestType_RMW_Read) ||
(type == RubyRequestType_Locked_RMW_Read) ||
(type == RubyRequestType_Load_Linked)) {
pkt->setData(
data.getData(getOffset(request_address), pkt->getSize()));
} else {
data.setData(pkt->getPtr<uint8_t>(),
getOffset(request_address), pkt->getSize());
switch(type) {
// Store and AtomicNoReturns follow the same path, as the
// data response is not needed.
case RubyRequestType_ATOMIC_NO_RETURN:
assert(pkt->isAtomicOp());
case RubyRequestType_ST:
data.setData(pkt->getPtr<uint8_t>(), offset, pkt_size);
break;
case RubyRequestType_LD:
pkt->setData(data.getData(offset, pkt_size));
break;
case RubyRequestType_ATOMIC_RETURN:
assert(pkt->isAtomicOp());
// Atomic operations are performed by the WriteMask
// in packet order, set by the crequest. Thus, when
// unpacking the changes from the log, we read from
// the front of the log to correctly map response
// data into the packets.
// Log entry contains the old value before the current
// atomic operation occurred.
log = data.popAtomicLogEntryFront();
pkt->setData(&log[offset]);
delete [] log;
log = nullptr;
break;
default:
panic("Unsupported ruby packet type:%s\n",
RubyRequestType_to_string(type));
break;
}
} else {
DPRINTF(MemoryAccess,
@@ -581,6 +604,7 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest,
RubyRequestType_to_string(type));
}
}
assert(data.numAtomicLogEntries() == 0);
m_outstanding_count--;
assert(m_outstanding_count >= 0);