mem: Atomic ops to same address

Augmenting the DataBlock class with a change log structure to
record the effects of atomic operations on a data block and
service these changes if the atomic operations require return
values.

Although the operations are atomic, the coalescer need not
send unique memory requests for each operation. Atomic
operations within a wavefront to the same address are now
coalesced into a single memory request. The response of this
request carries all the necessary information to provide the
requesting lanes unique values as a result of their individual
atomic operations. This helps reduce contention for request
and response queues in simulation.

Previously, only the final value of the datablock after all
atomic ops to the same address was visible to the requesting
waves. This change corrects this behavior by allowing each wave
to see the effect of its individual atomic op if a return value
is necessary.

Change-Id: I639bea943afd317e45f8fa3bff7689f6b8df9395
This commit is contained in:
Ranganath (Bujji) Selagamsetty
2023-08-18 12:08:01 -05:00
committed by Bujji
parent c218104f52
commit f6a453362f
7 changed files with 146 additions and 39 deletions

View File

@@ -51,9 +51,19 @@ namespace ruby
DataBlock::DataBlock(const DataBlock &cp)
{
    // Deep-copy the block contents from cp into freshly owned storage.
    size_t block_bytes = RubySystem::getBlockSizeBytes();
    m_data = new uint8_t[block_bytes];
    memcpy(m_data, cp.m_data, block_bytes);
    m_alloc = true;
    // If this data block is involved in an atomic operation, the effect
    // of applying the atomic operations on the data block are recorded in
    // m_atomicLog. If so, we must deep-copy every entry in the change log
    // (each entry is a block_bytes-sized snapshot owned by this object).
    for (size_t i = 0; i < cp.m_atomicLog.size(); i++) {
        uint8_t *block_update = new uint8_t[block_bytes];
        memcpy(block_update, cp.m_atomicLog[i], block_bytes);
        m_atomicLog.push_back(block_update);
    }
}
void
@@ -73,7 +83,20 @@ DataBlock::clear()
bool
DataBlock::equal(const DataBlock& obj) const
{
    // Two DataBlocks are equal only if both the raw block contents and
    // the recorded atomic change logs match entry-for-entry.
    size_t block_bytes = RubySystem::getBlockSizeBytes();
    // Check that the block contents match
    if (memcmp(m_data, obj.m_data, block_bytes)) {
        return false;
    }
    // The change logs must have the same number of snapshots...
    if (m_atomicLog.size() != obj.m_atomicLog.size()) {
        return false;
    }
    // ...and each corresponding snapshot must be byte-identical.
    for (size_t i = 0; i < m_atomicLog.size(); i++) {
        if (memcmp(m_atomicLog[i], obj.m_atomicLog[i], block_bytes)) {
            return false;
        }
    }
    return true;
}
void
@@ -92,7 +115,7 @@ DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask)
for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) {
m_data[i] = dblk.m_data[i];
}
mask.performAtomic(m_data);
mask.performAtomic(m_data, m_atomicLog);
}
void
@@ -107,6 +130,28 @@ DataBlock::print(std::ostream& out) const
out << std::dec << "]" << std::flush;
}
int
DataBlock::numAtomicLogEntries() const
{
    // Number of atomic-op snapshots currently pending in the change log.
    return static_cast<int>(m_atomicLog.size());
}
uint8_t*
DataBlock::popAtomicLogEntryFront()
{
    // Hand the oldest snapshot to the caller, who takes ownership of the
    // returned buffer and is responsible for delete[]-ing it.
    assert(!m_atomicLog.empty());
    uint8_t *entry = m_atomicLog.front();
    m_atomicLog.pop_front();
    return entry;
}
void
DataBlock::clearAtomicLogEntries()
{
    // Release every snapshot buffer still owned by the log, then empty it.
    for (uint8_t *entry : m_atomicLog) {
        delete [] entry;
    }
    m_atomicLog.clear();
}
const uint8_t*
DataBlock::getData(int offset, int len) const
{
@@ -137,7 +182,18 @@ DataBlock::setData(PacketPtr pkt)
DataBlock &
DataBlock::operator=(const DataBlock & obj)
{
    // Guard against self-assignment; freeing our own log below would
    // otherwise destroy the entries we are about to copy.
    if (this == &obj) {
        return *this;
    }
    size_t block_bytes = RubySystem::getBlockSizeBytes();
    // Copy entire block contents from obj to current block
    memcpy(m_data, obj.m_data, block_bytes);
    // Release any existing log entries so assignment replaces, rather
    // than appends to, this block's atomic change log (avoids leaking
    // the old snapshot buffers).
    for (auto log : m_atomicLog) {
        delete [] log;
    }
    m_atomicLog.clear();
    // If obj is involved in an atomic operation, the effect of applying
    // the atomic operations on the data block are recorded in its
    // m_atomicLog. If so, we must deep-copy every entry in the change log
    for (size_t i = 0; i < obj.m_atomicLog.size(); i++) {
        uint8_t *block_update = new uint8_t[block_bytes];
        memcpy(block_update, obj.m_atomicLog[i], block_bytes);
        m_atomicLog.push_back(block_update);
    }
    return *this;
}

View File

@@ -44,6 +44,7 @@
#include <inttypes.h>
#include <cassert>
#include <deque>
#include <iomanip>
#include <iostream>
@@ -71,6 +72,12 @@ class DataBlock
{
if (m_alloc)
delete [] m_data;
// If data block involved in atomic
// operations, free all meta data
for (auto log : m_atomicLog) {
delete [] log;
}
}
DataBlock& operator=(const DataBlock& obj);
@@ -80,6 +87,9 @@ class DataBlock
void clear();
uint8_t getByte(int whichByte) const;
const uint8_t *getData(int offset, int len) const;
uint8_t* popAtomicLogEntryFront();
int numAtomicLogEntries() const;
void clearAtomicLogEntries();
uint8_t *getDataMod(int offset);
void setByte(int whichByte, uint8_t data);
void setData(const uint8_t *data, int offset, int len);
@@ -94,6 +104,9 @@ class DataBlock
void alloc();
uint8_t *m_data;
bool m_alloc;
// Tracks block changes when atomic ops are applied
std::deque<uint8_t*> m_atomicLog;
};
inline void

View File

@@ -55,5 +55,27 @@ WriteMask::print(std::ostream& out) const
<< std::flush;
}
void
WriteMask::performAtomic(uint8_t * p,
                         std::deque<uint8_t*>& log) const
{
    // Applies every queued atomic op to the block pointed to by p.
    // Before each op, a snapshot of the current block state is pushed to
    // `log` so a requester needing a return value can observe the exact
    // state its individual op acted on. Ownership of each snapshot
    // buffer transfers to the caller via `log`.
    //
    // Operations occur in FIFO order from the mAtomicOp vector to match
    // the packet ordering seen when the coalesced request was created.
    for (size_t i = 0; i < mAtomicOp.size(); i++) {
        // Save the old value of the data block in case a
        // return value is needed
        uint8_t *block_update = new uint8_t[mSize];
        std::memcpy(block_update, p, mSize);
        log.push_back(block_update);
        // Perform the atomic operation at its byte offset in the block
        const int offset = mAtomicOp[i].first;
        AtomicOpFunctor *fnctr = mAtomicOp[i].second;
        (*fnctr)(&p[offset]);
    }
}
} // namespace ruby
} // namespace gem5

View File

@@ -222,26 +222,15 @@ class WriteMask
void print(std::ostream& out) const;
void
performAtomic(uint8_t * p) const
{
for (int i = 0; i < mAtomicOp.size(); i++) {
int offset = mAtomicOp[i].first;
AtomicOpFunctor *fnctr = mAtomicOp[i].second;
(*fnctr)(&p[offset]);
}
}
void
performAtomic(DataBlock & blk) const
{
    // Apply every queued atomic op, in order, directly to the
    // DataBlock's modifiable storage at each op's byte offset.
    for (const auto &op : mAtomicOp) {
        uint8_t *data = blk.getDataMod(op.first);
        (*op.second)(data);
    }
}
/*
* Performs atomic operations on the data block pointed to by p. The
* atomic operations to perform are in the vector mAtomicOp. The
* effect of each atomic operation is pushed to the atomicChangeLog
* so that each individual atomic requestor may see the results of their
* specific atomic operation.
*/
void performAtomic(uint8_t * p,
std::deque<uint8_t*>& atomicChangeLog) const;
const AtomicOpVector&
getAtomicOps() const

View File

@@ -523,6 +523,7 @@ machine(MachineType:TCC, "TCC Cache")
out_msg.isSLCSet := in_msg.isSLCSet;
}
}
cache_entry.DataBlk.clearAtomicLogEntries();
}
action(bar_sendBypassedAtomicResponse, "bar", desc="send bypassed Atomic Ack") {

View File

@@ -77,6 +77,8 @@ structure(DataBlock, external = "yes", desc="..."){
void copyPartial(DataBlock, int, int);
void copyPartial(DataBlock, WriteMask);
void atomicPartial(DataBlock, WriteMask);
int numAtomicLogEntries();
void clearAtomicLogEntries();
}
bool testAndRead(Addr addr, DataBlock datablk, Packet *pkt);

View File

@@ -554,25 +554,48 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest,
success, isRegion);
// update the data
//
// MUST AD DOING THIS FOR EACH REQUEST IN COALESCER
// MUST ADD DOING THIS FOR EACH REQUEST IN COALESCER
std::vector<PacketPtr> pktList = crequest->getPackets();
uint8_t* log = nullptr;
DPRINTF(GPUCoalescer, "Responding to %d packets for addr 0x%X\n",
pktList.size(), request_line_address);
uint32_t offset;
int pkt_size;
for (auto& pkt : pktList) {
request_address = pkt->getAddr();
offset = getOffset(pkt->getAddr());
pkt_size = pkt->getSize();
if (pkt->getPtr<uint8_t>()) {
if ((type == RubyRequestType_LD) ||
(type == RubyRequestType_ATOMIC) ||
(type == RubyRequestType_ATOMIC_RETURN) ||
(type == RubyRequestType_IFETCH) ||
(type == RubyRequestType_RMW_Read) ||
(type == RubyRequestType_Locked_RMW_Read) ||
(type == RubyRequestType_Load_Linked)) {
pkt->setData(
data.getData(getOffset(request_address), pkt->getSize()));
} else {
data.setData(pkt->getPtr<uint8_t>(),
getOffset(request_address), pkt->getSize());
switch(type) {
// Store and AtomicNoReturns follow the same path, as the
// data response is not needed.
case RubyRequestType_ATOMIC_NO_RETURN:
assert(pkt->isAtomicOp());
case RubyRequestType_ST:
data.setData(pkt->getPtr<uint8_t>(), offset, pkt_size);
break;
case RubyRequestType_LD:
pkt->setData(data.getData(offset, pkt_size));
break;
case RubyRequestType_ATOMIC_RETURN:
assert(pkt->isAtomicOp());
// Atomic operations are performed by the WriteMask
// in packet order, set by the crequest. Thus, when
// unpacking the changes from the log, we read from
// the front of the log to correctly map response
// data into the packets.
// Log entry contains the old value before the current
// atomic operation occurred.
log = data.popAtomicLogEntryFront();
pkt->setData(&log[offset]);
delete [] log;
log = nullptr;
break;
default:
panic("Unsupported ruby packet type:%s\n",
RubyRequestType_to_string(type));
break;
}
} else {
DPRINTF(MemoryAccess,
@@ -581,6 +604,7 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest,
RubyRequestType_to_string(type));
}
}
assert(data.numAtomicLogEntries() == 0);
m_outstanding_count--;
assert(m_outstanding_count >= 0);