mem-ruby: Add cache cooldown and warmup support to GPUCoalescer

The GPU Coalescer does not contain cache cooldown and warmup support.
This commit updates the coalescer to support cache cooldown during flush
and warmup during checkpoint restore.

Change-Id: I5459471dec20ff304fd5954af1079a7486ee860a
This commit is contained in:
Vishnu Ramadas
2023-09-29 16:37:41 -05:00
parent a50ead5907
commit 61e39d5b26
2 changed files with 75 additions and 22 deletions

View File

@@ -73,6 +73,14 @@ UncoalescedTable::insertPacket(PacketPtr pkt)
pkt->getAddr(), seqNum, instMap.size(), instMap[seqNum].size());
}
/**
 * Remember the Ruby request type of the instruction that issued a packet.
 *
 * The type is stored in reqTypeMap under the instruction sequence number
 * read from the packet's request; an existing entry for the same sequence
 * number is overwritten.
 *
 * @param pkt  packet whose request supplies the instruction sequence number
 * @param type Ruby request type to associate with that sequence number
 */
void
UncoalescedTable::insertReqType(PacketPtr pkt, RubyRequestType type)
{
    // Key the entry directly by the issuing instruction's sequence number.
    reqTypeMap[pkt->req->getReqInstSeqNum()] = type;
}
bool
UncoalescedTable::packetAvailable()
{
@@ -128,9 +136,21 @@ UncoalescedTable::updateResources()
instMap.erase(iter++);
instPktsRemaining.erase(seq_num);
// Release the token
DPRINTF(GPUCoalescer, "Returning token seqNum %d\n", seq_num);
coalescer->getGMTokenPort().sendTokens(1);
// Release the token if the Ruby system is not in cooldown
// or warmup phases. When in these phases, the RubyPorts
// are accessed directly using the makeRequest() command
// instead of accessing through the port. This makes
// sending tokens through the port unnecessary
if (!RubySystem::getWarmupEnabled()
&& !RubySystem::getCooldownEnabled()) {
if (reqTypeMap[seq_num] != RubyRequestType_FLUSH) {
DPRINTF(GPUCoalescer,
"Returning token seqNum %d\n", seq_num);
coalescer->getGMTokenPort().sendTokens(1);
}
}
reqTypeMap.erase(seq_num);
} else {
++iter;
}
@@ -565,6 +585,14 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest,
for (auto& pkt : pktList) {
offset = getOffset(pkt->getAddr());
pkt_size = pkt->getSize();
request_address = pkt->getAddr();
// When the Ruby system is in cooldown phase, the requests come from
// the cache recorder. These requests do not get coalesced and
// do not return valid data.
if (RubySystem::getCooldownEnabled())
continue;
if (pkt->getPtr<uint8_t>()) {
switch(type) {
// Store and AtomicNoReturns follow the same path, as the
@@ -627,7 +655,6 @@ GPUCoalescer::getRequestType(PacketPtr pkt)
assert(!pkt->req->isLLSC());
assert(!pkt->req->isLockedRMW());
assert(!pkt->req->isInstFetch());
assert(!pkt->isFlush());
if (pkt->req->isAtomicReturn()) {
req_type = RubyRequestType_ATOMIC_RETURN;
@@ -637,6 +664,8 @@ GPUCoalescer::getRequestType(PacketPtr pkt)
req_type = RubyRequestType_LD;
} else if (pkt->isWrite()) {
req_type = RubyRequestType_ST;
} else if (pkt->isFlush()) {
req_type = RubyRequestType_FLUSH;
} else {
panic("Unsupported ruby packet type\n");
}
@@ -658,7 +687,7 @@ GPUCoalescer::makeRequest(PacketPtr pkt)
issueMemSyncRequest(pkt);
} else {
// otherwise, this must be either read or write command
assert(pkt->isRead() || pkt->isWrite());
assert(pkt->isRead() || pkt->isWrite() || pkt->isFlush());
InstSeqNum seq_num = pkt->req->getReqInstSeqNum();
@@ -667,10 +696,17 @@ GPUCoalescer::makeRequest(PacketPtr pkt)
// number of lanes actives for that vmem request (i.e., the popcnt
// of the exec_mask).
int num_packets = 1;
if (!m_usingRubyTester) {
num_packets = 0;
for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) {
num_packets += getDynInst(pkt)->getLaneStatus(i);
// When Ruby is in warmup or cooldown phase, the requests come from
// the cache recorder. There is no dynamic instruction associated
// with these requests either
if (!RubySystem::getWarmupEnabled()
&& !RubySystem::getCooldownEnabled()) {
if (!m_usingRubyTester) {
num_packets = 0;
for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) {
num_packets += getDynInst(pkt)->getLaneStatus(i);
}
}
}
@@ -679,6 +715,7 @@ GPUCoalescer::makeRequest(PacketPtr pkt)
// future cycle. Packets remaining is set to the number of expected
// requests from the instruction based on its exec_mask.
uncoalescedTable.insertPacket(pkt);
uncoalescedTable.insertReqType(pkt, getRequestType(pkt));
uncoalescedTable.initPacketsRemaining(seq_num, num_packets);
DPRINTF(GPUCoalescer, "Put pkt with addr 0x%X to uncoalescedTable\n",
pkt->getAddr());
@@ -945,21 +982,27 @@ void
GPUCoalescer::completeHitCallback(std::vector<PacketPtr> & mylist)
{
for (auto& pkt : mylist) {
RubyPort::SenderState *ss =
safe_cast<RubyPort::SenderState *>(pkt->senderState);
MemResponsePort *port = ss->port;
assert(port != NULL);
// When Ruby is in warmup or cooldown phase, the requests come
// from the cache recorder. They do not track which port to use
// and do not need to send the response back
if (!RubySystem::getWarmupEnabled()
&& !RubySystem::getCooldownEnabled()) {
RubyPort::SenderState *ss =
safe_cast<RubyPort::SenderState *>(pkt->senderState);
MemResponsePort *port = ss->port;
assert(port != NULL);
pkt->senderState = ss->predecessor;
pkt->senderState = ss->predecessor;
if (pkt->cmd != MemCmd::WriteReq) {
// for WriteReq, we keep the original senderState until
// writeCompleteCallback
delete ss;
if (pkt->cmd != MemCmd::WriteReq) {
// for WriteReq, we keep the original senderState until
// writeCompleteCallback
delete ss;
}
port->hitCallback(pkt);
trySendRetries();
}
port->hitCallback(pkt);
trySendRetries();
}
// We schedule an event in the same tick as hitCallback (similar to
@@ -971,7 +1014,14 @@ GPUCoalescer::completeHitCallback(std::vector<PacketPtr> & mylist)
schedule(issueEvent, curTick());
}
testDrainComplete();
RubySystem *rs = m_ruby_system;
if (RubySystem::getWarmupEnabled()) {
rs->m_cache_recorder->enqueueNextFetchRequest();
} else if (RubySystem::getCooldownEnabled()) {
rs->m_cache_recorder->enqueueNextFlushRequest();
} else {
testDrainComplete();
}
}
void

View File

@@ -71,6 +71,7 @@ class UncoalescedTable
~UncoalescedTable() {}
void insertPacket(PacketPtr pkt);
void insertReqType(PacketPtr pkt, RubyRequestType type);
bool packetAvailable();
void printRequestTable(std::stringstream& ss);
@@ -101,6 +102,8 @@ class UncoalescedTable
std::map<InstSeqNum, PerInstPackets> instMap;
std::map<InstSeqNum, int> instPktsRemaining;
std::map<InstSeqNum, RubyRequestType> reqTypeMap;
};
class CoalescedRequest