mem-ruby: Add SQC cache invalidation support to GPU VIPER

This commit adds support for cache invalidation in the GPU VIPER
protocol's SQC cache. To support this, the commit also adds an L1 cache
invalidation framework in the Sequencer, such that the Sequencer sends
out an invalidation request for each line in the cache and declares
completion once all lines are evicted.

Change-Id: I2f52eacabb2412b16f467f994e985c378230f841
This commit is contained in:
Vishnu Ramadas
2024-01-25 13:24:57 -06:00
parent fd3aac1518
commit 23dc98ea72
4 changed files with 124 additions and 2 deletions

View File

@@ -60,6 +60,7 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
// Mem sys initiated
Repl, desc="Replacing block from cache";
Data, desc="Received Data";
Evict, desc="Evict cache line";
}
enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
@@ -67,6 +68,7 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
DataArrayWrite, desc="Write the data array";
TagArrayRead, desc="Read the data array";
TagArrayWrite, desc="Write the data array";
TagArrayFlash, desc="Flash clear the data array";
}
@@ -242,7 +244,12 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
Entry cache_entry := getCacheEntry(in_msg.LineAddress);
TBE tbe := TBEs.lookup(in_msg.LineAddress);
trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe);
DPRINTF(RubySlicc, "%s\n", in_msg);
if (in_msg.Type == RubyRequestType:REPLACEMENT) {
trigger(Event:Evict, in_msg.LineAddress, cache_entry, tbe);
} else {
trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe);
}
}
}
}
@@ -313,6 +320,11 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
APPEND_TRANSITION_COMMENT(cache_entry.DataBlk);
}
action(inv_invDone, "inv", desc="local inv done") {
// Report one completed line eviction back to the Sequencer so it can
// decrement its pending-invalidation count (see Sequencer::invL1Callback).
sequencer.invL1Callback();
}
action(w_writeCache, "w", desc="write data to cache") {
peek(responseToSQC_in, ResponseMsg) {
assert(is_valid(cache_entry));
@@ -350,6 +362,13 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)")
ic_invCache;
}
// A walk-triggered REPLACEMENT request arrived from the Sequencer:
// invalidate the line (from any state) and acknowledge the eviction.
transition({I, IV, V}, Evict, I) {TagArrayRead, TagArrayWrite} {
// since we're evicting something, don't bother classifying as hit/miss
ic_invCache;
inv_invDone;
p_popMandatoryQueue;
}
// if we got a response for a load where the line is in I, then
// another request must have come in that replaced the line in question in
// the cache. Thus, complete this request without allocating the line, but

View File

@@ -157,6 +157,9 @@ structure (Sequencer, external = "yes") {
void llscClearLocalMonitor();
void evictionCallback(Addr);
void invL1Callback();
void recordRequestType(SequencerRequestType);
bool checkResourceAvailable(CacheResourceType, Addr);
}

View File

@@ -85,6 +85,8 @@ Sequencer::Sequencer(const Params &p)
m_runningGarnetStandalone = p.garnet_standalone;
m_num_pending_invs = 0;
m_cache_inv_pkt = nullptr;
// These statistical variables are not for display.
// The profiler will collate these across different
@@ -348,6 +350,10 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType primary_type,
return RequestStatus_Ready;
}
if (pkt->cmd == MemCmd::MemSyncReq) {
return RequestStatus_Aliased;
}
Addr line_addr = makeLineAddress(pkt->getAddr());
// Check if there is any outstanding request for the same cache line.
auto &seq_req_list = m_RequestTable[line_addr];
@@ -576,7 +582,8 @@ Sequencer::readCallback(Addr address, DataBlock& data,
}
if ((seq_req.m_type != RubyRequestType_LD) &&
(seq_req.m_type != RubyRequestType_Load_Linked) &&
(seq_req.m_type != RubyRequestType_IFETCH)) {
(seq_req.m_type != RubyRequestType_IFETCH) &&
(seq_req.m_type != RubyRequestType_REPLACEMENT)) {
// Write request: reissue request to the cache hierarchy
issueRequest(seq_req.pkt, seq_req.m_second_type);
break;
@@ -811,6 +818,86 @@ Sequencer::unaddressedCallback(Addr unaddressedReqId,
}
}
void
Sequencer::completeHitCallback(std::vector<PacketPtr> & mylist)
{
    // Send a response back to the CPU side for every packet in mylist,
    // then notify the RubySystem that this batch of requests is done.
    for (auto& pkt : mylist) {
        // When Ruby is in warmup or cooldown phase, the requests come
        // from the cache recorder. They do not track which port to use
        // and do not need to send the response back
        if (!RubySystem::getWarmupEnabled()
                && !RubySystem::getCooldownEnabled()) {
            RubyPort::SenderState *ss =
                safe_cast<RubyPort::SenderState *>(pkt->senderState);
            MemResponsePort *port = ss->port;
            assert(port != NULL);
            // Restore the sender state the packet carried before Ruby
            // pushed its own SenderState onto it.
            pkt->senderState = ss->predecessor;
            if (pkt->cmd != MemCmd::WriteReq) {
                // for WriteReq, we keep the original senderState until
                // writeCompleteCallback
                delete ss;
            }
            port->hitCallback(pkt);
            // A response may have freed a resource; retry blocked ports.
            trySendRetries();
        }
    }
    RubySystem *rs = m_ruby_system;
    if (RubySystem::getWarmupEnabled()) {
        // Warmup: ask the cache recorder to replay the next fetch.
        rs->m_cache_recorder->enqueueNextFetchRequest();
    } else if (RubySystem::getCooldownEnabled()) {
        // Cooldown: keep flushing lines out of the caches.
        rs->m_cache_recorder->enqueueNextFlushRequest();
    } else {
        // Normal operation: this completion may satisfy a pending drain.
        testDrainComplete();
    }
}
void
Sequencer::invL1Callback()
{
    // Called by the cache controller each time one line eviction issued
    // by invL1() completes. When the last outstanding eviction finishes,
    // respond to the MemSyncReq packet that started the cache walk.
    // Since L1 invalidate is currently done with paddr = 0
    assert(m_cache_inv_pkt && m_num_pending_invs > 0);
    m_num_pending_invs--;
    if (m_num_pending_invs == 0) {
        // Clear m_cache_inv_pkt before responding so a new invalidation
        // request can be accepted by makeRequest().
        std::vector<PacketPtr> pkt_list { m_cache_inv_pkt };
        m_cache_inv_pkt = nullptr;
        completeHitCallback(pkt_list);
    }
}
void
Sequencer::invL1()
{
    // Walk every block in the L1 cache and enqueue a REPLACEMENT request
    // for it on the mandatory queue. Each eviction is acknowledged via
    // invL1Callback(); once m_num_pending_invs returns to zero the
    // original MemSyncReq packet is answered.
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(RubySequencer,
        "There are %d Invalidations outstanding before Cache Walk\n",
        m_num_pending_invs);
    assert(m_mandatory_q_ptr != NULL);
    // The request type is the same for every line, so the mandatory
    // queue latency is loop-invariant; compute it once.
    RubyRequestType request_type = RubyRequestType_REPLACEMENT;
    Tick latency = cyclesToTicks(
        m_controller->mandatoryQueueLatency(request_type));
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict Read-only data
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        DPRINTF(RubySequencer, "Evicting addr 0x%x\n", addr);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_num_pending_invs++;
    }
    DPRINTF(RubySequencer,
        "There are %d Invalidations outstanding after Cache Walk\n",
        m_num_pending_invs);
}
bool
Sequencer::empty() const
{
@@ -915,6 +1002,11 @@ Sequencer::makeRequest(PacketPtr pkt)
}
} else if (pkt->isFlush()) {
primary_type = secondary_type = RubyRequestType_FLUSH;
} else if (pkt->cmd == MemCmd::MemSyncReq) {
primary_type = secondary_type = RubyRequestType_REPLACEMENT;
assert(!m_cache_inv_pkt);
m_cache_inv_pkt = pkt;
invL1();
} else {
panic("Unsupported ruby packet type\n");
}

View File

@@ -141,6 +141,10 @@ class Sequencer : public RubyPort
const Cycles forwardRequestTime = Cycles(0),
const Cycles firstResponseTime = Cycles(0));
void completeHitCallback(std::vector<PacketPtr>& list);
void invL1Callback();
void invL1();
RequestStatus makeRequest(PacketPtr pkt) override;
virtual bool empty() const;
int outstandingCount() const override { return m_outstanding_count; }
@@ -243,6 +247,10 @@ class Sequencer : public RubyPort
private:
int m_max_outstanding_requests;
int m_num_pending_invs;
PacketPtr m_cache_inv_pkt;
CacheMemory* m_dataCache_ptr;
// The cache access latency for top-level caches (L0/L1). These are