mem-ruby: Update cache recorder to use GPUCoalescer port for GPUs

Previously, the cache recorder used the Sequencer to issue flush
requests and cache warmup requests. The GPU, however, uses the
GPUCoalescer to access the cache, not the Sequencer. This commit adds a
GPUCoalescer map to the cache recorder and uses it to send flush and
cache warmup requests to any GPU caches in the system.

Change-Id: I10490cf5e561c8559a98d4eb0550c62eefe769c9
This commit is contained in:
Vishnu Ramadas
2023-09-29 18:27:46 -05:00
parent 085789d00c
commit ae5a51994c
3 changed files with 49 additions and 5 deletions

View File

@@ -30,8 +30,11 @@
#include "mem/ruby/system/CacheRecorder.hh"
#include "debug/RubyCacheTrace.hh"
#include "mem/packet.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "mem/ruby/system/Sequencer.hh"
#include "sim/sim_exit.hh"
namespace gem5
{
@@ -57,11 +60,13 @@ CacheRecorder::CacheRecorder()
CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace,
uint64_t uncompressed_trace_size,
std::vector<Sequencer*>& seq_map,
std::vector<GPUCoalescer*>& coal_map,
uint64_t block_size_bytes)
: m_uncompressed_trace(uncompressed_trace),
m_uncompressed_trace_size(uncompressed_trace_size),
m_seq_map(seq_map), m_bytes_read(0), m_records_read(0),
m_records_flushed(0), m_block_size_bytes(block_size_bytes)
m_seq_map(seq_map), m_coalescer_map(coal_map), m_bytes_read(0),
m_records_read(0), m_records_flushed(0),
m_block_size_bytes(block_size_bytes)
{
if (m_uncompressed_trace != NULL) {
if (m_block_size_bytes < RubySystem::getBlockSizeBytes()) {
@@ -81,6 +86,7 @@ CacheRecorder::~CacheRecorder()
m_uncompressed_trace = NULL;
}
m_seq_map.clear();
m_coalescer_map.clear();
}
void
@@ -96,11 +102,21 @@ CacheRecorder::enqueueNextFlushRequest()
Packet *pkt = new Packet(req, requestType);
Sequencer* m_sequencer_ptr = m_seq_map[rec->m_cntrl_id];
GPUCoalescer* m_coal_ptr = m_coalescer_map[rec->m_cntrl_id];
assert(m_sequencer_ptr != NULL);
m_sequencer_ptr->makeRequest(pkt);
if (m_coal_ptr == NULL)
m_sequencer_ptr->makeRequest(pkt);
else {
pkt->req->setReqInstSeqNum(m_records_flushed - 1);
m_coal_ptr->makeRequest(pkt);
}
DPRINTF(RubyCacheTrace, "Flushing %s\n", *rec);
} else {
if (m_records_flushed > 0) {
exitSimLoop("Finished Drain", 0);
}
DPRINTF(RubyCacheTrace, "Flushed all %d records\n", m_records_flushed);
}
}
@@ -143,13 +159,21 @@ CacheRecorder::enqueueNextFetchRequest()
pkt->dataStatic(traceRecord->m_data + rec_bytes_read);
Sequencer* m_sequencer_ptr = m_seq_map[traceRecord->m_cntrl_id];
GPUCoalescer* m_coal_ptr;
m_coal_ptr = m_coalescer_map[traceRecord->m_cntrl_id];
assert(m_sequencer_ptr != NULL);
m_sequencer_ptr->makeRequest(pkt);
if (m_coal_ptr == NULL)
m_sequencer_ptr->makeRequest(pkt);
else {
pkt->req->setReqInstSeqNum(m_records_read);
m_coal_ptr->makeRequest(pkt);
}
}
m_bytes_read += (sizeof(TraceRecord) + m_block_size_bytes);
m_records_read++;
} else {
exitSimLoop("Finished Warmup", 0);
DPRINTF(RubyCacheTrace, "Fetched all %d records\n", m_records_read);
}
}
@@ -168,6 +192,8 @@ CacheRecorder::addRecord(int cntrl, Addr data_addr, Addr pc_addr,
memcpy(rec->m_data, data.getData(0, m_block_size_bytes),
m_block_size_bytes);
DPRINTF(RubyCacheTrace, "Inside addRecord with cntrl id %d and type %d\n",
cntrl, type);
m_records.push_back(rec);
}

View File

@@ -50,6 +50,7 @@ namespace ruby
{
class Sequencer;
class GPUCoalescer;
/*!
* Class for recording cache contents. Note that the last element of the
@@ -79,6 +80,7 @@ class CacheRecorder
CacheRecorder(uint8_t* uncompressed_trace,
uint64_t uncompressed_trace_size,
std::vector<Sequencer*>& SequencerMap,
std::vector<GPUCoalescer*>& CoalescerMap,
uint64_t block_size_bytes);
void addRecord(int cntrl, Addr data_addr, Addr pc_addr,
RubyRequestType type, Tick time, DataBlock& data);
@@ -115,6 +117,7 @@ class CacheRecorder
uint8_t* m_uncompressed_trace;
uint64_t m_uncompressed_trace_size;
std::vector<Sequencer*> m_seq_map;
std::vector<GPUCoalescer*> m_coalescer_map;
uint64_t m_bytes_read;
uint64_t m_records_read;
uint64_t m_records_flushed;

View File

@@ -178,13 +178,22 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
uint64_t block_size_bytes)
{
std::vector<Sequencer*> sequencer_map;
std::vector<GPUCoalescer*> coalescer_map;
Sequencer* sequencer_ptr = NULL;
GPUCoalescer* coalescer_ptr = NULL;
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getCPUSequencer());
coalescer_map.push_back(m_abs_cntrl_vec[cntrl]->getGPUCoalescer());
if (sequencer_ptr == NULL) {
sequencer_ptr = sequencer_map[cntrl];
}
if (coalescer_ptr == NULL) {
coalescer_ptr = coalescer_map[cntrl];
}
}
assert(sequencer_ptr != NULL);
@@ -193,6 +202,11 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
if (sequencer_map[cntrl] == NULL) {
sequencer_map[cntrl] = sequencer_ptr;
}
if (coalescer_map[cntrl] == NULL) {
coalescer_map[cntrl] = coalescer_ptr;
}
}
// Remove the old CacheRecorder if it's still hanging about.
@@ -202,7 +216,8 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
// Create the CacheRecorder and record the cache trace
m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
sequencer_map, block_size_bytes);
sequencer_map, coalescer_map,
block_size_bytes);
}
void