mem-ruby: Update cache recorder to use GPUCoalescer port for GPUs

Previously, the cache recorder used the Sequencer to issue flush requests and cache warmup requests. The GPU, however, uses the GPUCoalescer to access the cache, not the Sequencer. This commit adds a GPUCoalescer map to the cache recorder and uses it to send flush requests and cache warmup requests to any GPU caches in the system. Change-Id: I10490cf5e561c8559a98d4eb0550c62eefe769c9
2023-09-29 18:27:46 -05:00
parent 085789d00c
commit ae5a51994c
3 changed files with 49 additions and 5 deletions
--- a/src/mem/ruby/system/CacheRecorder.cc
+++ b/src/mem/ruby/system/CacheRecorder.cc
@@ -30,8 +30,11 @@
 #include "mem/ruby/system/CacheRecorder.hh"

 #include "debug/RubyCacheTrace.hh"
+#include "mem/packet.hh"
+#include "mem/ruby/system/GPUCoalescer.hh"
 #include "mem/ruby/system/RubySystem.hh"
 #include "mem/ruby/system/Sequencer.hh"
+#include "sim/sim_exit.hh"

 namespace gem5
 {
@@ -57,11 +60,13 @@ CacheRecorder::CacheRecorder()
 CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace,
                             uint64_t uncompressed_trace_size,
                             std::vector<Sequencer*>& seq_map,
+                             std::vector<GPUCoalescer*>& coal_map,
                             uint64_t block_size_bytes)
    : m_uncompressed_trace(uncompressed_trace),
      m_uncompressed_trace_size(uncompressed_trace_size),
-      m_seq_map(seq_map),  m_bytes_read(0), m_records_read(0),
-      m_records_flushed(0), m_block_size_bytes(block_size_bytes)
+      m_seq_map(seq_map), m_coalescer_map(coal_map), m_bytes_read(0),
+      m_records_read(0), m_records_flushed(0),
+      m_block_size_bytes(block_size_bytes)
 {
    if (m_uncompressed_trace != NULL) {
        if (m_block_size_bytes < RubySystem::getBlockSizeBytes()) {
@@ -81,6 +86,7 @@ CacheRecorder::~CacheRecorder()
        m_uncompressed_trace = NULL;
    }
    m_seq_map.clear();
+    m_coalescer_map.clear();
 }

 void
@@ -96,11 +102,21 @@ CacheRecorder::enqueueNextFlushRequest()
        Packet *pkt = new Packet(req, requestType);

        Sequencer* m_sequencer_ptr = m_seq_map[rec->m_cntrl_id];
+        GPUCoalescer* m_coal_ptr = m_coalescer_map[rec->m_cntrl_id];
        assert(m_sequencer_ptr != NULL);
-        m_sequencer_ptr->makeRequest(pkt);
+        if (m_coal_ptr == NULL)
+            m_sequencer_ptr->makeRequest(pkt);
+        else {
+            pkt->req->setReqInstSeqNum(m_records_flushed - 1);
+            m_coal_ptr->makeRequest(pkt);
+        }

        DPRINTF(RubyCacheTrace, "Flushing %s\n", *rec);
+
    } else {
+        if (m_records_flushed > 0) {
+            exitSimLoop("Finished Drain", 0);
+        }
        DPRINTF(RubyCacheTrace, "Flushed all %d records\n", m_records_flushed);
    }
 }
@@ -143,13 +159,21 @@ CacheRecorder::enqueueNextFetchRequest()
            pkt->dataStatic(traceRecord->m_data + rec_bytes_read);

            Sequencer* m_sequencer_ptr = m_seq_map[traceRecord->m_cntrl_id];
+            GPUCoalescer* m_coal_ptr;
+            m_coal_ptr = m_coalescer_map[traceRecord->m_cntrl_id];
            assert(m_sequencer_ptr != NULL);
-            m_sequencer_ptr->makeRequest(pkt);
+            if (m_coal_ptr == NULL)
+                m_sequencer_ptr->makeRequest(pkt);
+            else {
+                pkt->req->setReqInstSeqNum(m_records_read);
+                m_coal_ptr->makeRequest(pkt);
+            }
        }

        m_bytes_read += (sizeof(TraceRecord) + m_block_size_bytes);
        m_records_read++;
    } else {
+        exitSimLoop("Finished Warmup", 0);
        DPRINTF(RubyCacheTrace, "Fetched all %d records\n", m_records_read);
    }
 }
@@ -168,6 +192,8 @@ CacheRecorder::addRecord(int cntrl, Addr data_addr, Addr pc_addr,
    memcpy(rec->m_data, data.getData(0, m_block_size_bytes),
           m_block_size_bytes);

+    DPRINTF(RubyCacheTrace, "Inside addRecord with cntrl id %d and type %d\n",
+            cntrl, type);
    m_records.push_back(rec);
 }

--- a/src/mem/ruby/system/CacheRecorder.hh
+++ b/src/mem/ruby/system/CacheRecorder.hh
@@ -50,6 +50,7 @@ namespace ruby
 {

 class Sequencer;
+class GPUCoalescer;

 /*!
 * Class for recording cache contents. Note that the last element of the
@@ -79,6 +80,7 @@ class CacheRecorder
    CacheRecorder(uint8_t* uncompressed_trace,
                  uint64_t uncompressed_trace_size,
                  std::vector<Sequencer*>& SequencerMap,
+                  std::vector<GPUCoalescer*>& CoalescerMap,
                  uint64_t block_size_bytes);
    void addRecord(int cntrl, Addr data_addr, Addr pc_addr,
                   RubyRequestType type, Tick time, DataBlock& data);
@@ -115,6 +117,7 @@ class CacheRecorder
    uint8_t* m_uncompressed_trace;
    uint64_t m_uncompressed_trace_size;
    std::vector<Sequencer*> m_seq_map;
+    std::vector<GPUCoalescer*> m_coalescer_map;
    uint64_t m_bytes_read;
    uint64_t m_records_read;
    uint64_t m_records_flushed;
--- a/src/mem/ruby/system/RubySystem.cc
+++ b/src/mem/ruby/system/RubySystem.cc
@@ -178,13 +178,22 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
                              uint64_t block_size_bytes)
 {
    std::vector<Sequencer*> sequencer_map;
+    std::vector<GPUCoalescer*> coalescer_map;
    Sequencer* sequencer_ptr = NULL;
+    GPUCoalescer* coalescer_ptr = NULL;

    for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
        sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getCPUSequencer());
+        coalescer_map.push_back(m_abs_cntrl_vec[cntrl]->getGPUCoalescer());
+
        if (sequencer_ptr == NULL) {
            sequencer_ptr = sequencer_map[cntrl];
        }
+
+        if (coalescer_ptr == NULL) {
+            coalescer_ptr = coalescer_map[cntrl];
+        }
+
    }

    assert(sequencer_ptr != NULL);
@@ -193,6 +202,11 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,
        if (sequencer_map[cntrl] == NULL) {
            sequencer_map[cntrl] = sequencer_ptr;
        }
+
+        if (coalescer_map[cntrl] == NULL) {
+            coalescer_map[cntrl] = coalescer_ptr;
+        }
+
    }

    // Remove the old CacheRecorder if it's still hanging about.
@@ -202,7 +216,8 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace,

    // Create the CacheRecorder and record the cache trace
    m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
-                                         sequencer_map, block_size_bytes);
+                                         sequencer_map, coalescer_map,
+                                         block_size_bytes);
 }

 void