From ae5a51994c112d04bfe0eb84189ca9ec5a46102e Mon Sep 17 00:00:00 2001 From: Vishnu Ramadas Date: Fri, 29 Sep 2023 18:27:46 -0500 Subject: [PATCH] mem-ruby: Update cache recorder to use GPUCoalescer port for GPUs Previously, the cache recorder used the Sequencer to issue flush requests and cache warmup requests. The GPU, however, uses the GPUCoalescer to access the cache, not the Sequencer. This commit adds a GPUCoalescer map to the cache recorder and uses it to send flushes and cache warmup requests to any GPU caches in the system. Change-Id: I10490cf5e561c8559a98d4eb0550c62eefe769c9 --- src/mem/ruby/system/CacheRecorder.cc | 34 ++++++++++++++++++++++++---- src/mem/ruby/system/CacheRecorder.hh | 3 +++ src/mem/ruby/system/RubySystem.cc | 17 +++++++++++++- 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/src/mem/ruby/system/CacheRecorder.cc b/src/mem/ruby/system/CacheRecorder.cc index 20a8a30ebc..ec552c07c5 100644 --- a/src/mem/ruby/system/CacheRecorder.cc +++ b/src/mem/ruby/system/CacheRecorder.cc @@ -30,8 +30,11 @@ #include "mem/ruby/system/CacheRecorder.hh" #include "debug/RubyCacheTrace.hh" +#include "mem/packet.hh" +#include "mem/ruby/system/GPUCoalescer.hh" #include "mem/ruby/system/RubySystem.hh" #include "mem/ruby/system/Sequencer.hh" +#include "sim/sim_exit.hh" namespace gem5 { @@ -57,11 +60,13 @@ CacheRecorder::CacheRecorder() CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace, uint64_t uncompressed_trace_size, std::vector& seq_map, + std::vector& coal_map, uint64_t block_size_bytes) : m_uncompressed_trace(uncompressed_trace), m_uncompressed_trace_size(uncompressed_trace_size), - m_seq_map(seq_map), m_bytes_read(0), m_records_read(0), - m_records_flushed(0), m_block_size_bytes(block_size_bytes) + m_seq_map(seq_map), m_coalescer_map(coal_map), m_bytes_read(0), + m_records_read(0), m_records_flushed(0), + m_block_size_bytes(block_size_bytes) { if (m_uncompressed_trace != NULL) { if (m_block_size_bytes < RubySystem::getBlockSizeBytes()) { @@ 
-81,6 +86,7 @@ CacheRecorder::~CacheRecorder() m_uncompressed_trace = NULL; } m_seq_map.clear(); + m_coalescer_map.clear(); } void @@ -96,11 +102,21 @@ CacheRecorder::enqueueNextFlushRequest() Packet *pkt = new Packet(req, requestType); Sequencer* m_sequencer_ptr = m_seq_map[rec->m_cntrl_id]; + GPUCoalescer* m_coal_ptr = m_coalescer_map[rec->m_cntrl_id]; assert(m_sequencer_ptr != NULL); - m_sequencer_ptr->makeRequest(pkt); + if (m_coal_ptr == NULL) + m_sequencer_ptr->makeRequest(pkt); + else { + pkt->req->setReqInstSeqNum(m_records_flushed - 1); + m_coal_ptr->makeRequest(pkt); + } DPRINTF(RubyCacheTrace, "Flushing %s\n", *rec); + } else { + if (m_records_flushed > 0) { + exitSimLoop("Finished Drain", 0); + } DPRINTF(RubyCacheTrace, "Flushed all %d records\n", m_records_flushed); } } @@ -143,13 +159,21 @@ CacheRecorder::enqueueNextFetchRequest() pkt->dataStatic(traceRecord->m_data + rec_bytes_read); Sequencer* m_sequencer_ptr = m_seq_map[traceRecord->m_cntrl_id]; + GPUCoalescer* m_coal_ptr; + m_coal_ptr = m_coalescer_map[traceRecord->m_cntrl_id]; assert(m_sequencer_ptr != NULL); - m_sequencer_ptr->makeRequest(pkt); + if (m_coal_ptr == NULL) + m_sequencer_ptr->makeRequest(pkt); + else { + pkt->req->setReqInstSeqNum(m_records_read); + m_coal_ptr->makeRequest(pkt); + } } m_bytes_read += (sizeof(TraceRecord) + m_block_size_bytes); m_records_read++; } else { + exitSimLoop("Finished Warmup", 0); DPRINTF(RubyCacheTrace, "Fetched all %d records\n", m_records_read); } } @@ -168,6 +192,8 @@ CacheRecorder::addRecord(int cntrl, Addr data_addr, Addr pc_addr, memcpy(rec->m_data, data.getData(0, m_block_size_bytes), m_block_size_bytes); + DPRINTF(RubyCacheTrace, "Inside addRecord with cntrl id %d and type %d\n", + cntrl, type); m_records.push_back(rec); } diff --git a/src/mem/ruby/system/CacheRecorder.hh b/src/mem/ruby/system/CacheRecorder.hh index be95590313..9363e2fde7 100644 --- a/src/mem/ruby/system/CacheRecorder.hh +++ b/src/mem/ruby/system/CacheRecorder.hh @@ -50,6 +50,7 @@ 
namespace ruby { class Sequencer; +class GPUCoalescer; /*! * Class for recording cache contents. Note that the last element of the @@ -79,6 +80,7 @@ class CacheRecorder CacheRecorder(uint8_t* uncompressed_trace, uint64_t uncompressed_trace_size, std::vector& SequencerMap, + std::vector& CoalescerMap, uint64_t block_size_bytes); void addRecord(int cntrl, Addr data_addr, Addr pc_addr, RubyRequestType type, Tick time, DataBlock& data); @@ -115,6 +117,7 @@ class CacheRecorder uint8_t* m_uncompressed_trace; uint64_t m_uncompressed_trace_size; std::vector m_seq_map; + std::vector m_coalescer_map; uint64_t m_bytes_read; uint64_t m_records_read; uint64_t m_records_flushed; diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc index b38c903b09..232e337752 100644 --- a/src/mem/ruby/system/RubySystem.cc +++ b/src/mem/ruby/system/RubySystem.cc @@ -178,13 +178,22 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace, uint64_t block_size_bytes) { std::vector sequencer_map; + std::vector coalescer_map; Sequencer* sequencer_ptr = NULL; + GPUCoalescer* coalescer_ptr = NULL; for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) { sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getCPUSequencer()); + coalescer_map.push_back(m_abs_cntrl_vec[cntrl]->getGPUCoalescer()); + if (sequencer_ptr == NULL) { sequencer_ptr = sequencer_map[cntrl]; } + + if (coalescer_ptr == NULL) { + coalescer_ptr = coalescer_map[cntrl]; + } + } assert(sequencer_ptr != NULL); @@ -193,6 +202,11 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace, if (sequencer_map[cntrl] == NULL) { sequencer_map[cntrl] = sequencer_ptr; } + + if (coalescer_map[cntrl] == NULL) { + coalescer_map[cntrl] = coalescer_ptr; + } + } // Remove the old CacheRecorder if it's still hanging about. 
@@ -202,7 +216,8 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace, // Create the CacheRecorder and record the cache trace m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, - sequencer_map, block_size_bytes); + sequencer_map, coalescer_map, + block_size_bytes); } void