diff --git a/configs/example/ruby_gpu_random_test.py b/configs/example/ruby_gpu_random_test.py index bfcd2c953d..eb7dd3acbd 100644 --- a/configs/example/ruby_gpu_random_test.py +++ b/configs/example/ruby_gpu_random_test.py @@ -371,6 +371,7 @@ for dma_idx in range(n_DMAs): num_lanes=1, clk_domain=thread_clock, deadlock_threshold=tester_deadlock_threshold, + cache_line_size=system.cache_line_size, ) ) g_thread_idx += 1 @@ -393,6 +394,7 @@ for cu_idx in range(n_CUs): num_lanes=args.wf_size, clk_domain=thread_clock, deadlock_threshold=tester_deadlock_threshold, + cache_line_size=system.cache_line_size, ) ) g_thread_idx += 1 diff --git a/configs/learning_gem5/part3/msi_caches.py b/configs/learning_gem5/part3/msi_caches.py index c198662c5e..b719c7ab60 100644 --- a/configs/learning_gem5/part3/msi_caches.py +++ b/configs/learning_gem5/part3/msi_caches.py @@ -84,6 +84,7 @@ class MyCacheSystem(RubySystem): # I/D cache is combined and grab from ctrl dcache=self.controllers[i].cacheMemory, clk_domain=self.controllers[i].clk_domain, + ruby_system=self, ) for i in range(len(cpus)) ] @@ -191,7 +192,9 @@ class DirController(Directory_Controller): self.version = self.versionCount() self.addr_ranges = ranges self.ruby_system = ruby_system - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) # Connect this directory to the memory side. 
self.memory = mem_ctrls[0].port self.connectQueues(ruby_system) diff --git a/configs/learning_gem5/part3/ruby_caches_MI_example.py b/configs/learning_gem5/part3/ruby_caches_MI_example.py index baee120bb9..583041a674 100644 --- a/configs/learning_gem5/part3/ruby_caches_MI_example.py +++ b/configs/learning_gem5/part3/ruby_caches_MI_example.py @@ -84,6 +84,7 @@ class MyCacheSystem(RubySystem): # I/D cache is combined and grab from ctrl dcache=self.controllers[i].cacheMemory, clk_domain=self.controllers[i].clk_domain, + ruby_system=self, ) for i in range(len(cpus)) ] @@ -180,7 +181,9 @@ class DirController(Directory_Controller): self.version = self.versionCount() self.addr_ranges = ranges self.ruby_system = ruby_system - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) # Connect this directory to the memory side. self.memory = mem_ctrls[0].port self.connectQueues(ruby_system) diff --git a/configs/learning_gem5/part3/test_caches.py b/configs/learning_gem5/part3/test_caches.py index 4e8e8febda..be2d46253e 100644 --- a/configs/learning_gem5/part3/test_caches.py +++ b/configs/learning_gem5/part3/test_caches.py @@ -79,6 +79,7 @@ class TestCacheSystem(RubySystem): # I/D cache is combined and grab from ctrl dcache=self.controllers[i].cacheMemory, clk_domain=self.clk_domain, + ruby_system=self, ) for i in range(num_testers) ] diff --git a/configs/ruby/AMD_Base_Constructor.py b/configs/ruby/AMD_Base_Constructor.py index ff4246a7e0..7d40862517 100644 --- a/configs/ruby/AMD_Base_Constructor.py +++ b/configs/ruby/AMD_Base_Constructor.py @@ -84,14 +84,14 @@ class CPCntrl(AMD_Base_Controller, CntrlBase): self.L2cache = L2Cache() self.L2cache.create(options.l2_size, options.l2_assoc, options) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1D0cache self.sequencer.ruby_system = ruby_system 
self.sequencer.coreid = 0 self.sequencer.is_cpu_sequencer = True - self.sequencer1 = RubySequencer() + self.sequencer1 = RubySequencer(ruby_system=ruby_system) self.sequencer1.version = self.seqCount() self.sequencer1.dcache = self.L1D1cache self.sequencer1.ruby_system = ruby_system diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py index 313d1d514a..15108bb674 100644 --- a/configs/ruby/GPU_VIPER.py +++ b/configs/ruby/GPU_VIPER.py @@ -114,14 +114,14 @@ class CPCntrl(CorePair_Controller, CntrlBase): self.L2cache = L2Cache() self.L2cache.create(options.l2_size, options.l2_assoc, options) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1D0cache self.sequencer.ruby_system = ruby_system self.sequencer.coreid = 0 self.sequencer.is_cpu_sequencer = True - self.sequencer1 = RubySequencer() + self.sequencer1 = RubySequencer(ruby_system=ruby_system) self.sequencer1.version = self.seqCount() self.sequencer1.dcache = self.L1D1cache self.sequencer1.ruby_system = ruby_system @@ -169,7 +169,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): # TCP_Controller inherits this from RubyController self.mandatory_queue_latency = options.mandatory_queue_latency - self.coalescer = VIPERCoalescer() + self.coalescer = VIPERCoalescer(ruby_system=ruby_system) self.coalescer.version = self.seqCount() self.coalescer.icache = self.L1cache self.coalescer.dcache = self.L1cache @@ -182,7 +182,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): options.max_coalesces_per_cycle ) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1cache self.sequencer.ruby_system = ruby_system @@ -211,7 +211,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): self.L1cache.create(options) self.issue_latency = 1 - self.coalescer = VIPERCoalescer() + self.coalescer = 
VIPERCoalescer(ruby_system=ruby_system) self.coalescer.version = self.seqCount() self.coalescer.icache = self.L1cache self.coalescer.dcache = self.L1cache @@ -219,7 +219,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): self.coalescer.support_inst_reqs = False self.coalescer.is_cpu_sequencer = False - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1cache self.sequencer.ruby_system = ruby_system @@ -387,7 +387,9 @@ class DirCntrl(Directory_Controller, CntrlBase): self.response_latency = 30 self.addr_ranges = dir_ranges - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) self.L3CacheMemory = L3Cache() self.L3CacheMemory.create(options, ruby_system, system) @@ -686,7 +688,7 @@ def construct_gpudirs(options, system, ruby_system, network): dir_cntrl.addr_ranges = dram_intf.range # Append - exec("system.ruby.gpu_dir_cntrl%d = dir_cntrl" % i) + exec("ruby_system.gpu_dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) mem_ctrls.append(mem_ctrl) diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py index e0de4e0636..9054fefc01 100644 --- a/configs/ruby/MESI_Three_Level.py +++ b/configs/ruby/MESI_Three_Level.py @@ -148,6 +148,7 @@ def create_system( train_misses=5, num_startup_pfs=4, cross_page=True, + block_size=options.cacheline_size, ) l0_cntrl = L0Cache_Controller( diff --git a/configs/ruby/MESI_Three_Level_HTM.py b/configs/ruby/MESI_Three_Level_HTM.py index e6c4e81f91..d7ad3bdc04 100644 --- a/configs/ruby/MESI_Three_Level_HTM.py +++ b/configs/ruby/MESI_Three_Level_HTM.py @@ -148,6 +148,7 @@ def create_system( train_misses=5, num_startup_pfs=4, cross_page=True, + block_size=options.cacheline_size, ) l0_cntrl = L0Cache_Controller( diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py index 500afbc199..6e1e0b97f3 100644 --- 
a/configs/ruby/MESI_Two_Level.py +++ b/configs/ruby/MESI_Two_Level.py @@ -94,7 +94,7 @@ def create_system( is_icache=False, ) - prefetcher = RubyPrefetcher() + prefetcher = RubyPrefetcher(block_size=options.cacheline_size) clk_domain = cpus[i].clk_domain diff --git a/configs/ruby/MOESI_AMD_Base.py b/configs/ruby/MOESI_AMD_Base.py index aeab96a85f..1095defc57 100644 --- a/configs/ruby/MOESI_AMD_Base.py +++ b/configs/ruby/MOESI_AMD_Base.py @@ -112,14 +112,14 @@ class CPCntrl(CorePair_Controller, CntrlBase): self.L2cache = L2Cache() self.L2cache.create(options) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1D0cache self.sequencer.ruby_system = ruby_system self.sequencer.coreid = 0 self.sequencer.is_cpu_sequencer = True - self.sequencer1 = RubySequencer() + self.sequencer1 = RubySequencer(ruby_system=ruby_system) self.sequencer1.version = self.seqCount() self.sequencer1.dcache = self.L1D1cache self.sequencer1.ruby_system = ruby_system @@ -194,7 +194,9 @@ class DirCntrl(Directory_Controller, CntrlBase): self.response_latency = 30 self.addr_ranges = dir_ranges - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) self.L3CacheMemory = L3Cache() self.L3CacheMemory.create(options, ruby_system, system) diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py index e427a39de8..0a6671aa4b 100644 --- a/configs/ruby/Ruby.py +++ b/configs/ruby/Ruby.py @@ -308,7 +308,9 @@ def create_directories(options, bootmem, ruby_system, system): for i in range(options.num_dirs): dir_cntrl = Directory_Controller() dir_cntrl.version = i - dir_cntrl.directory = RubyDirectoryMemory() + dir_cntrl.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) dir_cntrl.ruby_system = ruby_system exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) @@ -316,7 +318,9 @@ def create_directories(options, 
bootmem, ruby_system, system): if bootmem is not None: rom_dir_cntrl = Directory_Controller() - rom_dir_cntrl.directory = RubyDirectoryMemory() + rom_dir_cntrl.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) rom_dir_cntrl.ruby_system = ruby_system rom_dir_cntrl.version = i + 1 rom_dir_cntrl.memory = bootmem.port diff --git a/src/cpu/testers/gpu_ruby_test/TesterThread.py b/src/cpu/testers/gpu_ruby_test/TesterThread.py index 49388a76e1..6ddfc66ddc 100644 --- a/src/cpu/testers/gpu_ruby_test/TesterThread.py +++ b/src/cpu/testers/gpu_ruby_test/TesterThread.py @@ -41,3 +41,4 @@ class TesterThread(ClockedObject): thread_id = Param.Int("Unique TesterThread ID") num_lanes = Param.Int("Number of lanes this thread has") deadlock_threshold = Param.Cycles(1000000000, "Deadlock threshold") + cache_line_size = Param.UInt32("Size of cache line in cache") diff --git a/src/cpu/testers/gpu_ruby_test/address_manager.cc b/src/cpu/testers/gpu_ruby_test/address_manager.cc index a0c0670a8f..83d8a1a277 100644 --- a/src/cpu/testers/gpu_ruby_test/address_manager.cc +++ b/src/cpu/testers/gpu_ruby_test/address_manager.cc @@ -64,7 +64,9 @@ AddressManager::AddressManager(int n_atomic_locs, int n_normal_locs_per_atomic) std::shuffle( randAddressMap.begin(), randAddressMap.end(), - std::default_random_engine(random_mt.random(0,UINT_MAX)) + // TODO: This is a bug unrelated to this draft PR but the GPU tester is + // useful for testing this PR. 
+ std::default_random_engine(random_mt.random(0,UINT_MAX-1)) ); // initialize atomic locations diff --git a/src/cpu/testers/gpu_ruby_test/dma_thread.cc b/src/cpu/testers/gpu_ruby_test/dma_thread.cc index 1d6f46c44b..2c4c610c51 100644 --- a/src/cpu/testers/gpu_ruby_test/dma_thread.cc +++ b/src/cpu/testers/gpu_ruby_test/dma_thread.cc @@ -70,7 +70,7 @@ DmaThread::issueLoadOps() Addr address = addrManager->getAddress(location); DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - ruby::printAddress(address)); + printAddress(address)); int load_size = sizeof(Value); @@ -127,7 +127,7 @@ DmaThread::issueStoreOps() DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - " "Value %d\n", this->getName(), - curEpisode->getEpisodeId(), ruby::printAddress(address), + curEpisode->getEpisodeId(), printAddress(address), new_value); auto req = std::make_shared(address, sizeof(Value), @@ -211,7 +211,7 @@ DmaThread::hitCallback(PacketPtr pkt) DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s -" " Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - resp_cmd.toString(), ruby::printAddress(addr)); + resp_cmd.toString(), printAddress(addr)); if (resp_cmd == MemCmd::SwapResp) { // response to a pending atomic diff --git a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc index ae4078ee6c..516e77ddae 100644 --- a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc +++ b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc @@ -67,7 +67,7 @@ GpuWavefront::issueLoadOps() Addr address = addrManager->getAddress(location); DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - ruby::printAddress(address)); + printAddress(address)); int load_size = sizeof(Value); @@ -124,7 +124,7 @@ GpuWavefront::issueStoreOps() DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - " "Value %d\n", this->getName(), - 
curEpisode->getEpisodeId(), ruby::printAddress(address), + curEpisode->getEpisodeId(), printAddress(address), new_value); auto req = std::make_shared(address, sizeof(Value), @@ -178,7 +178,7 @@ GpuWavefront::issueAtomicOps() DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - ruby::printAddress(address)); + printAddress(address)); // must be aligned with store size assert(address % sizeof(Value) == 0); @@ -268,7 +268,7 @@ GpuWavefront::hitCallback(PacketPtr pkt) DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - " "Addr %s\n", this->getName(), curEpisode->getEpisodeId(), resp_cmd.toString(), - ruby::printAddress(addr)); + printAddress(addr)); // whether the transaction is done after this hitCallback bool isTransactionDone = true; diff --git a/src/cpu/testers/gpu_ruby_test/tester_thread.cc b/src/cpu/testers/gpu_ruby_test/tester_thread.cc index ce3a1bccc6..dbcfba8c3c 100644 --- a/src/cpu/testers/gpu_ruby_test/tester_thread.cc +++ b/src/cpu/testers/gpu_ruby_test/tester_thread.cc @@ -43,6 +43,7 @@ TesterThread::TesterThread(const Params &p) : ClockedObject(p), threadEvent(this, "TesterThread tick"), deadlockCheckEvent(this), + cacheLineSize(p.cache_line_size), threadId(p.thread_id), numLanes(p.num_lanes), tester(nullptr), addrManager(nullptr), port(nullptr), @@ -383,7 +384,7 @@ TesterThread::validateAtomicResp(Location loc, int lane, Value ret_val) ss << threadName << ": Atomic Op returned unexpected value\n" << "\tEpisode " << curEpisode->getEpisodeId() << "\n" << "\tLane ID " << lane << "\n" - << "\tAddress " << ruby::printAddress(addr) << "\n" + << "\tAddress " << printAddress(addr) << "\n" << "\tAtomic Op's return value " << ret_val << "\n"; // print out basic info @@ -409,7 +410,7 @@ TesterThread::validateLoadResp(Location loc, int lane, Value ret_val) << "\tTesterThread " << threadId << "\n" << "\tEpisode " << curEpisode->getEpisodeId() << "\n" << "\tLane ID " << lane << "\n" - << 
"\tAddress " << ruby::printAddress(addr) << "\n" + << "\tAddress " << printAddress(addr) << "\n" << "\tLoaded value " << ret_val << "\n" << "\tLast writer " << addrManager->printLastWriter(loc) << "\n"; @@ -467,7 +468,7 @@ TesterThread::printOutstandingReqs(const OutstandingReqTable& table, for (const auto& m : table) { for (const auto& req : m.second) { - ss << "\t\t\tAddr " << ruby::printAddress(m.first) + ss << "\t\t\tAddr " << printAddress(m.first) << ": delta (curCycle - issueCycle) = " << (cur_cycle - req.issueCycle) << std::endl; } @@ -488,4 +489,10 @@ TesterThread::printAllOutstandingReqs(std::stringstream& ss) const << pendingFenceCount << std::endl; } +std::string +TesterThread::printAddress(Addr addr) const +{ + return ruby::printAddress(addr, cacheLineSize * 8); +} + } // namespace gem5 diff --git a/src/cpu/testers/gpu_ruby_test/tester_thread.hh b/src/cpu/testers/gpu_ruby_test/tester_thread.hh index 9877d63c24..f31a5a3dea 100644 --- a/src/cpu/testers/gpu_ruby_test/tester_thread.hh +++ b/src/cpu/testers/gpu_ruby_test/tester_thread.hh @@ -132,6 +132,7 @@ class TesterThread : public ClockedObject {} }; + int cacheLineSize; // the unique global id of this thread int threadId; // width of this thread (1 for cpu thread & wf size for gpu wavefront) @@ -204,6 +205,7 @@ class TesterThread : public ClockedObject void printOutstandingReqs(const OutstandingReqTable& table, std::stringstream& ss) const; + std::string printAddress(Addr addr) const; }; } // namespace gem5 diff --git a/src/cpu/testers/rubytest/Check.cc b/src/cpu/testers/rubytest/Check.cc index 5a83d9ca27..b9c777526a 100644 --- a/src/cpu/testers/rubytest/Check.cc +++ b/src/cpu/testers/rubytest/Check.cc @@ -124,7 +124,8 @@ Check::initiatePrefetch() // push the subblock onto the sender state. 
The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(m_address, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "successfully initiated prefetch.\n"); @@ -161,7 +162,8 @@ Check::initiateFlush() // push the subblock onto the sender state. The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(m_address, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "initiating Flush - successful\n"); @@ -207,7 +209,8 @@ Check::initiateAction() // push the subblock onto the sender state. The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(writeAddr, req->getSize()); + pkt->senderState = new SenderState(writeAddr, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "initiating action - successful\n"); @@ -261,7 +264,8 @@ Check::initiateCheck() // push the subblock onto the sender state.
The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(m_address, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "initiating check - successful\n"); @@ -291,7 +295,9 @@ Check::performCallback(ruby::NodeID proc, ruby::SubBlock* data, Cycles curTime) // This isn't exactly right since we now have multi-byte checks // assert(getAddress() == address); - assert(ruby::makeLineAddress(m_address) == ruby::makeLineAddress(address)); + int block_size_bits = CACHE_LINE_BITS; + assert(ruby::makeLineAddress(m_address, block_size_bits) == + ruby::makeLineAddress(address, block_size_bits)); assert(data != NULL); DPRINTF(RubyTest, "RubyTester Callback\n"); @@ -342,7 +348,7 @@ Check::performCallback(ruby::NodeID proc, ruby::SubBlock* data, Cycles curTime) } DPRINTF(RubyTest, "proc: %d, Address: 0x%x\n", proc, - ruby::makeLineAddress(m_address)); + ruby::makeLineAddress(m_address, block_size_bits)); DPRINTF(RubyTest, "Callback done\n"); debugPrint(); } diff --git a/src/cpu/testers/rubytest/Check.hh b/src/cpu/testers/rubytest/Check.hh index 78e2bda77e..0270b800d7 100644 --- a/src/cpu/testers/rubytest/Check.hh +++ b/src/cpu/testers/rubytest/Check.hh @@ -47,6 +47,7 @@ class SubBlock; const int CHECK_SIZE_BITS = 2; const int CHECK_SIZE = (1 << CHECK_SIZE_BITS); +const int CACHE_LINE_BITS = 6; class Check { diff --git a/src/cpu/testers/rubytest/RubyTester.hh b/src/cpu/testers/rubytest/RubyTester.hh index 9397126180..d306c405ef 100644 --- a/src/cpu/testers/rubytest/RubyTester.hh +++ b/src/cpu/testers/rubytest/RubyTester.hh @@ -90,7 +90,9 @@ class RubyTester : public ClockedObject { ruby::SubBlock subBlock; - SenderState(Addr addr, int size) : subBlock(addr, size) {} + SenderState(Addr addr, int size, int cl_size) + : subBlock(addr, size, cl_size) + {} }; diff --git a/src/mem/ruby/common/Address.cc b/src/mem/ruby/common/Address.cc index 
fcf291af51..8b120324c7 100644 --- a/src/mem/ruby/common/Address.cc +++ b/src/mem/ruby/common/Address.cc @@ -51,37 +51,33 @@ maskLowOrderBits(Addr addr, unsigned int number) } Addr -getOffset(Addr addr) +getOffset(Addr addr, int cacheLineBits) { - return bitSelect(addr, 0, RubySystem::getBlockSizeBits() - 1); -} - -Addr -makeLineAddress(Addr addr) -{ - return mbits(addr, 63, RubySystem::getBlockSizeBits()); + assert(cacheLineBits < 64); + return bitSelect(addr, 0, cacheLineBits - 1); } Addr makeLineAddress(Addr addr, int cacheLineBits) { + assert(cacheLineBits < 64); return maskLowOrderBits(addr, cacheLineBits); } // returns the next stride address based on line address Addr -makeNextStrideAddress(Addr addr, int stride) +makeNextStrideAddress(Addr addr, int stride, int cacheLineBytes) { - return makeLineAddress(addr) + - static_cast(RubySystem::getBlockSizeBytes()) * stride; + return makeLineAddress(addr, floorLog2(cacheLineBytes)) + + cacheLineBytes * stride; } std::string -printAddress(Addr addr) +printAddress(Addr addr, int cacheLineBits) { std::stringstream out; out << "[" << std::hex << "0x" << addr << "," << " line 0x" - << makeLineAddress(addr) << std::dec << "]"; + << makeLineAddress(addr, cacheLineBits) << std::dec << "]"; return out.str(); } diff --git a/src/mem/ruby/common/Address.hh b/src/mem/ruby/common/Address.hh index 565c3c1fb7..51e0b5417a 100644 --- a/src/mem/ruby/common/Address.hh +++ b/src/mem/ruby/common/Address.hh @@ -33,6 +33,7 @@ #include #include +#include "base/intmath.hh" #include "base/types.hh" namespace gem5 @@ -44,11 +45,10 @@ namespace ruby // selects bits inclusive Addr bitSelect(Addr addr, unsigned int small, unsigned int big); Addr maskLowOrderBits(Addr addr, unsigned int number); -Addr getOffset(Addr addr); -Addr makeLineAddress(Addr addr); +Addr getOffset(Addr addr, int cacheLineBits); Addr makeLineAddress(Addr addr, int cacheLineBits); -Addr makeNextStrideAddress(Addr addr, int stride); -std::string printAddress(Addr addr); +Addr 
makeNextStrideAddress(Addr addr, int stride, int cacheLineBytes); +std::string printAddress(Addr addr, int cacheLineBits); } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/common/DataBlock.cc b/src/mem/ruby/common/DataBlock.cc index 8f47d0026b..bbc0fd21c8 100644 --- a/src/mem/ruby/common/DataBlock.cc +++ b/src/mem/ruby/common/DataBlock.cc @@ -40,8 +40,8 @@ #include "mem/ruby/common/DataBlock.hh" +#include "mem/ruby/common/Address.hh" #include "mem/ruby/common/WriteMask.hh" -#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -51,17 +51,21 @@ namespace ruby DataBlock::DataBlock(const DataBlock &cp) { + assert(cp.isAlloc()); + assert(cp.getBlockSize() > 0); + assert(!m_alloc); + uint8_t *block_update; - size_t block_bytes = RubySystem::getBlockSizeBytes(); - m_data = new uint8_t[block_bytes]; - memcpy(m_data, cp.m_data, block_bytes); + m_block_size = cp.getBlockSize(); + m_data = new uint8_t[m_block_size]; + memcpy(m_data, cp.m_data, m_block_size); m_alloc = true; // If this data block is involved in an atomic operation, the effect // of applying the atomic operations on the data block are recorded in // m_atomicLog.
If so, we must copy over every entry in the change log for (size_t i = 0; i < cp.m_atomicLog.size(); i++) { - block_update = new uint8_t[block_bytes]; - memcpy(block_update, cp.m_atomicLog[i], block_bytes); + block_update = new uint8_t[m_block_size]; + memcpy(block_update, cp.m_atomicLog[i], m_block_size); m_atomicLog.push_back(block_update); } } @@ -69,21 +74,44 @@ DataBlock::DataBlock(const DataBlock &cp) void DataBlock::alloc() { - m_data = new uint8_t[RubySystem::getBlockSizeBytes()]; + assert(!m_alloc); + + if (!m_block_size) { + return; + } + + m_data = new uint8_t[m_block_size]; m_alloc = true; clear(); } +void +DataBlock::realloc(int blk_size) +{ + m_block_size = blk_size; + assert(m_block_size > 0); + + if (m_alloc) { + delete [] m_data; + m_alloc = false; + } + alloc(); +} + void DataBlock::clear() { - memset(m_data, 0, RubySystem::getBlockSizeBytes()); + assert(m_alloc); + assert(m_block_size > 0); + memset(m_data, 0, m_block_size); } bool DataBlock::equal(const DataBlock& obj) const { - size_t block_bytes = RubySystem::getBlockSizeBytes(); + assert(m_alloc); + assert(m_block_size > 0); + size_t block_bytes = m_block_size; // Check that the block contents match if (memcmp(m_data, obj.m_data, block_bytes)) { return false; @@ -102,7 +130,9 @@ DataBlock::equal(const DataBlock& obj) const void DataBlock::copyPartial(const DataBlock &dblk, const WriteMask &mask) { - for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) { + assert(m_alloc); + assert(m_block_size > 0); + for (int i = 0; i < m_block_size; i++) { if (mask.getMask(i, 1)) { m_data[i] = dblk.m_data[i]; } @@ -113,7 +143,9 @@ void DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask, bool isAtomicNoReturn) { - for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) { + assert(m_alloc); + assert(m_block_size > 0); + for (int i = 0; i < m_block_size; i++) { m_data[i] = dblk.m_data[i]; } mask.performAtomic(m_data, m_atomicLog, isAtomicNoReturn); @@ -122,7 +154,9 @@ 
DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask, void DataBlock::print(std::ostream& out) const { - int size = RubySystem::getBlockSizeBytes(); + assert(m_alloc); + assert(m_block_size > 0); + int size = m_block_size; out << "[ "; for (int i = 0; i < size; i++) { out << std::setw(2) << std::setfill('0') << std::hex @@ -147,6 +181,7 @@ DataBlock::popAtomicLogEntryFront() void DataBlock::clearAtomicLogEntries() { + assert(m_alloc); for (auto log : m_atomicLog) { delete [] log; } @@ -156,35 +191,59 @@ DataBlock::clearAtomicLogEntries() const uint8_t* DataBlock::getData(int offset, int len) const { - assert(offset + len <= RubySystem::getBlockSizeBytes()); + assert(m_alloc); + assert(m_block_size > 0); + assert(offset + len <= m_block_size); return &m_data[offset]; } uint8_t* DataBlock::getDataMod(int offset) { + assert(m_alloc); return &m_data[offset]; } void DataBlock::setData(const uint8_t *data, int offset, int len) { + assert(m_alloc); memcpy(&m_data[offset], data, len); } void DataBlock::setData(PacketPtr pkt) { - int offset = getOffset(pkt->getAddr()); - assert(offset + pkt->getSize() <= RubySystem::getBlockSizeBytes()); + assert(m_alloc); + assert(m_block_size > 0); + int offset = getOffset(pkt->getAddr(), floorLog2(m_block_size)); + assert(offset + pkt->getSize() <= m_block_size); pkt->writeData(&m_data[offset]); } DataBlock & DataBlock::operator=(const DataBlock & obj) { + // Reallocate if needed + if (m_alloc && m_block_size != obj.getBlockSize()) { + delete [] m_data; + m_block_size = obj.getBlockSize(); + alloc(); + } else if (!m_alloc) { + m_block_size = obj.getBlockSize(); + alloc(); + + // Assume this will be realloc'd later if zero. 
+ if (m_block_size == 0) { + return *this; + } + } else { + assert(m_alloc && m_block_size == obj.getBlockSize()); + } + assert(m_block_size > 0); + uint8_t *block_update; - size_t block_bytes = RubySystem::getBlockSizeBytes(); + size_t block_bytes = m_block_size; // Copy entire block contents from obj to current block memcpy(m_data, obj.m_data, block_bytes); // If this data block is involved in an atomic operation, the effect diff --git a/src/mem/ruby/common/DataBlock.hh b/src/mem/ruby/common/DataBlock.hh index 7456a25f3f..ebfa7d1383 100644 --- a/src/mem/ruby/common/DataBlock.hh +++ b/src/mem/ruby/common/DataBlock.hh @@ -61,8 +61,14 @@ class WriteMask; class DataBlock { public: - DataBlock() + // Ideally this should not be called. We allow default so that protocols + // do not need to be changed. + DataBlock() = default; + + DataBlock(int blk_size) { + assert(!m_alloc); + m_block_size = blk_size; alloc(); } @@ -101,10 +107,16 @@ class DataBlock bool equal(const DataBlock& obj) const; void print(std::ostream& out) const; + int getBlockSize() const { return m_block_size; } + void setBlockSize(int block_size) { realloc(block_size); } + bool isAlloc() const { return m_alloc; } + void realloc(int blk_size); + private: void alloc(); - uint8_t *m_data; - bool m_alloc; + uint8_t *m_data = nullptr; + bool m_alloc = false; + int m_block_size = 0; // Tracks block changes when atomic ops are applied std::deque m_atomicLog; @@ -124,18 +136,21 @@ DataBlock::assign(uint8_t *data) inline uint8_t DataBlock::getByte(int whichByte) const { + assert(m_alloc); return m_data[whichByte]; } inline void DataBlock::setByte(int whichByte, uint8_t data) { + assert(m_alloc); m_data[whichByte] = data; } inline void DataBlock::copyPartial(const DataBlock & dblk, int offset, int len) { + assert(m_alloc); setData(&dblk.m_data[offset], offset, len); } diff --git a/src/mem/ruby/common/NetDest.cc b/src/mem/ruby/common/NetDest.cc index ba64f2febd..944315b97f 100644 ---
a/src/mem/ruby/common/NetDest.cc +++ b/src/mem/ruby/common/NetDest.cc @@ -30,6 +30,8 @@ #include +#include "mem/ruby/system/RubySystem.hh" + namespace gem5 { @@ -38,12 +40,18 @@ namespace ruby NetDest::NetDest() { - resize(); +} + +NetDest::NetDest(RubySystem *ruby_system) + : m_ruby_system(ruby_system) +{ + resize(); } void NetDest::add(MachineID newElement) { + assert(m_bits.size() > 0); assert(bitIndex(newElement.num) < m_bits[vecIndex(newElement)].getSize()); m_bits[vecIndex(newElement)].add(bitIndex(newElement.num)); } @@ -51,6 +59,7 @@ NetDest::add(MachineID newElement) void NetDest::addNetDest(const NetDest& netDest) { + assert(m_bits.size() > 0); assert(m_bits.size() == netDest.getSize()); for (int i = 0; i < m_bits.size(); i++) { m_bits[i].addSet(netDest.m_bits[i]); @@ -60,6 +69,8 @@ NetDest::addNetDest(const NetDest& netDest) void NetDest::setNetDest(MachineType machine, const Set& set) { + assert(m_ruby_system != nullptr); + // assure that there is only one set of destinations for this machine assert(MachineType_base_level((MachineType)(machine + 1)) - MachineType_base_level(machine) == 1); @@ -69,12 +80,14 @@ NetDest::setNetDest(MachineType machine, const Set& set) void NetDest::remove(MachineID oldElement) { + assert(m_bits.size() > 0); m_bits[vecIndex(oldElement)].remove(bitIndex(oldElement.num)); } void NetDest::removeNetDest(const NetDest& netDest) { + assert(m_bits.size() > 0); assert(m_bits.size() == netDest.getSize()); for (int i = 0; i < m_bits.size(); i++) { m_bits[i].removeSet(netDest.m_bits[i]); @@ -84,6 +97,7 @@ NetDest::removeNetDest(const NetDest& netDest) void NetDest::clear() { + assert(m_bits.size() > 0); for (int i = 0; i < m_bits.size(); i++) { m_bits[i].clear(); } @@ -101,6 +115,8 @@ NetDest::broadcast() void NetDest::broadcast(MachineType machineType) { + assert(m_ruby_system != nullptr); + for (NodeID i = 0; i < MachineType_base_count(machineType); i++) { MachineID mach = {machineType, i}; add(mach); @@ -111,6 +127,9 @@ 
NetDest::broadcast(MachineType machineType) std::vector NetDest::getAllDest() { + assert(m_ruby_system != nullptr); + assert(m_bits.size() > 0); + std::vector dest; dest.clear(); for (int i = 0; i < m_bits.size(); i++) { @@ -127,6 +146,8 @@ NetDest::getAllDest() int NetDest::count() const { + assert(m_bits.size() > 0); + int counter = 0; for (int i = 0; i < m_bits.size(); i++) { counter += m_bits[i].count(); @@ -137,12 +158,14 @@ NetDest::count() const NodeID NetDest::elementAt(MachineID index) { + assert(m_bits.size() > 0); return m_bits[vecIndex(index)].elementAt(bitIndex(index.num)); } MachineID NetDest::smallestElement() const { + assert(m_bits.size() > 0); assert(count() > 0); for (int i = 0; i < m_bits.size(); i++) { for (NodeID j = 0; j < m_bits[i].getSize(); j++) { @@ -158,6 +181,9 @@ NetDest::smallestElement() const MachineID NetDest::smallestElement(MachineType machine) const { + assert(m_bits.size() > 0); + assert(m_ruby_system != nullptr); + int size = m_bits[MachineType_base_level(machine)].getSize(); for (NodeID j = 0; j < size; j++) { if (m_bits[MachineType_base_level(machine)].isElement(j)) { @@ -173,6 +199,7 @@ NetDest::smallestElement(MachineType machine) const bool NetDest::isBroadcast() const { + assert(m_bits.size() > 0); for (int i = 0; i < m_bits.size(); i++) { if (!m_bits[i].isBroadcast()) { return false; @@ -185,6 +212,7 @@ NetDest::isBroadcast() const bool NetDest::isEmpty() const { + assert(m_bits.size() > 0); for (int i = 0; i < m_bits.size(); i++) { if (!m_bits[i].isEmpty()) { return false; @@ -197,8 +225,9 @@ NetDest::isEmpty() const NetDest NetDest::OR(const NetDest& orNetDest) const { + assert(m_bits.size() > 0); assert(m_bits.size() == orNetDest.getSize()); - NetDest result; + NetDest result(m_ruby_system); for (int i = 0; i < m_bits.size(); i++) { result.m_bits[i] = m_bits[i].OR(orNetDest.m_bits[i]); } @@ -209,8 +238,9 @@ NetDest::OR(const NetDest& orNetDest) const NetDest NetDest::AND(const NetDest& andNetDest) const { + 
assert(m_bits.size() > 0); assert(m_bits.size() == andNetDest.getSize()); - NetDest result; + NetDest result(m_ruby_system); for (int i = 0; i < m_bits.size(); i++) { result.m_bits[i] = m_bits[i].AND(andNetDest.m_bits[i]); } @@ -221,6 +251,7 @@ NetDest::AND(const NetDest& andNetDest) const bool NetDest::intersectionIsNotEmpty(const NetDest& other_netDest) const { + assert(m_bits.size() > 0); assert(m_bits.size() == other_netDest.getSize()); for (int i = 0; i < m_bits.size(); i++) { if (!m_bits[i].intersectionIsEmpty(other_netDest.m_bits[i])) { @@ -233,6 +264,7 @@ NetDest::intersectionIsNotEmpty(const NetDest& other_netDest) const bool NetDest::isSuperset(const NetDest& test) const { + assert(m_bits.size() > 0); assert(m_bits.size() == test.getSize()); for (int i = 0; i < m_bits.size(); i++) { @@ -246,12 +278,15 @@ NetDest::isSuperset(const NetDest& test) const bool NetDest::isElement(MachineID element) const { + assert(m_bits.size() > 0); return ((m_bits[vecIndex(element)])).isElement(bitIndex(element.num)); } void NetDest::resize() { + assert(m_ruby_system != nullptr); + m_bits.resize(MachineType_base_level(MachineType_NUM)); assert(m_bits.size() == MachineType_NUM); @@ -263,6 +298,7 @@ NetDest::resize() void NetDest::print(std::ostream& out) const { + assert(m_bits.size() > 0); out << "[NetDest (" << m_bits.size() << ") "; for (int i = 0; i < m_bits.size(); i++) { @@ -277,6 +313,7 @@ NetDest::print(std::ostream& out) const bool NetDest::isEqual(const NetDest& n) const { + assert(m_bits.size() > 0); assert(m_bits.size() == n.m_bits.size()); for (unsigned int i = 0; i < m_bits.size(); ++i) { if (!m_bits[i].isEqual(n.m_bits[i])) @@ -285,5 +322,19 @@ NetDest::isEqual(const NetDest& n) const return true; } +int +NetDest::MachineType_base_count(const MachineType& obj) +{ + assert(m_ruby_system != nullptr); + return m_ruby_system->MachineType_base_count(obj); +} + +int +NetDest::MachineType_base_number(const MachineType& obj) +{ + assert(m_ruby_system != nullptr); + 
return m_ruby_system->MachineType_base_number(obj); +} + } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/common/NetDest.hh b/src/mem/ruby/common/NetDest.hh index e71b876754..83f340a478 100644 --- a/src/mem/ruby/common/NetDest.hh +++ b/src/mem/ruby/common/NetDest.hh @@ -41,6 +41,8 @@ namespace gem5 namespace ruby { +class RubySystem; + // NetDest specifies the network destination of a Message class NetDest { @@ -48,6 +50,7 @@ class NetDest // Constructors // creates and empty set NetDest(); + NetDest(RubySystem *ruby_system); explicit NetDest(int bit_size); NetDest& operator=(const Set& obj); @@ -98,6 +101,8 @@ class NetDest void print(std::ostream& out) const; + void setRubySystem(RubySystem *rs) { m_ruby_system = rs; resize(); } + private: // returns a value >= MachineType_base_level("this machine") // and < MachineType_base_level("next highest machine") @@ -112,6 +117,12 @@ class NetDest NodeID bitIndex(NodeID index) const { return index; } std::vector m_bits; // a vector of bit vectors - i.e. 
Sets + + // Needed to call MachineType_base_count/number + RubySystem *m_ruby_system = nullptr; + + int MachineType_base_count(const MachineType& obj); + int MachineType_base_number(const MachineType& obj); }; inline std::ostream& diff --git a/src/mem/ruby/common/SubBlock.cc b/src/mem/ruby/common/SubBlock.cc index 92cfd8b633..be0adc1233 100644 --- a/src/mem/ruby/common/SubBlock.cc +++ b/src/mem/ruby/common/SubBlock.cc @@ -38,13 +38,14 @@ namespace ruby using stl_helpers::operator<<; -SubBlock::SubBlock(Addr addr, int size) +SubBlock::SubBlock(Addr addr, int size, int cl_bits) { m_address = addr; resize(size); for (int i = 0; i < size; i++) { setByte(i, 0); } + m_cache_line_bits = cl_bits; } void @@ -52,7 +53,7 @@ SubBlock::internalMergeFrom(const DataBlock& data) { int size = getSize(); assert(size > 0); - int offset = getOffset(m_address); + int offset = getOffset(m_address, m_cache_line_bits); for (int i = 0; i < size; i++) { this->setByte(i, data.getByte(offset + i)); } @@ -63,7 +64,7 @@ internalMergeTo(DataBlock& data) const { int size = getSize(); assert(size > 0); - int offset = getOffset(m_address); + int offset = getOffset(m_address, m_cache_line_bits); for (int i = 0; i < size; i++) { // This will detect crossing a cache line boundary data.setByte(offset + i, this->getByte(i)); diff --git a/src/mem/ruby/common/SubBlock.hh b/src/mem/ruby/common/SubBlock.hh index e1a83600c2..3790bbac58 100644 --- a/src/mem/ruby/common/SubBlock.hh +++ b/src/mem/ruby/common/SubBlock.hh @@ -45,7 +45,7 @@ class SubBlock { public: SubBlock() { } - SubBlock(Addr addr, int size); + SubBlock(Addr addr, int size, int cl_bits); ~SubBlock() { } Addr getAddress() const { return m_address; } @@ -74,6 +74,7 @@ class SubBlock // Data Members (m_ prefix) Addr m_address; std::vector m_data; + int m_cache_line_bits; }; inline std::ostream& diff --git a/src/mem/ruby/common/WriteMask.cc b/src/mem/ruby/common/WriteMask.cc index 1fa03c951e..f176aec9fc 100644 ---
a/src/mem/ruby/common/WriteMask.cc +++ b/src/mem/ruby/common/WriteMask.cc @@ -39,13 +39,13 @@ namespace ruby { WriteMask::WriteMask() - : mSize(RubySystem::getBlockSizeBytes()), mMask(mSize, false), - mAtomic(false) + : mSize(0), mMask(mSize, false), mAtomic(false) {} void WriteMask::print(std::ostream& out) const { + assert(mSize > 0); std::string str(mSize,'0'); for (int i = 0; i < mSize; i++) { str[i] = mMask[i] ? ('1') : ('0'); @@ -59,6 +59,7 @@ void WriteMask::performAtomic(uint8_t * p, std::deque& log, bool isAtomicNoReturn) const { + assert(mSize > 0); int offset; uint8_t *block_update; // Here, operations occur in FIFO order from the mAtomicOp diff --git a/src/mem/ruby/common/WriteMask.hh b/src/mem/ruby/common/WriteMask.hh index 8c6b8ce976..e620997cd8 100644 --- a/src/mem/ruby/common/WriteMask.hh +++ b/src/mem/ruby/common/WriteMask.hh @@ -78,6 +78,17 @@ class WriteMask ~WriteMask() {} + int getBlockSize() const { return mSize; } + void + setBlockSize(int size) + { + // This should only be used once if the default ctor was used. Probably + // by src/mem/ruby/protocol/RubySlicc_MemControl.sm. 
+ assert(mSize == 0); + assert(size > 0); + mSize = size; + } + void clear() { @@ -87,6 +98,7 @@ class WriteMask bool test(int offset) const { + assert(mSize > 0); assert(offset < mSize); return mMask[offset]; } @@ -94,6 +106,7 @@ class WriteMask void setMask(int offset, int len, bool val = true) { + assert(mSize > 0); assert(mSize >= (offset + len)); for (int i = 0; i < len; i++) { mMask[offset + i] = val; @@ -102,6 +115,7 @@ class WriteMask void fillMask() { + assert(mSize > 0); for (int i = 0; i < mSize; i++) { mMask[i] = true; } @@ -111,6 +125,7 @@ class WriteMask getMask(int offset, int len) const { bool tmp = true; + assert(mSize > 0); assert(mSize >= (offset + len)); for (int i = 0; i < len; i++) { tmp = tmp & mMask.at(offset + i); @@ -122,6 +137,7 @@ class WriteMask isOverlap(const WriteMask &readMask) const { bool tmp = false; + assert(mSize > 0); assert(mSize == readMask.mSize); for (int i = 0; i < mSize; i++) { if (readMask.mMask.at(i)) { @@ -135,6 +151,7 @@ class WriteMask containsMask(const WriteMask &readMask) const { bool tmp = true; + assert(mSize > 0); assert(mSize == readMask.mSize); for (int i = 0; i < mSize; i++) { if (readMask.mMask.at(i)) { @@ -146,6 +163,7 @@ class WriteMask bool isEmpty() const { + assert(mSize > 0); for (int i = 0; i < mSize; i++) { if (mMask.at(i)) { return false; @@ -157,6 +175,7 @@ class WriteMask bool isFull() const { + assert(mSize > 0); for (int i = 0; i < mSize; i++) { if (!mMask.at(i)) { return false; @@ -168,6 +187,7 @@ class WriteMask void andMask(const WriteMask & writeMask) { + assert(mSize > 0); assert(mSize == writeMask.mSize); for (int i = 0; i < mSize; i++) { mMask[i] = (mMask.at(i)) && (writeMask.mMask.at(i)); @@ -182,6 +202,7 @@ class WriteMask void orMask(const WriteMask & writeMask) { + assert(mSize > 0); assert(mSize == writeMask.mSize); for (int i = 0; i < mSize; i++) { mMask[i] = (mMask.at(i)) || (writeMask.mMask.at(i)); @@ -196,6 +217,7 @@ class WriteMask void setInvertedMask(const WriteMask & 
writeMask) { + assert(mSize > 0); assert(mSize == writeMask.mSize); for (int i = 0; i < mSize; i++) { mMask[i] = !writeMask.mMask.at(i); @@ -205,6 +227,7 @@ class WriteMask int firstBitSet(bool val, int offset = 0) const { + assert(mSize > 0); for (int i = offset; i < mSize; ++i) if (mMask[i] == val) return i; @@ -214,6 +237,7 @@ class WriteMask int count(int offset = 0) const { + assert(mSize > 0); int count = 0; for (int i = offset; i < mSize; ++i) count += mMask[i]; diff --git a/src/mem/ruby/network/MessageBuffer.cc b/src/mem/ruby/network/MessageBuffer.cc index 9a4439a538..8b3a724469 100644 --- a/src/mem/ruby/network/MessageBuffer.cc +++ b/src/mem/ruby/network/MessageBuffer.cc @@ -47,7 +47,6 @@ #include "base/random.hh" #include "base/stl_helpers.hh" #include "debug/RubyQueue.hh" -#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -216,6 +215,7 @@ random_time() void MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, + bool ruby_is_random, bool ruby_warmup, bool bypassStrictFIFO) { // record current time incase we have a pop that also adjusts my size @@ -237,7 +237,7 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, // is turned on and this buffer allows it if ((m_randomization == MessageRandomization::disabled) || ((m_randomization == MessageRandomization::ruby_system) && - !RubySystem::getRandomization())) { + !ruby_is_random)) { // No randomization arrival_time = current_time + delta; } else { @@ -265,7 +265,7 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, } // If running a cache trace, don't worry about the last arrival checks - if (!RubySystem::getWarmupEnabled()) { + if (!ruby_warmup) { m_last_arrival_time = arrival_time; } @@ -447,7 +447,6 @@ MessageBuffer::stallMessage(Addr addr, Tick current_time) { DPRINTF(RubyQueue, "Stalling due to %#x\n", addr); assert(isReady(current_time)); - assert(getOffset(addr) == 0); MsgPtr message = m_prio_heap.front(); // Since the message will 
just be moved to stall map, indicate that the @@ -479,7 +478,8 @@ MessageBuffer::deferEnqueueingMessage(Addr addr, MsgPtr message) } void -MessageBuffer::enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay) +MessageBuffer::enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay, + bool ruby_is_random, bool ruby_warmup) { assert(!isDeferredMsgMapEmpty(addr)); std::vector& msg_vec = m_deferred_msg_map[addr]; @@ -487,7 +487,7 @@ MessageBuffer::enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay) // enqueue all deferred messages associated with this address for (MsgPtr m : msg_vec) { - enqueue(m, curTime, delay); + enqueue(m, curTime, delay, ruby_is_random, ruby_warmup); } msg_vec.clear(); diff --git a/src/mem/ruby/network/MessageBuffer.hh b/src/mem/ruby/network/MessageBuffer.hh index 03a0454433..b45e531d11 100644 --- a/src/mem/ruby/network/MessageBuffer.hh +++ b/src/mem/ruby/network/MessageBuffer.hh @@ -90,13 +90,14 @@ class MessageBuffer : public SimObject Tick readyTime() const; void - delayHead(Tick current_time, Tick delta) + delayHead(Tick current_time, Tick delta, bool ruby_is_random, + bool ruby_warmup) { MsgPtr m = m_prio_heap.front(); std::pop_heap(m_prio_heap.begin(), m_prio_heap.end(), std::greater()); m_prio_heap.pop_back(); - enqueue(m, current_time, delta); + enqueue(m, current_time, delta, ruby_is_random, ruby_warmup); } bool areNSlotsAvailable(unsigned int n, Tick curTime); @@ -124,6 +125,7 @@ class MessageBuffer : public SimObject const MsgPtr &peekMsgPtr() const { return m_prio_heap.front(); } void enqueue(MsgPtr message, Tick curTime, Tick delta, + bool ruby_is_random, bool ruby_warmup, bool bypassStrictFIFO = false); // Defer enqueueing a message to a later cycle by putting it aside and not @@ -135,7 +137,8 @@ class MessageBuffer : public SimObject // enqueue all previously deferred messages that are associated with the // input address - void enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay); + void 
enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay, + bool ruby_is_random, bool ruby_warmup); bool isDeferredMsgMapEmpty(Addr addr) const; //! Updates the delay cycles of the message at the head of the queue, diff --git a/src/mem/ruby/network/Network.cc b/src/mem/ruby/network/Network.cc index 757ed9498e..480b5bcef0 100644 --- a/src/mem/ruby/network/Network.cc +++ b/src/mem/ruby/network/Network.cc @@ -65,7 +65,8 @@ Network::Network(const Params &p) "%s: data message size > cache line size", name()); m_data_msg_size = p.data_msg_size + m_control_msg_size; - params().ruby_system->registerNetwork(this); + m_ruby_system = p.ruby_system; + m_ruby_system->registerNetwork(this); // Populate localNodeVersions with the version of each MachineType in // this network. This will be used to compute a global to local ID. @@ -102,7 +103,8 @@ Network::Network(const Params &p) m_topology_ptr = new Topology(m_nodes, p.routers.size(), m_virtual_networks, - p.ext_links, p.int_links); + p.ext_links, p.int_links, + m_ruby_system); // Allocate to and from queues // Queues that are getting messages from protocol @@ -246,7 +248,7 @@ Network::addressToNodeID(Addr addr, MachineType mtype) } } } - return MachineType_base_count(mtype); + return m_ruby_system->MachineType_base_count(mtype); } NodeID @@ -256,5 +258,23 @@ Network::getLocalNodeID(NodeID global_id) const return globalToLocalMap.at(global_id); } +bool +Network::getRandomization() const +{ + return m_ruby_system->getRandomization(); +} + +bool +Network::getWarmupEnabled() const +{ + return m_ruby_system->getWarmupEnabled(); +} + +int +Network::MachineType_base_number(const MachineType& obj) +{ + return m_ruby_system->MachineType_base_number(obj); +} + } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/network/Network.hh b/src/mem/ruby/network/Network.hh index 8ca68a0279..c0d21af240 100644 --- a/src/mem/ruby/network/Network.hh +++ b/src/mem/ruby/network/Network.hh @@ -78,6 +78,7 @@ namespace ruby class NetDest; 
class MessageBuffer; +class RubySystem; class Network : public ClockedObject { @@ -147,6 +148,10 @@ class Network : public ClockedObject NodeID getLocalNodeID(NodeID global_id) const; + bool getRandomization() const; + bool getWarmupEnabled() const; + RubySystem *getRubySystem() const { return m_ruby_system; } + protected: // Private copy constructor and assignment operator Network(const Network& obj); @@ -176,6 +181,12 @@ class Network : public ClockedObject // Global NodeID to local node map. If there are not multiple networks in // the same RubySystem, this is a one-to-one mapping of global to local. std::unordered_map globalToLocalMap; + + // For accessing whether randomization/warmup are turned on. We cannot store + // those values in the constructor in case we are constructed first. + RubySystem *m_ruby_system = nullptr; + + int MachineType_base_number(const MachineType& obj); }; inline std::ostream& diff --git a/src/mem/ruby/network/Topology.cc b/src/mem/ruby/network/Topology.cc index 39444c9023..b2cd7897f8 100644 --- a/src/mem/ruby/network/Topology.cc +++ b/src/mem/ruby/network/Topology.cc @@ -37,6 +37,7 @@ #include "mem/ruby/network/BasicLink.hh" #include "mem/ruby/network/Network.hh" #include "mem/ruby/slicc_interface/AbstractController.hh" +#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -56,10 +57,12 @@ const int INFINITE_LATENCY = 10000; // Yes, this is a big hack Topology::Topology(uint32_t num_nodes, uint32_t num_routers, uint32_t num_vnets, const std::vector &ext_links, - const std::vector &int_links) - : m_nodes(MachineType_base_number(MachineType_NUM)), + const std::vector &int_links, + RubySystem *ruby_system) + : m_nodes(ruby_system->MachineType_base_number(MachineType_NUM)), m_number_of_switches(num_routers), m_vnets(num_vnets), - m_ext_link_vector(ext_links), m_int_link_vector(int_links) + m_ext_link_vector(ext_links), m_int_link_vector(int_links), + m_ruby_system(ruby_system) { // Total nodes/controllers in network assert(m_nodes >
1); @@ -78,7 +81,8 @@ Topology::Topology(uint32_t num_nodes, uint32_t num_routers, AbstractController *abs_cntrl = ext_link->params().ext_node; BasicRouter *router = ext_link->params().int_node; - int machine_base_idx = MachineType_base_number(abs_cntrl->getType()); + int machine_base_idx = + ruby_system->MachineType_base_number(abs_cntrl->getType()); int ext_idx1 = machine_base_idx + abs_cntrl->getVersion(); int ext_idx2 = ext_idx1 + m_nodes; int int_idx = router->params().router_id + 2*m_nodes; @@ -189,7 +193,7 @@ Topology::createLinks(Network *net) for (int i = 0; i < topology_weights[0].size(); i++) { for (int j = 0; j < topology_weights[0][i].size(); j++) { std::vector routingMap; - routingMap.resize(m_vnets); + routingMap.resize(m_vnets, m_ruby_system); // Not all sources and destinations are connected // by direct links. We only construct the links @@ -264,7 +268,7 @@ Topology::makeLink(Network *net, SwitchID src, SwitchID dest, for (int l = 0; l < links.size(); l++) { link_entry = links[l]; std::vector linkRoute; - linkRoute.resize(m_vnets); + linkRoute.resize(m_vnets, m_ruby_system); BasicLink *link = link_entry.link; if (link->mVnets.size() == 0) { net->makeExtInLink(src, dest - (2 * m_nodes), link, @@ -287,7 +291,7 @@ Topology::makeLink(Network *net, SwitchID src, SwitchID dest, for (int l = 0; l < links.size(); l++) { link_entry = links[l]; std::vector linkRoute; - linkRoute.resize(m_vnets); + linkRoute.resize(m_vnets, m_ruby_system); BasicLink *link = link_entry.link; if (link->mVnets.size() == 0) { net->makeExtOutLink(src - (2 * m_nodes), node, link, @@ -309,7 +313,7 @@ Topology::makeLink(Network *net, SwitchID src, SwitchID dest, for (int l = 0; l < links.size(); l++) { link_entry = links[l]; std::vector linkRoute; - linkRoute.resize(m_vnets); + linkRoute.resize(m_vnets, m_ruby_system); BasicLink *link = link_entry.link; if (link->mVnets.size() == 0) { net->makeInternalLink(src - (2 * m_nodes), @@ -413,16 +417,17 @@ 
Topology::shortest_path_to_node(SwitchID src, SwitchID next, const Matrix &weights, const Matrix &dist, int vnet) { - NetDest result; + NetDest result(m_ruby_system); int d = 0; int machines; int max_machines; machines = MachineType_NUM; - max_machines = MachineType_base_number(MachineType_NUM); + max_machines = m_ruby_system->MachineType_base_number(MachineType_NUM); for (int m = 0; m < machines; m++) { - for (NodeID i = 0; i < MachineType_base_count((MachineType)m); i++) { + for (NodeID i = 0; + i < m_ruby_system->MachineType_base_count((MachineType)m); i++) { // we use "d+max_machines" below since the "destination" // switches for the machines are numbered // [MachineType_base_number(MachineType_NUM)... diff --git a/src/mem/ruby/network/Topology.hh b/src/mem/ruby/network/Topology.hh index 301811e6ab..7ab395762a 100644 --- a/src/mem/ruby/network/Topology.hh +++ b/src/mem/ruby/network/Topology.hh @@ -80,7 +80,8 @@ class Topology public: Topology(uint32_t num_nodes, uint32_t num_routers, uint32_t num_vnets, const std::vector &ext_links, - const std::vector &int_links); + const std::vector &int_links, + RubySystem *ruby_system); uint32_t numSwitches() const { return m_number_of_switches; } void createLinks(Network *net); @@ -108,7 +109,7 @@ class Topology const Matrix &weights, const Matrix &dist, int vnet); - const uint32_t m_nodes; + uint32_t m_nodes; const uint32_t m_number_of_switches; int m_vnets; @@ -116,6 +117,8 @@ class Topology std::vector m_int_link_vector; LinkMap m_link_map; + + RubySystem *m_ruby_system = nullptr; }; inline std::ostream& diff --git a/src/mem/ruby/network/garnet/NetworkInterface.cc b/src/mem/ruby/network/garnet/NetworkInterface.cc index 31d625c4d5..8564baca6d 100644 --- a/src/mem/ruby/network/garnet/NetworkInterface.cc +++ b/src/mem/ruby/network/garnet/NetworkInterface.cc @@ -41,6 +41,7 @@ #include "mem/ruby/network/garnet/Credit.hh" #include "mem/ruby/network/garnet/flitBuffer.hh" #include "mem/ruby/slicc_interface/Message.hh" +#include 
"mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -244,7 +245,9 @@ NetworkInterface::wakeup() outNode_ptr[vnet]->areNSlotsAvailable(1, curTime)) { // Space is available. Enqueue to protocol buffer. outNode_ptr[vnet]->enqueue(t_flit->get_msg_ptr(), curTime, - cyclesToTicks(Cycles(1))); + cyclesToTicks(Cycles(1)), + m_net_ptr->getRandomization(), + m_net_ptr->getWarmupEnabled()); // Simply send a credit back since we are not buffering // this flit in the NI @@ -332,7 +335,9 @@ NetworkInterface::checkStallQueue() if (outNode_ptr[vnet]->areNSlotsAvailable(1, curTime)) { outNode_ptr[vnet]->enqueue(stallFlit->get_msg_ptr(), - curTime, cyclesToTicks(Cycles(1))); + curTime, cyclesToTicks(Cycles(1)), + m_net_ptr->getRandomization(), + m_net_ptr->getWarmupEnabled()); // Send back a credit with free signal now that the // VC is no longer stalled. @@ -699,6 +704,12 @@ NetworkInterface::functionalWrite(Packet *pkt) return num_functional_writes; } +int +NetworkInterface::MachineType_base_number(const MachineType& obj) +{ + return m_net_ptr->getRubySystem()->MachineType_base_number(obj); +} + } // namespace garnet } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/network/garnet/NetworkInterface.hh b/src/mem/ruby/network/garnet/NetworkInterface.hh index d42db5ee2a..cd7bb3b171 100644 --- a/src/mem/ruby/network/garnet/NetworkInterface.hh +++ b/src/mem/ruby/network/garnet/NetworkInterface.hh @@ -306,6 +306,8 @@ class NetworkInterface : public ClockedObject, public Consumer InputPort *getInportForVnet(int vnet); OutputPort *getOutportForVnet(int vnet); + + int MachineType_base_number(const MachineType& obj); }; } // namespace garnet diff --git a/src/mem/ruby/network/simple/PerfectSwitch.cc b/src/mem/ruby/network/simple/PerfectSwitch.cc index 74d78e3aae..20d57f04be 100644 --- a/src/mem/ruby/network/simple/PerfectSwitch.cc +++ b/src/mem/ruby/network/simple/PerfectSwitch.cc @@ -268,7 +268,8 @@ PerfectSwitch::operateMessageBuffer(MessageBuffer *buffer, int vnet) 
buffer->getIncomingLink(), vnet, outgoing, vnet); out_port.buffers[vnet]->enqueue(msg_ptr, current_time, - out_port.latency); + out_port.latency, m_switch->getNetPtr()->getRandomization(), + m_switch->getNetPtr()->getWarmupEnabled()); } } } diff --git a/src/mem/ruby/network/simple/Switch.hh b/src/mem/ruby/network/simple/Switch.hh index 86abfda871..e6e22022bc 100644 --- a/src/mem/ruby/network/simple/Switch.hh +++ b/src/mem/ruby/network/simple/Switch.hh @@ -104,6 +104,7 @@ class Switch : public BasicRouter void print(std::ostream& out) const; void init_net_ptr(SimpleNetwork* net_ptr) { m_network_ptr = net_ptr; } + SimpleNetwork* getNetPtr() const { return m_network_ptr; } bool functionalRead(Packet *); bool functionalRead(Packet *, WriteMask&); diff --git a/src/mem/ruby/network/simple/Throttle.cc b/src/mem/ruby/network/simple/Throttle.cc index 20cebccabb..fc5649330f 100644 --- a/src/mem/ruby/network/simple/Throttle.cc +++ b/src/mem/ruby/network/simple/Throttle.cc @@ -199,7 +199,9 @@ Throttle::operateVnet(int vnet, int channel, int &total_bw_remaining, // Move the message in->dequeue(current_time); out->enqueue(msg_ptr, current_time, - m_switch->cyclesToTicks(m_link_latency)); + m_switch->cyclesToTicks(m_link_latency), + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); // Count the message (*(throttleStats. 
diff --git a/src/mem/ruby/profiler/AddressProfiler.cc b/src/mem/ruby/profiler/AddressProfiler.cc index 05fc486c63..ce40c35a9f 100644 --- a/src/mem/ruby/profiler/AddressProfiler.cc +++ b/src/mem/ruby/profiler/AddressProfiler.cc @@ -34,6 +34,7 @@ #include "base/stl_helpers.hh" #include "mem/ruby/profiler/Profiler.hh" #include "mem/ruby/protocol/RubyRequest.hh" +#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -307,7 +308,8 @@ AddressProfiler::addTraceSample(Addr data_addr, Addr pc_addr, } // record data address trace info - data_addr = makeLineAddress(data_addr); + int block_size_bits = m_profiler->m_ruby_system->getBlockSizeBits(); + data_addr = makeLineAddress(data_addr, block_size_bits); lookupTraceForAddress(data_addr, m_dataAccessTrace). update(type, access_mode, id, sharing_miss); diff --git a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm index ca606a5921..43fb96c375 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm @@ -95,7 +95,7 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)") } TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; void set_cache_entry(AbstractCacheEntry b); void unset_cache_entry(); diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm index 5d98a73041..d1e1ffb7b0 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm @@ -121,7 +121,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") } TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; int WTcnt, default="0"; int Fcnt, default="0"; bool inFlush, default="false"; diff --git a/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm 
b/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm index bcf99ff362..ed5e40cfa1 100644 --- a/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm +++ b/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm @@ -167,7 +167,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Cycles ticksToCycles(Tick t); diff --git a/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm b/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm index 2b5935dee5..29f6d8e87d 100644 --- a/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm +++ b/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm @@ -167,7 +167,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Cycles ticksToCycles(Tick t); diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm index 5d85ad2fc6..bac7fd1b12 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm @@ -181,7 +181,7 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol") // Stores only region addresses TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -195,8 +195,8 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol") Cycles curCycle(); MachineID mapAddressToMachine(Addr addr, MachineType mtype); - int blockBits, default="RubySystem::getBlockSizeBits()"; - int 
blockBytes, default="RubySystem::getBlockSizeBytes()"; + int blockBits, default="m_ruby_system->getBlockSizeBits()"; + int blockBytes, default="m_ruby_system->getBlockSizeBytes()"; int regionBits, default="log2(m_blocksPerRegion)"; // Functions diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm index 2464e038ff..3f1ba2540f 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm @@ -155,7 +155,7 @@ machine(MachineType:RegionDir, "Region Directory for AMD_Base-like protocol") // Stores only region addresses TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -169,8 +169,8 @@ machine(MachineType:RegionDir, "Region Directory for AMD_Base-like protocol") Cycles curCycle(); MachineID mapAddressToMachine(Addr addr, MachineType mtype); - int blockBits, default="RubySystem::getBlockSizeBits()"; - int blockBytes, default="RubySystem::getBlockSizeBytes()"; + int blockBits, default="m_ruby_system->getBlockSizeBits()"; + int blockBytes, default="m_ruby_system->getBlockSizeBytes()"; int regionBits, default="log2(m_blocksPerRegion)"; // Functions diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm index 17a92f5f90..5b5ab3148a 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm @@ -183,7 +183,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm 
b/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm index 4e9e9597aa..b53ebe8ee2 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm @@ -192,7 +192,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm index 4a513d6d3f..b6410d12e7 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm @@ -143,7 +143,7 @@ machine(MachineType:Directory, "Directory protocol") bool isPresent(Addr); } - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // ** OBJECTS ** TBETable TBEs, template="", constructor="m_number_of_TBEs"; diff --git a/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm index 865fce4e3c..24f8146a02 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm @@ -198,7 +198,7 @@ machine(MachineType:L1Cache, "Token protocol") TBETable L1_TBEs, template="", constructor="m_number_of_TBEs"; bool starving, default="false"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; PersistentTable persistentTable; TimerTable useTimerTable; diff --git a/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm b/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm index 7f2bdf94e0..8d035a61bb 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm @@ -171,7 +171,7 @@ machine(MachineType:Directory, 
"Token protocol") TBETable TBEs, template="", constructor="m_number_of_TBEs"; bool starving, default="false"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick clockEdge(Cycles c); diff --git a/src/mem/ruby/protocol/RubySlicc_Exports.sm b/src/mem/ruby/protocol/RubySlicc_Exports.sm index 8f0341f328..97770e3516 100644 --- a/src/mem/ruby/protocol/RubySlicc_Exports.sm +++ b/src/mem/ruby/protocol/RubySlicc_Exports.sm @@ -72,6 +72,8 @@ structure(WriteMask, external="yes", desc="...") { int count(); int count(int); bool test(int); + int getBlockSize(); + void setBlockSize(int); } structure(DataBlock, external = "yes", desc="..."){ diff --git a/src/mem/ruby/protocol/RubySlicc_MemControl.sm b/src/mem/ruby/protocol/RubySlicc_MemControl.sm index 012b169dea..848ada4d12 100644 --- a/src/mem/ruby/protocol/RubySlicc_MemControl.sm +++ b/src/mem/ruby/protocol/RubySlicc_MemControl.sm @@ -89,7 +89,9 @@ structure(MemoryMsg, desc="...", interface="Message") { if ((MessageSize == MessageSizeType:Response_Data) || (MessageSize == MessageSizeType:Writeback_Data)) { WriteMask read_mask; - read_mask.setMask(addressOffset(addr, makeLineAddress(addr)), Len, true); + read_mask.setBlockSize(mask.getBlockSize()); + read_mask.setMask(addressOffset(addr, + makeLineAddress(addr, mask.getBlockSize())), Len, true); if (MessageSize != MessageSizeType:Writeback_Data) { read_mask.setInvertedMask(mask); } diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm index 4e0e4f4511..848d16491d 100644 --- a/src/mem/ruby/protocol/RubySlicc_Types.sm +++ b/src/mem/ruby/protocol/RubySlicc_Types.sm @@ -94,7 +94,7 @@ structure (Set, external = "yes", non_obj="yes") { NodeID smallestElement(); } -structure (NetDest, external = "yes", non_obj="yes") { +structure (NetDest, external = "yes", non_obj="yes", implicit_ctor="m_ruby_system") { void setSize(int); void 
setSize(int, int); void add(NodeID); diff --git a/src/mem/ruby/protocol/RubySlicc_Util.sm b/src/mem/ruby/protocol/RubySlicc_Util.sm index 104c7c034c..93976bc4e1 100644 --- a/src/mem/ruby/protocol/RubySlicc_Util.sm +++ b/src/mem/ruby/protocol/RubySlicc_Util.sm @@ -52,6 +52,7 @@ Addr intToAddress(int addr); int addressOffset(Addr addr, Addr base); int max_tokens(); Addr makeLineAddress(Addr addr); +Addr makeLineAddress(Addr addr, int cacheLineBits); int getOffset(Addr addr); int mod(int val, int mod); Addr bitSelect(Addr addr, int small, int big); diff --git a/src/mem/ruby/protocol/chi/CHI-cache.sm b/src/mem/ruby/protocol/chi/CHI-cache.sm index dcd142ea47..a644bbe506 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache.sm @@ -574,7 +574,7 @@ machine(MachineType:Cache, "Cache coherency protocol") : //////////////////////////////////////////////////////////////////////////// // Cache block size - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // CacheEntry structure(CacheEntry, interface="AbstractCacheEntry") { diff --git a/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm b/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm index aa27c40964..f7616e9ec4 100644 --- a/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm +++ b/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm @@ -192,7 +192,7 @@ machine(MachineType:MiscNode, "CHI Misc Node for handling and distrbuting DVM op //////////////////////////////////////////////////////////////////////////// // Cache block size - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // Helper class for tracking expected response and data messages structure(ExpectedMap, external ="yes") { diff --git a/src/mem/ruby/protocol/chi/CHI-mem.sm b/src/mem/ruby/protocol/chi/CHI-mem.sm index 46f57456a5..58f22d2007 100644 --- a/src/mem/ruby/protocol/chi/CHI-mem.sm +++ 
b/src/mem/ruby/protocol/chi/CHI-mem.sm @@ -157,7 +157,7 @@ machine(MachineType:Memory, "Memory controller interface") : //////////////////////////////////////////////////////////////////////////// // Cache block size - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // TBE fields structure(TBE, desc="...") { diff --git a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh index 0e00a60c28..1305deddce 100644 --- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh +++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh @@ -59,6 +59,8 @@ namespace gem5 namespace ruby { +class RubySystem; + class AbstractCacheEntry : public ReplaceableEntry { private: @@ -78,16 +80,15 @@ class AbstractCacheEntry : public ReplaceableEntry // The methods below are those called by ruby runtime, add when it // is absolutely necessary and should all be virtual function. - virtual DataBlock& + [[noreturn]] virtual DataBlock& getDataBlk() { panic("getDataBlk() not implemented!"); - - // Dummy return to appease the compiler - static DataBlock b; - return b; } + virtual void initBlockSize(int block_size) { }; + virtual void setRubySystem(RubySystem *rs) { }; + int validBlocks; virtual int& getNumValidBlocks() { diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index 36092387ac..0bcc662629 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -89,6 +89,9 @@ AbstractController::init() getMemReqQueue()->setConsumer(this); } + downstreamDestinations.setRubySystem(m_ruby_system); + upstreamDestinations.setRubySystem(m_ruby_system); + // Initialize the addr->downstream machine mappings. Multiple machines // in downstream_destinations can have the same address range if they have // different types. 
If this is the case, mapAddressToDownstreamMachine @@ -268,7 +271,7 @@ AbstractController::serviceMemoryQueue() } const MemoryMsg *mem_msg = (const MemoryMsg*)mem_queue->peek(); - unsigned int req_size = RubySystem::getBlockSizeBytes(); + unsigned int req_size = m_ruby_system->getBlockSizeBytes(); if (mem_msg->m_Len > 0) { req_size = mem_msg->m_Len; } @@ -294,7 +297,7 @@ AbstractController::serviceMemoryQueue() SenderState *s = new SenderState(mem_msg->m_Sender); pkt->pushSenderState(s); - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { // Use functional rather than timing accesses during warmup mem_queue->dequeue(clockEdge()); memoryPort.sendFunctional(pkt); @@ -382,7 +385,10 @@ AbstractController::recvTimingResp(PacketPtr pkt) return false; } - std::shared_ptr msg = std::make_shared(clockEdge()); + int blk_size = m_ruby_system->getBlockSizeBytes(); + + std::shared_ptr msg = + std::make_shared(clockEdge(), blk_size, m_ruby_system); (*msg).m_addr = pkt->getAddr(); (*msg).m_Sender = m_machineID; @@ -396,7 +402,7 @@ AbstractController::recvTimingResp(PacketPtr pkt) // Copy data from the packet (*msg).m_DataBlk.setData(pkt->getPtr(), 0, - RubySystem::getBlockSizeBytes()); + m_ruby_system->getBlockSizeBytes()); } else if (pkt->isWrite()) { (*msg).m_Type = MemoryRequestType_MEMORY_WB; (*msg).m_MessageSize = MessageSizeType_Writeback_Control; @@ -404,7 +410,8 @@ AbstractController::recvTimingResp(PacketPtr pkt) panic("Incorrect packet type received from memory controller!"); } - memRspQueue->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); + memRspQueue->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); delete pkt; return true; } @@ -471,6 +478,45 @@ AbstractController::sendRetryRespToMem() { } } +Addr +AbstractController::getOffset(Addr addr) const +{ + return ruby::getOffset(addr, m_ruby_system->getBlockSizeBits()); +} + +Addr 
+AbstractController::makeLineAddress(Addr addr) const +{ + return ruby::makeLineAddress(addr, m_ruby_system->getBlockSizeBits()); +} + +std::string +AbstractController::printAddress(Addr addr) const +{ + return ruby::printAddress(addr, m_ruby_system->getBlockSizeBits()); +} + +NetDest +AbstractController::broadcast(MachineType type) +{ + assert(m_ruby_system != nullptr); + NodeID type_count = m_ruby_system->MachineType_base_count(type); + + NetDest dest; + for (NodeID i = 0; i < type_count; i++) { + MachineID mach = {type, i}; + dest.add(mach); + } + return dest; +} + +int +AbstractController::machineCount(MachineType machType) +{ + assert(m_ruby_system != nullptr); + return m_ruby_system->MachineType_base_count(machType); +} + bool AbstractController::MemoryPort::recvTimingResp(PacketPtr pkt) { diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index ce6a6972af..79f67073a6 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -72,6 +72,7 @@ namespace ruby class Network; class GPUCoalescer; class DMASequencer; +class RubySystem; // used to communicate that an in_port peeked the wrong message type class RejectException: public std::exception @@ -229,6 +230,11 @@ class AbstractController : public ClockedObject, public Consumer /** List of upstream destinations (towards the CPU) */ const NetDest& allUpstreamDest() const { return upstreamDestinations; } + // Helper methods for commonly used functions called in common/address.hh + Addr getOffset(Addr addr) const; + Addr makeLineAddress(Addr addr) const; + std::string printAddress(Addr addr) const; + protected: //! Profiles original cache requests including PUTs void profileRequest(const std::string &request); @@ -452,6 +458,13 @@ class AbstractController : public ClockedObject, public Consumer {} }; + RubySystem *m_ruby_system = nullptr; + + // Formerly in RubySlicc_ComponentMapping.hh. 
Moved here to access + // RubySystem pointer. + NetDest broadcast(MachineType type); + int machineCount(MachineType machType); + private: /** The address range to which the controller responds on the CPU side. */ const AddrRangeList addrRanges; diff --git a/src/mem/ruby/slicc_interface/Message.hh b/src/mem/ruby/slicc_interface/Message.hh index 5c824c4a38..31fb5e8e92 100644 --- a/src/mem/ruby/slicc_interface/Message.hh +++ b/src/mem/ruby/slicc_interface/Message.hh @@ -62,10 +62,12 @@ typedef std::shared_ptr MsgPtr; class Message { public: - Message(Tick curTime) - : m_time(curTime), + Message(Tick curTime, int block_size, const RubySystem *rs) + : m_block_size(block_size), + m_time(curTime), m_LastEnqueueTime(curTime), - m_DelayedTicks(0), m_msg_counter(0) + m_DelayedTicks(0), m_msg_counter(0), + p_ruby_system(rs) { } Message(const Message &other) = default; @@ -121,6 +123,9 @@ class Message int getVnet() const { return vnet; } void setVnet(int net) { vnet = net; } + protected: + int m_block_size = 0; + private: Tick m_time; Tick m_LastEnqueueTime; // my last enqueue time @@ -130,6 +135,9 @@ class Message // Variables for required network traversal int incoming_link; int vnet; + + // Needed to call MacheinType_base_count/level + const RubySystem *p_ruby_system = nullptr; }; inline bool diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh index a258a18f9a..58eae229be 100644 --- a/src/mem/ruby/slicc_interface/RubyRequest.hh +++ b/src/mem/ruby/slicc_interface/RubyRequest.hh @@ -86,11 +86,12 @@ class RubyRequest : public Message bool m_isSLCSet; bool m_isSecure; - RubyRequest(Tick curTime, uint64_t _paddr, int _len, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, + uint64_t _paddr, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No, ContextID _proc_id = 100, ContextID _core_id = 99) - : Message(curTime), + : Message(curTime, block_size, 
rs), m_PhysicalAddress(_paddr), m_Type(_type), m_ProgramCounter(_pc), @@ -99,13 +100,16 @@ class RubyRequest : public Message m_Prefetch(_pb), m_pkt(_pkt), m_contextId(_core_id), + m_writeMask(block_size), + m_WTData(block_size), m_htmFromTransaction(false), m_htmTransactionUid(0), m_isTlbi(false), m_tlbiTransactionUid(0), m_isSecure(m_pkt ? m_pkt->req->isSecure() : false) { - m_LineAddress = makeLineAddress(m_PhysicalAddress); + int block_size_bits = floorLog2(block_size); + m_LineAddress = makeLineAddress(m_PhysicalAddress, block_size_bits); if (_pkt) { m_isGLCSet = m_pkt->req->isGLCSet(); m_isSLCSet = m_pkt->req->isSLCSet(); @@ -116,10 +120,10 @@ class RubyRequest : public Message } /** RubyRequest for memory management commands */ - RubyRequest(Tick curTime, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, ContextID _proc_id, ContextID _core_id) - : Message(curTime), + : Message(curTime, block_size, rs), m_PhysicalAddress(0), m_Type(_type), m_ProgramCounter(_pc), @@ -128,6 +132,8 @@ class RubyRequest : public Message m_Prefetch(PrefetchBit_No), m_pkt(_pkt), m_contextId(_core_id), + m_writeMask(block_size), + m_WTData(block_size), m_htmFromTransaction(false), m_htmTransactionUid(0), m_isTlbi(false), @@ -144,14 +150,14 @@ class RubyRequest : public Message } } - RubyRequest(Tick curTime, uint64_t _paddr, int _len, - uint64_t _pc, RubyRequestType _type, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, + uint64_t _paddr, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb, unsigned _proc_id, unsigned _core_id, int _wm_size, std::vector & _wm_mask, DataBlock & _Data, uint64_t _instSeqNum = 0) - : Message(curTime), + : Message(curTime, block_size, rs), m_PhysicalAddress(_paddr), m_Type(_type), m_ProgramCounter(_pc), @@ -170,7 +176,8 @@ class RubyRequest : public Message m_tlbiTransactionUid(0), 
m_isSecure(m_pkt->req->isSecure()) { - m_LineAddress = makeLineAddress(m_PhysicalAddress); + int block_size_bits = floorLog2(block_size); + m_LineAddress = makeLineAddress(m_PhysicalAddress, block_size_bits); if (_pkt) { m_isGLCSet = m_pkt->req->isGLCSet(); m_isSLCSet = m_pkt->req->isSLCSet(); @@ -180,15 +187,15 @@ class RubyRequest : public Message } } - RubyRequest(Tick curTime, uint64_t _paddr, int _len, - uint64_t _pc, RubyRequestType _type, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, + uint64_t _paddr, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb, unsigned _proc_id, unsigned _core_id, int _wm_size, std::vector & _wm_mask, DataBlock & _Data, std::vector< std::pair > _atomicOps, uint64_t _instSeqNum = 0) - : Message(curTime), + : Message(curTime, block_size, rs), m_PhysicalAddress(_paddr), m_Type(_type), m_ProgramCounter(_pc), @@ -207,7 +214,8 @@ class RubyRequest : public Message m_tlbiTransactionUid(0), m_isSecure(m_pkt->req->isSecure()) { - m_LineAddress = makeLineAddress(m_PhysicalAddress); + int block_size_bits = floorLog2(block_size); + m_LineAddress = makeLineAddress(m_PhysicalAddress, block_size_bits); if (_pkt) { m_isGLCSet = m_pkt->req->isGLCSet(); m_isSLCSet = m_pkt->req->isSLCSet(); @@ -218,7 +226,12 @@ class RubyRequest : public Message } } - RubyRequest(Tick curTime) : Message(curTime) {} + RubyRequest(Tick curTime, int block_size, RubySystem *rs) + : Message(curTime, block_size, rs), + m_writeMask(block_size), + m_WTData(block_size) + { + } MsgPtr clone() const { return std::shared_ptr(new RubyRequest(*this)); } diff --git a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh index 9a433d1cee..1195089fc3 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh @@ -41,17 +41,6 @@ namespace gem5 namespace ruby { -inline 
NetDest -broadcast(MachineType type) -{ - NetDest dest; - for (NodeID i = 0; i < MachineType_base_count(type); i++) { - MachineID mach = {type, i}; - dest.add(mach); - } - return dest; -} - inline MachineID mapAddressToRange(Addr addr, MachineType type, int low_bit, int num_bits, int cluster_id = 0) @@ -77,12 +66,6 @@ machineIDToMachineType(MachineID machID) return machID.type; } -inline int -machineCount(MachineType machType) -{ - return MachineType_base_count(machType); -} - inline MachineID createMachineID(MachineType type, NodeID id) { diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh index 8df56c7013..f4a49463a8 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh @@ -233,8 +233,9 @@ addressOffset(Addr addr, Addr base) inline bool testAndRead(Addr addr, DataBlock& blk, Packet *pkt) { - Addr pktLineAddr = makeLineAddress(pkt->getAddr()); - Addr lineAddr = makeLineAddress(addr); + int block_size_bits = floorLog2(blk.getBlockSize()); + Addr pktLineAddr = makeLineAddress(pkt->getAddr(), block_size_bits); + Addr lineAddr = makeLineAddress(addr, block_size_bits); if (pktLineAddr == lineAddr) { uint8_t *data = pkt->getPtr(); @@ -259,8 +260,10 @@ testAndRead(Addr addr, DataBlock& blk, Packet *pkt) inline bool testAndReadMask(Addr addr, DataBlock& blk, WriteMask& mask, Packet *pkt) { - Addr pktLineAddr = makeLineAddress(pkt->getAddr()); - Addr lineAddr = makeLineAddress(addr); + assert(blk.getBlockSize() == mask.getBlockSize()); + int block_size_bits = floorLog2(blk.getBlockSize()); + Addr pktLineAddr = makeLineAddress(pkt->getAddr(), block_size_bits); + Addr lineAddr = makeLineAddress(addr, block_size_bits); if (pktLineAddr == lineAddr) { uint8_t *data = pkt->getPtr(); @@ -288,8 +291,9 @@ testAndReadMask(Addr addr, DataBlock& blk, WriteMask& mask, Packet *pkt) inline bool testAndWrite(Addr addr, DataBlock& blk, Packet *pkt) { - Addr pktLineAddr = 
makeLineAddress(pkt->getAddr()); - Addr lineAddr = makeLineAddress(addr); + int block_size_bits = floorLog2(blk.getBlockSize()); + Addr pktLineAddr = makeLineAddress(pkt->getAddr(), block_size_bits); + Addr lineAddr = makeLineAddress(addr, block_size_bits); if (pktLineAddr == lineAddr) { const uint8_t *data = pkt->getConstPtr(); diff --git a/src/mem/ruby/structures/ALUFreeListArray.cc b/src/mem/ruby/structures/ALUFreeListArray.cc index 87b5cbfbd2..3e25e5b599 100644 --- a/src/mem/ruby/structures/ALUFreeListArray.cc +++ b/src/mem/ruby/structures/ALUFreeListArray.cc @@ -57,10 +57,10 @@ namespace ruby * - The same line has been accessed in the past accessLatency ticks */ -ALUFreeListArray::ALUFreeListArray(unsigned int num_ALUs, Tick access_latency) +ALUFreeListArray::ALUFreeListArray(unsigned int num_ALUs, Cycles access_clocks) { this->numALUs = num_ALUs; - this->accessLatency = access_latency; + this->accessClocks = access_clocks; } bool ALUFreeListArray::tryAccess(Addr addr) @@ -85,7 +85,7 @@ bool ALUFreeListArray::tryAccess(Addr addr) } // Block access if the line is already being used - if (record.lineAddr == makeLineAddress(addr)) { + if (record.lineAddr == makeLineAddress(addr, m_block_size_bits)) { return false; } } @@ -99,7 +99,9 @@ void ALUFreeListArray::reserve(Addr addr) // the access is valid // Add record to queue - accessQueue.push_front(AccessRecord(makeLineAddress(addr), curTick())); + accessQueue.push_front( + AccessRecord(makeLineAddress(addr, m_block_size_bits), curTick()) + ); } } // namespace ruby diff --git a/src/mem/ruby/structures/ALUFreeListArray.hh b/src/mem/ruby/structures/ALUFreeListArray.hh index bed1b00b5c..5c4fdd95f9 100644 --- a/src/mem/ruby/structures/ALUFreeListArray.hh +++ b/src/mem/ruby/structures/ALUFreeListArray.hh @@ -32,6 +32,7 @@ #include +#include "base/intmath.hh" #include "mem/ruby/common/TypeDefines.hh" #include "sim/cur_tick.hh" @@ -45,7 +46,8 @@ class ALUFreeListArray { private: unsigned int numALUs; - Tick accessLatency; 
+ Cycles accessClocks; + Tick accessLatency = 0; class AccessRecord { @@ -62,14 +64,33 @@ class ALUFreeListArray // Queue of accesses from past accessLatency cycles std::deque accessQueue; + int m_block_size_bits = 0; + public: - ALUFreeListArray(unsigned int num_ALUs, Tick access_latency); + ALUFreeListArray(unsigned int num_ALUs, Cycles access_clocks); bool tryAccess(Addr addr); void reserve(Addr addr); - Tick getLatency() const { return accessLatency; } + Tick + getLatency() const + { + assert(accessLatency > 0); + return accessLatency; + } + + void + setClockPeriod(Tick clockPeriod) + { + accessLatency = accessClocks * clockPeriod; + } + + void + setBlockSize(int block_size) + { + m_block_size_bits = floorLog2(block_size); + } }; } // namespace ruby diff --git a/src/mem/ruby/structures/BankedArray.cc b/src/mem/ruby/structures/BankedArray.cc index 0f01d5c396..2c2202dec5 100644 --- a/src/mem/ruby/structures/BankedArray.cc +++ b/src/mem/ruby/structures/BankedArray.cc @@ -42,8 +42,7 @@ namespace ruby { BankedArray::BankedArray(unsigned int banks, Cycles accessLatency, - unsigned int startIndexBit, RubySystem *rs) - : m_ruby_system(rs) + unsigned int startIndexBit) { this->banks = banks; this->accessLatency = accessLatency; @@ -78,6 +77,8 @@ BankedArray::reserve(int64_t idx) if (accessLatency == 0) return; + assert(clockPeriod > 0); + unsigned int bank = mapIndexToBank(idx); assert(bank < banks); @@ -95,7 +96,7 @@ BankedArray::reserve(int64_t idx) busyBanks[bank].idx = idx; busyBanks[bank].startAccess = curTick(); busyBanks[bank].endAccess = curTick() + - (accessLatency-1) * m_ruby_system->clockPeriod(); + (accessLatency-1) * clockPeriod; } unsigned int diff --git a/src/mem/ruby/structures/BankedArray.hh b/src/mem/ruby/structures/BankedArray.hh index c757759296..ecc984a617 100644 --- a/src/mem/ruby/structures/BankedArray.hh +++ b/src/mem/ruby/structures/BankedArray.hh @@ -48,6 +48,7 @@ class BankedArray private: unsigned int banks; Cycles accessLatency; + Tick 
clockPeriod = 0; unsigned int bankBits; unsigned int startIndexBit; RubySystem *m_ruby_system; @@ -69,7 +70,7 @@ class BankedArray public: BankedArray(unsigned int banks, Cycles accessLatency, - unsigned int startIndexBit, RubySystem *rs); + unsigned int startIndexBit); // Note: We try the access based on the cache index, not the address // This is so we don't get aliasing on blocks being replaced @@ -78,6 +79,8 @@ class BankedArray void reserve(int64_t idx); Cycles getLatency() const { return accessLatency; } + + void setClockPeriod(Tick _clockPeriod) { clockPeriod = _clockPeriod; } }; } // namespace ruby diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc index 90d67fb29b..6bc35bac7d 100644 --- a/src/mem/ruby/structures/CacheMemory.cc +++ b/src/mem/ruby/structures/CacheMemory.cc @@ -69,12 +69,9 @@ operator<<(std::ostream& out, const CacheMemory& obj) CacheMemory::CacheMemory(const Params &p) : SimObject(p), - dataArray(p.dataArrayBanks, p.dataAccessLatency, - p.start_index_bit, p.ruby_system), - tagArray(p.tagArrayBanks, p.tagAccessLatency, - p.start_index_bit, p.ruby_system), - atomicALUArray(p.atomicALUs, p.atomicLatency * - p.ruby_system->clockPeriod()), + dataArray(p.dataArrayBanks, p.dataAccessLatency, p.start_index_bit), + tagArray(p.tagArrayBanks, p.tagAccessLatency, p.start_index_bit), + atomicALUArray(p.atomicALUs, p.atomicLatency), cacheMemoryStats(this) { m_cache_size = p.size; @@ -88,12 +85,25 @@ CacheMemory::CacheMemory(const Params &p) m_replacementPolicy_ptr) ? 
true : false; } +void +CacheMemory::setRubySystem(RubySystem* rs) +{ + dataArray.setClockPeriod(rs->clockPeriod()); + tagArray.setClockPeriod(rs->clockPeriod()); + atomicALUArray.setClockPeriod(rs->clockPeriod()); + atomicALUArray.setBlockSize(rs->getBlockSizeBytes()); + + if (m_block_size == 0) { + m_block_size = rs->getBlockSizeBytes(); + } + + m_ruby_system = rs; +} + void CacheMemory::init() { - if (m_block_size == 0) { - m_block_size = RubySystem::getBlockSizeBytes(); - } + assert(m_block_size != 0); m_cache_num_sets = (m_cache_size / m_cache_assoc) / m_block_size; assert(m_cache_num_sets > 1); m_cache_num_set_bits = floorLog2(m_cache_num_sets); @@ -286,6 +296,9 @@ CacheMemory::allocate(Addr address, AbstractCacheEntry *entry) assert(cacheAvail(address)); DPRINTF(RubyCache, "allocating address: %#x\n", address); + entry->initBlockSize(m_block_size); + entry->setRubySystem(m_ruby_system); + // Find the first open slot int64_t cacheSet = addressToCacheSet(address); std::vector &set = m_cache[cacheSet]; diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh index de7c327f63..912ae22d1f 100644 --- a/src/mem/ruby/structures/CacheMemory.hh +++ b/src/mem/ruby/structures/CacheMemory.hh @@ -154,6 +154,8 @@ class CacheMemory : public SimObject void htmAbortTransaction(); void htmCommitTransaction(); + void setRubySystem(RubySystem* rs); + public: int getCacheSize() const { return m_cache_size; } int getCacheAssoc() const { return m_cache_assoc; } @@ -213,6 +215,14 @@ class CacheMemory : public SimObject */ bool m_use_occupancy; + RubySystem *m_ruby_system = nullptr; + + Addr + makeLineAddress(Addr addr) const + { + return ruby::makeLineAddress(addr, floorLog2(m_block_size)); + } + private: struct CacheMemoryStats : public statistics::Group { diff --git a/src/mem/ruby/structures/DirectoryMemory.cc b/src/mem/ruby/structures/DirectoryMemory.cc index 620254b82c..7469f72451 100644 --- a/src/mem/ruby/structures/DirectoryMemory.cc +++ 
b/src/mem/ruby/structures/DirectoryMemory.cc @@ -64,12 +64,14 @@ DirectoryMemory::DirectoryMemory(const Params &p) } m_size_bits = floorLog2(m_size_bytes); m_num_entries = 0; + m_block_size = p.block_size; + m_ruby_system = p.ruby_system; } void DirectoryMemory::init() { - m_num_entries = m_size_bytes / RubySystem::getBlockSizeBytes(); + m_num_entries = m_size_bytes / m_block_size; m_entries = new AbstractCacheEntry*[m_num_entries]; for (int i = 0; i < m_num_entries; i++) m_entries[i] = NULL; @@ -108,7 +110,7 @@ DirectoryMemory::mapAddressToLocalIdx(Addr address) } ret += r.size(); } - return ret >> RubySystem::getBlockSizeBits(); + return ret >> (floorLog2(m_block_size)); } AbstractCacheEntry* @@ -133,6 +135,8 @@ DirectoryMemory::allocate(Addr address, AbstractCacheEntry *entry) assert(idx < m_num_entries); assert(m_entries[idx] == NULL); entry->changePermission(AccessPermission_Read_Only); + entry->initBlockSize(m_block_size); + entry->setRubySystem(m_ruby_system); m_entries[idx] = entry; return entry; diff --git a/src/mem/ruby/structures/DirectoryMemory.hh b/src/mem/ruby/structures/DirectoryMemory.hh index 8a4532864d..6e77e2a4ca 100644 --- a/src/mem/ruby/structures/DirectoryMemory.hh +++ b/src/mem/ruby/structures/DirectoryMemory.hh @@ -104,6 +104,9 @@ class DirectoryMemory : public SimObject uint64_t m_size_bytes; uint64_t m_size_bits; uint64_t m_num_entries; + uint32_t m_block_size; + + RubySystem *m_ruby_system = nullptr; /** * The address range for which the directory responds. Normally diff --git a/src/mem/ruby/structures/DirectoryMemory.py b/src/mem/ruby/structures/DirectoryMemory.py index 85f05367cf..202617bceb 100644 --- a/src/mem/ruby/structures/DirectoryMemory.py +++ b/src/mem/ruby/structures/DirectoryMemory.py @@ -49,3 +49,7 @@ class RubyDirectoryMemory(SimObject): addr_ranges = VectorParam.AddrRange( Parent.addr_ranges, "Address range this directory responds to" ) + block_size = Param.UInt32( + "Size of a block in bytes. 
Usually same as cache line size." + ) + ruby_system = Param.RubySystem(Parent.any, "") diff --git a/src/mem/ruby/structures/PerfectCacheMemory.hh b/src/mem/ruby/structures/PerfectCacheMemory.hh index 664d10f202..0966ca80d2 100644 --- a/src/mem/ruby/structures/PerfectCacheMemory.hh +++ b/src/mem/ruby/structures/PerfectCacheMemory.hh @@ -74,6 +74,8 @@ class PerfectCacheMemory public: PerfectCacheMemory(); + void setBlockSize(const int block_size) { m_block_size = block_size; } + // tests to see if an address is present in the cache bool isTagPresent(Addr address) const; @@ -108,6 +110,8 @@ class PerfectCacheMemory // Data Members (m_prefix) std::unordered_map > m_map; + + int m_block_size = 0; }; template @@ -130,7 +134,7 @@ template inline bool PerfectCacheMemory::isTagPresent(Addr address) const { - return m_map.count(makeLineAddress(address)) > 0; + return m_map.count(makeLineAddress(address, floorLog2(m_block_size))) > 0; } template @@ -149,7 +153,8 @@ PerfectCacheMemory::allocate(Addr address) PerfectCacheLineState line_state; line_state.m_permission = AccessPermission_Invalid; line_state.m_entry = ENTRY(); - m_map[makeLineAddress(address)] = line_state; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + m_map.emplace(line_addr, line_state); } // deallocate entry @@ -157,7 +162,8 @@ template inline void PerfectCacheMemory::deallocate(Addr address) { - [[maybe_unused]] auto num_erased = m_map.erase(makeLineAddress(address)); + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + [[maybe_unused]] auto num_erased = m_map.erase(line_addr); assert(num_erased == 1); } @@ -175,7 +181,8 @@ template inline ENTRY* PerfectCacheMemory::lookup(Addr address) { - return &m_map[makeLineAddress(address)].m_entry; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + return &m_map[line_addr].m_entry; } // looks an address up in the cache @@ -183,14 +190,16 @@ template inline const ENTRY* PerfectCacheMemory::lookup(Addr 
address) const { - return &m_map[makeLineAddress(address)].m_entry; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + return &m_map[line_addr].m_entry; } template inline AccessPermission PerfectCacheMemory::getPermission(Addr address) const { - return m_map[makeLineAddress(address)].m_permission; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + return m_map[line_addr].m_permission; } template @@ -198,8 +207,8 @@ inline void PerfectCacheMemory::changePermission(Addr address, AccessPermission new_perm) { - Addr line_address = makeLineAddress(address); - PerfectCacheLineState& line_state = m_map[line_address]; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + PerfectCacheLineState& line_state = m_map[line_addr]; line_state.m_permission = new_perm; } diff --git a/src/mem/ruby/structures/PersistentTable.hh b/src/mem/ruby/structures/PersistentTable.hh index 5382269273..1162e1dda1 100644 --- a/src/mem/ruby/structures/PersistentTable.hh +++ b/src/mem/ruby/structures/PersistentTable.hh @@ -63,6 +63,12 @@ class PersistentTable // Destructor ~PersistentTable(); + void + setBlockSize(int block_size) + { + m_block_size_bits = floorLog2(block_size); + } + // Public Methods void persistentRequestLock(Addr address, MachineID locker, AccessType type); @@ -82,9 +88,17 @@ class PersistentTable PersistentTable(const PersistentTable& obj); PersistentTable& operator=(const PersistentTable& obj); + int m_block_size_bits = 0; + // Data Members (m_prefix) typedef std::unordered_map AddressMap; AddressMap m_map; + + Addr + makeLineAddress(Addr addr) const + { + return ruby::makeLineAddress(addr, m_block_size_bits); + } }; inline std::ostream& diff --git a/src/mem/ruby/structures/RubyCache.py b/src/mem/ruby/structures/RubyCache.py index 2f457f5c4a..4b1023fc61 100644 --- a/src/mem/ruby/structures/RubyCache.py +++ b/src/mem/ruby/structures/RubyCache.py @@ -54,4 +54,3 @@ class RubyCache(SimObject): dataAccessLatency = 
Param.Cycles(1, "cycles for a data array access") tagAccessLatency = Param.Cycles(1, "cycles for a tag array access") resourceStalls = Param.Bool(False, "stall if there is a resource failure") - ruby_system = Param.RubySystem(Parent.any, "") diff --git a/src/mem/ruby/structures/RubyPrefetcher.cc b/src/mem/ruby/structures/RubyPrefetcher.cc index e45eff2c2f..bffcfe2327 100644 --- a/src/mem/ruby/structures/RubyPrefetcher.cc +++ b/src/mem/ruby/structures/RubyPrefetcher.cc @@ -56,13 +56,15 @@ namespace ruby RubyPrefetcher::RubyPrefetcher(const Params &p) : SimObject(p), m_num_streams(p.num_streams), - m_array(p.num_streams), m_train_misses(p.train_misses), + m_array(p.num_streams, p.block_size), m_train_misses(p.train_misses), m_num_startup_pfs(p.num_startup_pfs), unitFilter(p.unit_filter), negativeFilter(p.unit_filter), nonUnitFilter(p.nonunit_filter), m_prefetch_cross_pages(p.cross_page), pageShift(p.page_shift), + m_block_size_bits(floorLog2(p.block_size)), + m_block_size_bytes(p.block_size), rubyPrefetcherStats(this) { assert(m_num_streams > 0); @@ -90,7 +92,7 @@ void RubyPrefetcher::observeMiss(Addr address, const RubyRequestType& type) { DPRINTF(RubyPrefetcher, "Observed miss for %#x\n", address); - Addr line_addr = makeLineAddress(address); + Addr line_addr = makeLineAddress(address, m_block_size_bits); rubyPrefetcherStats.numMissObserved++; // check to see if we have already issued a prefetch for this block @@ -214,7 +216,7 @@ RubyPrefetcher::initializeStream(Addr address, int stride, // initialize the stream prefetcher PrefetchEntry *mystream = &(m_array[index]); - mystream->m_address = makeLineAddress(address); + mystream->m_address = makeLineAddress(address, m_block_size_bits); mystream->m_stride = stride; mystream->m_use_time = m_controller->curCycle(); mystream->m_is_valid = true; @@ -222,7 +224,7 @@ RubyPrefetcher::initializeStream(Addr address, int stride, // create a number of initial prefetches for this stream Addr page_addr = 
pageAddress(mystream->m_address); - Addr line_addr = makeLineAddress(mystream->m_address); + Addr line_addr = makeLineAddress(mystream->m_address, m_block_size_bits); // insert a number of prefetches into the prefetch table for (int k = 0; k < m_num_startup_pfs; k++) { @@ -312,8 +314,7 @@ RubyPrefetcher::accessNonunitFilter(Addr line_addr, // This stride HAS to be the multiplicative constant of // dataBlockBytes (bc makeNextStrideAddress is // calculated based on this multiplicative constant!) - const int stride = entry.stride / - RubySystem::getBlockSizeBytes(); + const int stride = entry.stride / m_block_size_bytes; // clear this filter entry entry.clear(); diff --git a/src/mem/ruby/structures/RubyPrefetcher.hh b/src/mem/ruby/structures/RubyPrefetcher.hh index 51e1b3c480..5627410713 100644 --- a/src/mem/ruby/structures/RubyPrefetcher.hh +++ b/src/mem/ruby/structures/RubyPrefetcher.hh @@ -68,10 +68,10 @@ class PrefetchEntry { public: /// constructor - PrefetchEntry() + PrefetchEntry(int block_size) { // default: 1 cache-line stride - m_stride = (1 << RubySystem::getBlockSizeBits()); + m_stride = (1 << floorLog2(block_size)); m_use_time = Cycles(0); m_is_valid = false; } @@ -239,6 +239,16 @@ class RubyPrefetcher : public SimObject const unsigned pageShift; + int m_block_size_bits = 0; + int m_block_size_bytes = 0; + + Addr + makeNextStrideAddress(Addr addr, int stride) const + { + return ruby::makeNextStrideAddress(addr, stride, + m_block_size_bytes); + } + struct RubyPrefetcherStats : public statistics::Group { RubyPrefetcherStats(statistics::Group *parent); diff --git a/src/mem/ruby/structures/RubyPrefetcher.py b/src/mem/ruby/structures/RubyPrefetcher.py index d4189ae7d5..155b7c314d 100644 --- a/src/mem/ruby/structures/RubyPrefetcher.py +++ b/src/mem/ruby/structures/RubyPrefetcher.py @@ -62,6 +62,9 @@ class RubyPrefetcher(SimObject): page_shift = Param.UInt32( 12, "Number of bits to mask to get a page number" ) + block_size = Param.UInt32( + "Size of block to 
prefetch, usually cache line size" + ) class Prefetcher(RubyPrefetcher): diff --git a/src/mem/ruby/structures/RubyPrefetcherProxy.cc b/src/mem/ruby/structures/RubyPrefetcherProxy.cc index 2a29fbc88e..a6fed8258c 100644 --- a/src/mem/ruby/structures/RubyPrefetcherProxy.cc +++ b/src/mem/ruby/structures/RubyPrefetcherProxy.cc @@ -66,7 +66,7 @@ RubyPrefetcherProxy::RubyPrefetcherProxy(AbstractController* _parent, prefetcher->setParentInfo( cacheCntrl->params().system, cacheCntrl->getProbeManager(), - RubySystem::getBlockSizeBytes()); + cacheCntrl->m_ruby_system->getBlockSizeBytes()); } } @@ -112,7 +112,7 @@ RubyPrefetcherProxy::issuePrefetch() if (pkt) { DPRINTF(HWPrefetch, "Next prefetch ready %s\n", pkt->print()); - unsigned blk_size = RubySystem::getBlockSizeBytes(); + unsigned blk_size = cacheCntrl->m_ruby_system->getBlockSizeBytes(); Addr line_addr = pkt->getBlockAddr(blk_size); if (issuedPfPkts.count(line_addr) == 0) { @@ -126,6 +126,8 @@ RubyPrefetcherProxy::issuePrefetch() std::shared_ptr msg = std::make_shared(cacheCntrl->clockEdge(), + blk_size, + cacheCntrl->m_ruby_system, pkt->getAddr(), blk_size, 0, // pc @@ -136,7 +138,10 @@ RubyPrefetcherProxy::issuePrefetch() // enqueue request into prefetch queue to the cache pfQueue->enqueue(msg, cacheCntrl->clockEdge(), - cacheCntrl->cyclesToTicks(Cycles(1))); + cacheCntrl->cyclesToTicks(Cycles(1)), + cacheCntrl->m_ruby_system->getRandomization(), + cacheCntrl->m_ruby_system->getWarmupEnabled() + ); // track all pending PF requests issuedPfPkts[line_addr] = pkt; @@ -230,5 +235,19 @@ RubyPrefetcherProxy::regProbePoints() cacheCntrl->getProbeManager(), "Data Update"); } +Addr +RubyPrefetcherProxy::makeLineAddress(Addr addr) const +{ + return ruby::makeLineAddress(addr, + cacheCntrl->m_ruby_system->getBlockSizeBits()); +} + +Addr +RubyPrefetcherProxy::getOffset(Addr addr) const +{ + return ruby::getOffset(addr, + cacheCntrl->m_ruby_system->getBlockSizeBits()); +} + } // namespace ruby } // namespace gem5 diff --git 
a/src/mem/ruby/structures/RubyPrefetcherProxy.hh b/src/mem/ruby/structures/RubyPrefetcherProxy.hh index 34c40154b6..e7c044edf8 100644 --- a/src/mem/ruby/structures/RubyPrefetcherProxy.hh +++ b/src/mem/ruby/structures/RubyPrefetcherProxy.hh @@ -142,6 +142,9 @@ class RubyPrefetcherProxy : public CacheAccessor, public Named */ ProbePointArg *ppDataUpdate; + Addr makeLineAddress(Addr addr) const; + Addr getOffset(Addr addr) const; + public: /** Accessor functions */ diff --git a/src/mem/ruby/structures/TBETable.hh b/src/mem/ruby/structures/TBETable.hh index 9030d52d9f..72770ce42f 100644 --- a/src/mem/ruby/structures/TBETable.hh +++ b/src/mem/ruby/structures/TBETable.hh @@ -70,6 +70,8 @@ class TBETable return (m_number_of_TBEs - m_map.size()) >= n; } + void setBlockSize(const int block_size) { m_block_size = block_size; } + ENTRY *getNullEntry(); ENTRY *lookup(Addr address); @@ -85,7 +87,8 @@ class TBETable std::unordered_map m_map; private: - int m_number_of_TBEs; + int m_number_of_TBEs = 0; + int m_block_size = 0; }; template @@ -101,7 +104,7 @@ template inline bool TBETable::isPresent(Addr address) const { - assert(address == makeLineAddress(address)); + assert(address == makeLineAddress(address, floorLog2(m_block_size))); assert(m_map.size() <= m_number_of_TBEs); return !!m_map.count(address); } @@ -112,7 +115,8 @@ TBETable::allocate(Addr address) { assert(!isPresent(address)); assert(m_map.size() < m_number_of_TBEs); - m_map[address] = ENTRY(); + assert(m_block_size > 0); + m_map.emplace(address, ENTRY(m_block_size)); } template diff --git a/src/mem/ruby/structures/TimerTable.cc b/src/mem/ruby/structures/TimerTable.cc index f8f24dbfc0..a9ce92252e 100644 --- a/src/mem/ruby/structures/TimerTable.cc +++ b/src/mem/ruby/structures/TimerTable.cc @@ -70,7 +70,7 @@ TimerTable::nextAddress() const void TimerTable::set(Addr address, Tick ready_time) { - assert(address == makeLineAddress(address)); + assert(address == makeLineAddress(address, m_block_size_bits)); 
assert(!m_map.count(address)); m_map[address] = ready_time; @@ -87,7 +87,7 @@ TimerTable::set(Addr address, Tick ready_time) void TimerTable::unset(Addr address) { - assert(address == makeLineAddress(address)); + assert(address == makeLineAddress(address, m_block_size_bits)); assert(m_map.count(address)); m_map.erase(address); diff --git a/src/mem/ruby/structures/TimerTable.hh b/src/mem/ruby/structures/TimerTable.hh index e676359fd4..92c485ab57 100644 --- a/src/mem/ruby/structures/TimerTable.hh +++ b/src/mem/ruby/structures/TimerTable.hh @@ -48,6 +48,12 @@ class TimerTable public: TimerTable(); + void + setBlockSize(int block_size) + { + m_block_size_bits = floorLog2(block_size); + } + void setConsumer(Consumer* consumer_ptr) { @@ -88,6 +94,8 @@ class TimerTable //! Consumer to signal a wakeup() Consumer* m_consumer_ptr; + int m_block_size_bits = 0; + std::string m_name; }; diff --git a/src/mem/ruby/structures/WireBuffer.cc b/src/mem/ruby/structures/WireBuffer.cc index a839fe7cc7..3ebbe2a305 100644 --- a/src/mem/ruby/structures/WireBuffer.cc +++ b/src/mem/ruby/structures/WireBuffer.cc @@ -36,7 +36,6 @@ #include "base/cprintf.hh" #include "base/stl_helpers.hh" -#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -74,7 +73,8 @@ WireBuffer::~WireBuffer() } void -WireBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta) +WireBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, + bool /*ruby_is_random*/, bool /*ruby_warmup*/) { m_msg_counter++; Tick arrival_time = current_time + delta; diff --git a/src/mem/ruby/structures/WireBuffer.hh b/src/mem/ruby/structures/WireBuffer.hh index b26043b09a..75dfc154c8 100644 --- a/src/mem/ruby/structures/WireBuffer.hh +++ b/src/mem/ruby/structures/WireBuffer.hh @@ -78,7 +78,10 @@ class WireBuffer : public SimObject void setDescription(const std::string& name) { m_description = name; }; std::string getDescription() { return m_description; }; - void enqueue(MsgPtr message, Tick current_time, Tick delta); + // 
ruby_is_random and ruby_warmup are not used, but this method signature + // must match that of MessageBuffer. + void enqueue(MsgPtr message, Tick current_time, Tick delta, + bool ruby_is_random = false, bool ruby_warmup = false); void dequeue(Tick current_time); const Message* peek(); void recycle(Tick current_time, Tick recycle_latency); diff --git a/src/mem/ruby/structures/WireBuffer.py b/src/mem/ruby/structures/WireBuffer.py index ca67e7cb31..8cb2cfe4d6 100644 --- a/src/mem/ruby/structures/WireBuffer.py +++ b/src/mem/ruby/structures/WireBuffer.py @@ -35,5 +35,3 @@ class RubyWireBuffer(SimObject): type = "RubyWireBuffer" cxx_class = "gem5::ruby::WireBuffer" cxx_header = "mem/ruby/structures/WireBuffer.hh" - - ruby_system = Param.RubySystem(Parent.any, "") diff --git a/src/mem/ruby/system/CacheRecorder.cc b/src/mem/ruby/system/CacheRecorder.cc index 3326856849..426c604cb0 100644 --- a/src/mem/ruby/system/CacheRecorder.cc +++ b/src/mem/ruby/system/CacheRecorder.cc @@ -49,31 +49,25 @@ TraceRecord::print(std::ostream& out) const << m_type << ", Time: " << m_time << "]"; } -CacheRecorder::CacheRecorder() - : m_uncompressed_trace(NULL), - m_uncompressed_trace_size(0), - m_block_size_bytes(RubySystem::getBlockSizeBytes()) -{ -} - CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace, uint64_t uncompressed_trace_size, std::vector& ruby_port_map, - uint64_t block_size_bytes) + uint64_t trace_block_size_bytes, + uint64_t system_block_size_bytes) : m_uncompressed_trace(uncompressed_trace), m_uncompressed_trace_size(uncompressed_trace_size), m_ruby_port_map(ruby_port_map), m_bytes_read(0), m_records_read(0), m_records_flushed(0), - m_block_size_bytes(block_size_bytes) + m_block_size_bytes(trace_block_size_bytes) { if (m_uncompressed_trace != NULL) { - if (m_block_size_bytes < RubySystem::getBlockSizeBytes()) { + if (m_block_size_bytes < system_block_size_bytes) { // Block sizes larger than when the trace was recorded are not // supported, as we cannot reliably turn 
accesses to smaller blocks // into larger ones. panic("Recorded cache block size (%d) < current block size (%d) !!", - m_block_size_bytes, RubySystem::getBlockSizeBytes()); + m_block_size_bytes, system_block_size_bytes); } } } @@ -125,7 +119,7 @@ CacheRecorder::enqueueNextFetchRequest() DPRINTF(RubyCacheTrace, "Issuing %s\n", *traceRecord); for (int rec_bytes_read = 0; rec_bytes_read < m_block_size_bytes; - rec_bytes_read += RubySystem::getBlockSizeBytes()) { + rec_bytes_read += m_block_size_bytes) { RequestPtr req; MemCmd::Command requestType; @@ -133,19 +127,19 @@ CacheRecorder::enqueueNextFetchRequest() requestType = MemCmd::ReadReq; req = std::make_shared( traceRecord->m_data_address + rec_bytes_read, - RubySystem::getBlockSizeBytes(), 0, + m_block_size_bytes, 0, Request::funcRequestorId); } else if (traceRecord->m_type == RubyRequestType_IFETCH) { requestType = MemCmd::ReadReq; req = std::make_shared( traceRecord->m_data_address + rec_bytes_read, - RubySystem::getBlockSizeBytes(), + m_block_size_bytes, Request::INST_FETCH, Request::funcRequestorId); } else { requestType = MemCmd::WriteReq; req = std::make_shared( traceRecord->m_data_address + rec_bytes_read, - RubySystem::getBlockSizeBytes(), 0, + m_block_size_bytes, 0, Request::funcRequestorId); } diff --git a/src/mem/ruby/system/CacheRecorder.hh b/src/mem/ruby/system/CacheRecorder.hh index 021da6a4da..982e8b0592 100644 --- a/src/mem/ruby/system/CacheRecorder.hh +++ b/src/mem/ruby/system/CacheRecorder.hh @@ -73,13 +73,15 @@ class TraceRecord class CacheRecorder { public: - CacheRecorder(); - ~CacheRecorder(); - + // Construction requires block size. 
+ CacheRecorder() = delete; CacheRecorder(uint8_t* uncompressed_trace, uint64_t uncompressed_trace_size, std::vector& ruby_port_map, - uint64_t block_size_bytes); + uint64_t trace_block_size_bytes, + uint64_t system_block_size_bytes); + ~CacheRecorder(); + void addRecord(int cntrl, Addr data_addr, Addr pc_addr, RubyRequestType type, Tick time, DataBlock& data); diff --git a/src/mem/ruby/system/DMASequencer.cc b/src/mem/ruby/system/DMASequencer.cc index aa3fc66814..cd9d62d12a 100644 --- a/src/mem/ruby/system/DMASequencer.cc +++ b/src/mem/ruby/system/DMASequencer.cc @@ -73,7 +73,7 @@ void DMASequencer::init() { RubyPort::init(); - m_data_block_mask = mask(RubySystem::getBlockSizeBits()); + m_data_block_mask = mask(m_ruby_system->getBlockSizeBits()); } RequestStatus @@ -110,8 +110,10 @@ DMASequencer::makeRequest(PacketPtr pkt) DPRINTF(RubyDma, "DMA req created: addr %p, len %d\n", line_addr, len); + int blk_size = m_ruby_system->getBlockSizeBytes(); + std::shared_ptr msg = - std::make_shared(clockEdge()); + std::make_shared(clockEdge(), blk_size, m_ruby_system); msg->getPhysicalAddress() = paddr; msg->getLineAddress() = line_addr; @@ -145,8 +147,8 @@ DMASequencer::makeRequest(PacketPtr pkt) int offset = paddr & m_data_block_mask; - msg->getLen() = (offset + len) <= RubySystem::getBlockSizeBytes() ? - len : RubySystem::getBlockSizeBytes() - offset; + msg->getLen() = (offset + len) <= m_ruby_system->getBlockSizeBytes() ? 
+ len : m_ruby_system->getBlockSizeBytes() - offset; if (write && (data != NULL)) { if (active_request.data != NULL) { @@ -157,7 +159,8 @@ DMASequencer::makeRequest(PacketPtr pkt) m_outstanding_count++; assert(m_mandatory_q_ptr != NULL); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); active_request.bytes_issued += msg->getLen(); return RequestStatus_Issued; @@ -183,8 +186,10 @@ DMASequencer::issueNext(const Addr& address) return; } + int blk_size = m_ruby_system->getBlockSizeBytes(); + std::shared_ptr msg = - std::make_shared(clockEdge()); + std::make_shared(clockEdge(), blk_size, m_ruby_system); msg->getPhysicalAddress() = active_request.start_paddr + active_request.bytes_completed; @@ -196,9 +201,9 @@ DMASequencer::issueNext(const Addr& address) msg->getLen() = (active_request.len - - active_request.bytes_completed < RubySystem::getBlockSizeBytes() ? + active_request.bytes_completed < m_ruby_system->getBlockSizeBytes() ? active_request.len - active_request.bytes_completed : - RubySystem::getBlockSizeBytes()); + m_ruby_system->getBlockSizeBytes()); if (active_request.write) { msg->getDataBlk(). 
@@ -207,7 +212,8 @@ DMASequencer::issueNext(const Addr& address) } assert(m_mandatory_q_ptr != NULL); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); active_request.bytes_issued += msg->getLen(); DPRINTF(RubyDma, "DMA request bytes issued %d, bytes completed %d, total len %d\n", diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc index 072c63efd7..4d66dc6c1b 100644 --- a/src/mem/ruby/system/GPUCoalescer.cc +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -142,8 +142,8 @@ UncoalescedTable::updateResources() // are accessed directly using the makeRequest() command // instead of accessing through the port. This makes // sending tokens through the port unnecessary - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!coalescer->getRubySystem()->getWarmupEnabled() && + !coalescer->getRubySystem()->getCooldownEnabled()) { if (reqTypeMap[seq_num] != RubyRequestType_FLUSH) { DPRINTF(GPUCoalescer, "Returning token seqNum %d\n", seq_num); @@ -177,7 +177,7 @@ UncoalescedTable::printRequestTable(std::stringstream& ss) ss << "Listing pending packets from " << instMap.size() << " instructions"; for (auto& inst : instMap) { - ss << "\tAddr: " << printAddress(inst.first) << " with " + ss << "\tAddr: " << coalescer->printAddress(inst.first) << " with " << inst.second.size() << " pending packets" << std::endl; } } @@ -590,7 +590,7 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest, // When the Ruby system is cooldown phase, the requests come from // the cache recorder. These requests do not get coalesced and // do not return valid data. 
- if (RubySystem::getCooldownEnabled()) + if (m_ruby_system->getCooldownEnabled()) continue; if (pkt->getPtr()) { @@ -700,8 +700,8 @@ GPUCoalescer::makeRequest(PacketPtr pkt) // When Ruby is in warmup or cooldown phase, the requests come from // the cache recorder. There is no dynamic instruction associated // with these requests either - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!m_ruby_system->getWarmupEnabled() + && !m_ruby_system->getCooldownEnabled()) { if (!m_usingRubyTester) { num_packets = 0; for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) { @@ -985,8 +985,8 @@ GPUCoalescer::completeHitCallback(std::vector & mylist) // When Ruby is in warmup or cooldown phase, the requests come // from the cache recorder. They do not track which port to use // and do not need to send the response back - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!m_ruby_system->getWarmupEnabled() + && !m_ruby_system->getCooldownEnabled()) { RubyPort::SenderState *ss = safe_cast(pkt->senderState); MemResponsePort *port = ss->port; @@ -1015,9 +1015,9 @@ GPUCoalescer::completeHitCallback(std::vector & mylist) } RubySystem *rs = m_ruby_system; - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { rs->m_cache_recorder->enqueueNextFetchRequest(); - } else if (RubySystem::getCooldownEnabled()) { + } else if (m_ruby_system->getCooldownEnabled()) { rs->m_cache_recorder->enqueueNextFlushRequest(); } else { testDrainComplete(); diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh index 42efe41cb7..08412baad1 100644 --- a/src/mem/ruby/system/GPUCoalescer.hh +++ b/src/mem/ruby/system/GPUCoalescer.hh @@ -341,6 +341,8 @@ class GPUCoalescer : public RubyPort void insertKernel(int wavefront_id, PacketPtr pkt); + RubySystem *getRubySystem() { return m_ruby_system; } + GMTokenPort& getGMTokenPort() { return gmTokenPort; } statistics::Histogram& 
getOutstandReqHist() { return m_outstandReqHist; } diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index 2630a6a27c..127f3c7802 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -326,6 +326,8 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) panic("Ruby supports atomic accesses only in noncaching mode\n"); } + RubySystem *rs = owner.m_ruby_system; + // Check for pio requests and directly send them to the dedicated // pio port. if (pkt->cmd != MemCmd::MemSyncReq) { @@ -343,12 +345,11 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) return owner.ticksToCycles(req_ticks); } - assert(getOffset(pkt->getAddr()) + pkt->getSize() <= - RubySystem::getBlockSizeBytes()); + assert(owner.getOffset(pkt->getAddr()) + pkt->getSize() <= + rs->getBlockSizeBytes()); } // Find the machine type of memory controller interface - RubySystem *rs = owner.m_ruby_system; static int mem_interface_type = -1; if (mem_interface_type == -1) { if (rs->m_abstract_controls[MachineType_Directory].size() != 0) { @@ -404,7 +405,7 @@ RubyPort::MemResponsePort::recvFunctional(PacketPtr pkt) } assert(pkt->getAddr() + pkt->getSize() <= - makeLineAddress(pkt->getAddr()) + RubySystem::getBlockSizeBytes()); + owner.makeLineAddress(pkt->getAddr()) + rs->getBlockSizeBytes()); if (access_backing_store) { // The attached physmem contains the official version of data. @@ -501,7 +502,7 @@ RubyPort::ruby_stale_translation_callback(Addr txnId) // assumed they will not be modified or deleted by receivers. // TODO: should this really be using funcRequestorId? 
auto request = std::make_shared( - 0, RubySystem::getBlockSizeBytes(), Request::TLBI_EXT_SYNC, + 0, m_ruby_system->getBlockSizeBytes(), Request::TLBI_EXT_SYNC, Request::funcRequestorId); // Store the txnId in extraData instead of the address request->setExtraData(txnId); @@ -701,7 +702,7 @@ RubyPort::ruby_eviction_callback(Addr address) // assumed they will not be modified or deleted by receivers. // TODO: should this really be using funcRequestorId? auto request = std::make_shared( - address, RubySystem::getBlockSizeBytes(), 0, + address, m_ruby_system->getBlockSizeBytes(), 0, Request::funcRequestorId); // Use a single packet to signal all snooping ports of the invalidation. @@ -739,5 +740,23 @@ RubyPort::functionalWrite(Packet *func_pkt) return num_written; } +Addr +RubyPort::getOffset(Addr addr) const +{ + return ruby::getOffset(addr, m_ruby_system->getBlockSizeBits()); +} + +Addr +RubyPort::makeLineAddress(Addr addr) const +{ + return ruby::makeLineAddress(addr, m_ruby_system->getBlockSizeBits()); +} + +std::string +RubyPort::printAddress(Addr addr) const +{ + return ruby::printAddress(addr, m_ruby_system->getBlockSizeBits()); +} + } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh index 66fe0a7686..39535930b3 100644 --- a/src/mem/ruby/system/RubyPort.hh +++ b/src/mem/ruby/system/RubyPort.hh @@ -181,6 +181,11 @@ class RubyPort : public ClockedObject virtual int functionalWrite(Packet *func_pkt); + // Helper methods for commonly used functions called in common/address.hh + Addr getOffset(Addr addr) const; + Addr makeLineAddress(Addr addr) const; + std::string printAddress(Addr addr) const; + protected: void trySendRetries(); void ruby_hit_callback(PacketPtr pkt); diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc index 21062eac14..fd7b262cb1 100644 --- a/src/mem/ruby/system/RubySystem.cc +++ b/src/mem/ruby/system/RubySystem.cc @@ -66,15 +66,8 @@ namespace gem5 
namespace ruby { -bool RubySystem::m_randomization; -uint32_t RubySystem::m_block_size_bytes; -uint32_t RubySystem::m_block_size_bits; -uint32_t RubySystem::m_memory_size_bits; -bool RubySystem::m_warmup_enabled = false; // To look forward to allowing multiple RubySystem instances, track the number // of RubySystems that need to be warmed up on checkpoint restore. -unsigned RubySystem::m_systems_to_warmup = 0; -bool RubySystem::m_cooldown_enabled = false; RubySystem::RubySystem(const Params &p) : ClockedObject(p), m_access_backing_store(p.access_backing_store), @@ -212,8 +205,8 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace, // Create the CacheRecorder and record the cache trace m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, - ruby_port_map, - block_size_bytes); + ruby_port_map, block_size_bytes, + m_block_size_bytes); } void @@ -331,7 +324,7 @@ RubySystem::serialize(CheckpointOut &cp) const // Store the cache-block size, so we are able to restore on systems // with a different cache-block size. CacheRecorder depends on the // correct cache-block size upon unserializing. - uint64_t block_size_bytes = getBlockSizeBytes(); + uint64_t block_size_bytes = m_block_size_bytes; SERIALIZE_SCALAR(block_size_bytes); // Check that there's a valid trace to use. If not, then memory won't @@ -416,7 +409,6 @@ RubySystem::unserialize(CheckpointIn &cp) readCompressedTrace(cache_trace_file, uncompressed_trace, cache_trace_size); m_warmup_enabled = true; - m_systems_to_warmup++; // Create the cache recorder that will hang around until startup. 
makeCacheRecorder(uncompressed_trace, cache_trace_size, block_size_bytes); @@ -467,10 +459,7 @@ RubySystem::startup() delete m_cache_recorder; m_cache_recorder = NULL; - m_systems_to_warmup--; - if (m_systems_to_warmup == 0) { - m_warmup_enabled = false; - } + m_warmup_enabled = false; // Restore eventq head eventq->replaceHead(eventq_head); @@ -509,7 +498,7 @@ bool RubySystem::functionalRead(PacketPtr pkt) { Addr address(pkt->getAddr()); - Addr line_address = makeLineAddress(address); + Addr line_address = makeLineAddress(address, m_block_size_bits); AccessPermission access_perm = AccessPermission_NotPresent; @@ -625,7 +614,7 @@ bool RubySystem::functionalRead(PacketPtr pkt) { Addr address(pkt->getAddr()); - Addr line_address = makeLineAddress(address); + Addr line_address = makeLineAddress(address, m_block_size_bits); DPRINTF(RubySystem, "Functional Read request for %#x\n", address); @@ -726,7 +715,7 @@ bool RubySystem::functionalWrite(PacketPtr pkt) { Addr addr(pkt->getAddr()); - Addr line_addr = makeLineAddress(addr); + Addr line_addr = makeLineAddress(addr, m_block_size_bits); AccessPermission access_perm = AccessPermission_NotPresent; DPRINTF(RubySystem, "Functional Write request for %#x\n", addr); diff --git a/src/mem/ruby/system/RubySystem.hh b/src/mem/ruby/system/RubySystem.hh index e16d699204..7e18770230 100644 --- a/src/mem/ruby/system/RubySystem.hh +++ b/src/mem/ruby/system/RubySystem.hh @@ -68,12 +68,12 @@ class RubySystem : public ClockedObject ~RubySystem(); // config accessors - static int getRandomization() { return m_randomization; } - static uint32_t getBlockSizeBytes() { return m_block_size_bytes; } - static uint32_t getBlockSizeBits() { return m_block_size_bits; } - static uint32_t getMemorySizeBits() { return m_memory_size_bits; } - static bool getWarmupEnabled() { return m_warmup_enabled; } - static bool getCooldownEnabled() { return m_cooldown_enabled; } + int getRandomization() { return m_randomization; } + uint32_t getBlockSizeBytes() { 
return m_block_size_bytes; } + uint32_t getBlockSizeBits() { return m_block_size_bits; } + uint32_t getMemorySizeBits() { return m_memory_size_bits; } + bool getWarmupEnabled() { return m_warmup_enabled; } + bool getCooldownEnabled() { return m_cooldown_enabled; } memory::SimpleMemory *getPhysMem() { return m_phys_mem; } Cycles getStartCycle() { return m_start_cycle; } @@ -134,14 +134,13 @@ class RubySystem : public ClockedObject void processRubyEvent(); private: // configuration parameters - static bool m_randomization; - static uint32_t m_block_size_bytes; - static uint32_t m_block_size_bits; - static uint32_t m_memory_size_bits; + bool m_randomization; + uint32_t m_block_size_bytes; + uint32_t m_block_size_bits; + uint32_t m_memory_size_bits; - static bool m_warmup_enabled; - static unsigned m_systems_to_warmup; - static bool m_cooldown_enabled; + bool m_warmup_enabled = false; + bool m_cooldown_enabled = false; memory::SimpleMemory *m_phys_mem; const bool m_access_backing_store; @@ -158,6 +157,11 @@ class RubySystem : public ClockedObject Profiler* m_profiler; CacheRecorder* m_cache_recorder; std::vector > m_abstract_controls; + std::map m_num_controllers; + + // These are auto-generated by SLICC based on the built protocol. 
+ int MachineType_base_count(const MachineType& obj); + int MachineType_base_number(const MachineType& obj); }; } // namespace ruby diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 4b0c6a239c..e2f49f5dff 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -73,6 +73,8 @@ Sequencer::Sequencer(const Params &p) { m_outstanding_count = 0; + m_ruby_system = p.ruby_system; + m_dataCache_ptr = p.dcache; m_max_outstanding_requests = p.max_outstanding_requests; m_deadlock_threshold = p.deadlock_threshold; @@ -726,7 +728,7 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, printAddress(request_address)); // update the data unless it is a non-data-carrying flush - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { data.setData(pkt); } else if (!pkt->isFlush()) { if ((type == RubyRequestType_LD) || @@ -782,11 +784,11 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, } RubySystem *rs = m_ruby_system; - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { assert(pkt->req); delete pkt; rs->m_cache_recorder->enqueueNextFetchRequest(); - } else if (RubySystem::getCooldownEnabled()) { + } else if (m_ruby_system->getCooldownEnabled()) { delete pkt; rs->m_cache_recorder->enqueueNextFlushRequest(); } else { @@ -852,8 +854,8 @@ Sequencer::completeHitCallback(std::vector & mylist) // When Ruby is in warmup or cooldown phase, the requests come // from the cache recorder. 
They do not track which port to use // and do not need to send the response back - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!m_ruby_system->getWarmupEnabled() + && !m_ruby_system->getCooldownEnabled()) { RubyPort::SenderState *ss = safe_cast(pkt->senderState); MemResponsePort *port = ss->port; @@ -873,9 +875,9 @@ Sequencer::completeHitCallback(std::vector & mylist) } RubySystem *rs = m_ruby_system; - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { rs->m_cache_recorder->enqueueNextFetchRequest(); - } else if (RubySystem::getCooldownEnabled()) { + } else if (m_ruby_system->getCooldownEnabled()) { rs->m_cache_recorder->enqueueNextFlushRequest(); } else { testDrainComplete(); @@ -910,14 +912,16 @@ Sequencer::invL1() // Evict Read-only data RubyRequestType request_type = RubyRequestType_REPLACEMENT; std::shared_ptr msg = std::make_shared( - clockEdge(), addr, 0, 0, - request_type, RubyAccessMode_Supervisor, + clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system, + addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr); DPRINTF(RubySequencer, "Evicting addr 0x%x\n", addr); assert(m_mandatory_q_ptr != NULL); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(request_type)); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); m_num_pending_invs++; } DPRINTF(RubySequencer, @@ -1080,11 +1084,14 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) pc = pkt->req->getPC(); } + int blk_size = m_ruby_system->getBlockSizeBytes(); + // check if the packet has data as for example prefetch and flush // requests do not std::shared_ptr msg; if (pkt->req->isMemMgmt()) { - msg = std::make_shared(clockEdge(), + msg = std::make_shared(clockEdge(), blk_size, + m_ruby_system, pc, secondary_type, RubyAccessMode_Supervisor, pkt, 
proc_id, core_id); @@ -1111,8 +1118,10 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) msg->m_tlbiTransactionUid); } } else { - msg = std::make_shared(clockEdge(), pkt->getAddr(), - pkt->getSize(), pc, secondary_type, + msg = std::make_shared(clockEdge(), blk_size, + m_ruby_system, + pkt->getAddr(), pkt->getSize(), + pc, secondary_type, RubyAccessMode_Supervisor, pkt, PrefetchBit_No, proc_id, core_id); @@ -1147,7 +1156,9 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) assert(latency > 0); assert(m_mandatory_q_ptr != NULL); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); } template @@ -1194,7 +1205,7 @@ Sequencer::incrementUnaddressedTransactionCnt() // Limit m_unaddressedTransactionCnt to 32 bits, // top 32 bits should always be zeroed out uint64_t aligned_txid = \ - m_unaddressedTransactionCnt << RubySystem::getBlockSizeBits(); + m_unaddressedTransactionCnt << m_ruby_system->getBlockSizeBits(); if (aligned_txid > 0xFFFFFFFFull) { m_unaddressedTransactionCnt = 0; @@ -1206,7 +1217,7 @@ Sequencer::getCurrentUnaddressedTransactionID() const { return ( uint64_t(m_version & 0xFFFFFFFF) << 32) | - (m_unaddressedTransactionCnt << RubySystem::getBlockSizeBits() + (m_unaddressedTransactionCnt << m_ruby_system->getBlockSizeBits() ); } diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index 1f60d2638f..ee16d2fe2e 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -254,6 +254,8 @@ class Sequencer : public RubyPort RubyRequestType primary_type, RubyRequestType secondary_type); + RubySystem *m_ruby_system; + private: int m_max_outstanding_requests; diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py index 3f570fb952..0994bb4afe 100644 --- a/src/mem/ruby/system/Sequencer.py +++ 
b/src/mem/ruby/system/Sequencer.py @@ -83,7 +83,7 @@ class RubyPort(ClockedObject): using_ruby_tester = Param.Bool(False, "") no_retry_on_stall = Param.Bool(False, "") - ruby_system = Param.RubySystem(Parent.any, "") + ruby_system = Param.RubySystem("Parent RubySystem object") system = Param.System(Parent.any, "system object") support_data_reqs = Param.Bool(True, "data cache requests supported") support_inst_reqs = Param.Bool(True, "inst cache requests supported") diff --git a/src/mem/ruby/system/VIPERCoalescer.cc b/src/mem/ruby/system/VIPERCoalescer.cc index 47ceced3a7..67dd88fb2e 100644 --- a/src/mem/ruby/system/VIPERCoalescer.cc +++ b/src/mem/ruby/system/VIPERCoalescer.cc @@ -135,9 +135,9 @@ VIPERCoalescer::issueRequest(CoalescedRequest* crequest) // Creating WriteMask that records written bytes // and atomic operations. This enables partial writes // and partial reads of those writes - DataBlock dataBlock; + uint32_t blockSize = m_ruby_system->getBlockSizeBytes(); + DataBlock dataBlock(blockSize); dataBlock.clear(); - uint32_t blockSize = RubySystem::getBlockSizeBytes(); std::vector accessMask(blockSize,false); std::vector< std::pair > atomicOps; uint32_t tableSize = crequest->getPackets().size(); @@ -159,15 +159,17 @@ VIPERCoalescer::issueRequest(CoalescedRequest* crequest) } std::shared_ptr msg; if (pkt->isAtomicOp()) { - msg = std::make_shared(clockEdge(), pkt->getAddr(), - pkt->getSize(), pc, crequest->getRubyType(), + msg = std::make_shared(clockEdge(), blockSize, + m_ruby_system, pkt->getAddr(), pkt->getSize(), + pc, crequest->getRubyType(), RubyAccessMode_Supervisor, pkt, PrefetchBit_No, proc_id, 100, blockSize, accessMask, dataBlock, atomicOps, crequest->getSeqNum()); } else { - msg = std::make_shared(clockEdge(), pkt->getAddr(), - pkt->getSize(), pc, crequest->getRubyType(), + msg = std::make_shared(clockEdge(), blockSize, + m_ruby_system, pkt->getAddr(), pkt->getSize(), + pc, crequest->getRubyType(), RubyAccessMode_Supervisor, pkt, PrefetchBit_No, 
proc_id, 100, blockSize, accessMask, @@ -195,7 +197,9 @@ VIPERCoalescer::issueRequest(CoalescedRequest* crequest) assert(m_mandatory_q_ptr); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(crequest->getRubyType())); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); } void @@ -241,7 +245,7 @@ VIPERCoalescer::writeCompleteCallback(Addr addr, uint64_t instSeqNum) std::remove_if( m_writeCompletePktMap[key].begin(), m_writeCompletePktMap[key].end(), - [addr](PacketPtr writeCompletePkt) -> bool { + [this,addr](PacketPtr writeCompletePkt) -> bool { if (makeLineAddress(writeCompletePkt->getAddr()) == addr) { RubyPort::SenderState *ss = safe_cast @@ -296,14 +300,15 @@ VIPERCoalescer::invTCP() // Evict Read-only data RubyRequestType request_type = RubyRequestType_REPLACEMENT; std::shared_ptr msg = std::make_shared( - clockEdge(), addr, 0, 0, - request_type, RubyAccessMode_Supervisor, - nullptr); + clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system, + addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr); DPRINTF(GPUCoalescer, "Evicting addr 0x%x\n", addr); assert(m_mandatory_q_ptr != NULL); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(request_type)); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); m_num_pending_invs++; } DPRINTF(GPUCoalescer, @@ -343,16 +348,17 @@ VIPERCoalescer::invTCC(PacketPtr pkt) RubyRequestType request_type = RubyRequestType_InvL2; std::shared_ptr msg = std::make_shared( - clockEdge(), addr, 0, 0, - request_type, RubyAccessMode_Supervisor, - nullptr); + clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system, + addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr); DPRINTF(GPUCoalescer, "Sending L2 
invalidate to 0x%x\n", addr); assert(m_mandatory_q_ptr); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(request_type)); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); m_pending_invl2s[addr].push_back(pkt); } diff --git a/src/mem/ruby/system/VIPERSequencer.cc b/src/mem/ruby/system/VIPERSequencer.cc index ac840777d4..b8b806aa9c 100644 --- a/src/mem/ruby/system/VIPERSequencer.cc +++ b/src/mem/ruby/system/VIPERSequencer.cc @@ -81,8 +81,8 @@ VIPERSequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, // subBlock with the recieved data. The tester will later access // this state. assert(!m_usingRubyTester); - assert(!RubySystem::getWarmupEnabled()); - assert(!RubySystem::getCooldownEnabled()); + assert(!m_ruby_system->getWarmupEnabled()); + assert(!m_ruby_system->getCooldownEnabled()); ruby_hit_callback(pkt); testDrainComplete(); } diff --git a/src/mem/slicc/ast/CheckProbeStatementAST.py b/src/mem/slicc/ast/CheckProbeStatementAST.py index 10945cfc30..14f6f7e4fa 100644 --- a/src/mem/slicc/ast/CheckProbeStatementAST.py +++ b/src/mem/slicc/ast/CheckProbeStatementAST.py @@ -49,7 +49,8 @@ class CheckProbeStatementAST(StatementAST): if (m_is_blocking && (m_block_map.count($address_code) == 1) && (m_block_map[$address_code] == &$in_port_code)) { - $in_port_code.delayHead(clockEdge(), cyclesToTicks(Cycles(1))); + $in_port_code.delayHead(clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); continue; } """ diff --git a/src/mem/slicc/ast/DeferEnqueueingStatementAST.py b/src/mem/slicc/ast/DeferEnqueueingStatementAST.py index 14b2e48cd3..4bb446aee2 100644 --- a/src/mem/slicc/ast/DeferEnqueueingStatementAST.py +++ b/src/mem/slicc/ast/DeferEnqueueingStatementAST.py @@ -68,7 +68,8 @@ class DeferEnqueueingStatementAST(StatementAST): # Declare message code( 
"std::shared_ptr<${{msg_type.c_ident}}> out_msg = " - "std::make_shared<${{msg_type.c_ident}}>(clockEdge());" + "std::make_shared<${{msg_type.c_ident}}>(clockEdge()," + " m_ruby_system->getBlockSizeBytes(), m_ruby_system);" ) # The other statements diff --git a/src/mem/slicc/ast/EnqueueStatementAST.py b/src/mem/slicc/ast/EnqueueStatementAST.py index c2d47af9ce..b026f6e7a9 100644 --- a/src/mem/slicc/ast/EnqueueStatementAST.py +++ b/src/mem/slicc/ast/EnqueueStatementAST.py @@ -76,7 +76,8 @@ class EnqueueStatementAST(StatementAST): # Declare message code( "std::shared_ptr<${{msg_type.c_ident}}> out_msg = " - "std::make_shared<${{msg_type.c_ident}}>(clockEdge());" + "std::make_shared<${{msg_type.c_ident}}>(clockEdge(), " + " m_ruby_system->getBlockSizeBytes(), m_ruby_system);" ) # The other statements @@ -89,17 +90,21 @@ class EnqueueStatementAST(StatementAST): bypass_strict_fifo_code = self.bypass_strict_fifo.inline(False) code( "(${{self.queue_name.var.code}}).enqueue(" - "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), $bypass_strict_fifo_code);" + "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), " + "m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled(), " + "$bypass_strict_fifo_code);" ) else: code( "(${{self.queue_name.var.code}}).enqueue(" - "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)));" + "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), " + "m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled());" ) else: code( "(${{self.queue_name.var.code}}).enqueue(out_msg, " - "clockEdge(), cyclesToTicks(Cycles(1)));" + "clockEdge(), cyclesToTicks(Cycles(1))," + "m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled());" ) # End scope diff --git a/src/mem/slicc/ast/LocalVariableAST.py b/src/mem/slicc/ast/LocalVariableAST.py index b4ac8f446b..43ab110a67 100644 --- a/src/mem/slicc/ast/LocalVariableAST.py +++ b/src/mem/slicc/ast/LocalVariableAST.py @@ -73,6 +73,8 @@ class LocalVariableAST(StatementAST): ) ): 
code += f"{type.c_ident}* {ident}" + elif "implicit_ctor" in type: + code += f"{type.c_ident} {ident}({type['implicit_ctor']})" else: code += f"{type.c_ident} {ident}" return type diff --git a/src/mem/slicc/ast/PeekStatementAST.py b/src/mem/slicc/ast/PeekStatementAST.py index 00edff4e7b..415f4ec465 100644 --- a/src/mem/slicc/ast/PeekStatementAST.py +++ b/src/mem/slicc/ast/PeekStatementAST.py @@ -93,7 +93,8 @@ class PeekStatementAST(StatementAST): if (m_is_blocking && (m_block_map.count(in_msg_ptr->m_$address_field) == 1) && (m_block_map[in_msg_ptr->m_$address_field] != &$qcode)) { - $qcode.delayHead(clockEdge(), cyclesToTicks(Cycles(1))); + $qcode.delayHead(clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); continue; } """ diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py index b523522501..6202d2d239 100644 --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -352,7 +352,6 @@ class $c_ident : public AbstractController public: typedef ${c_ident}Params Params; $c_ident(const Params &p); - static int getNumControllers(); void init(); MessageBuffer *getMandatoryQueue() const; @@ -449,9 +448,8 @@ int m_counters[${ident}_State_NUM][${ident}_Event_NUM]; int m_event_counters[${ident}_Event_NUM]; bool m_possible[${ident}_State_NUM][${ident}_Event_NUM]; -static std::vector eventVec; -static std::vector > transVec; -static int m_num_controllers; +std::vector eventVec; +std::vector > transVec; // Internal functions """ @@ -625,10 +623,6 @@ namespace gem5 namespace ruby { -int $c_ident::m_num_controllers = 0; -std::vector $c_ident::eventVec; -std::vector > $c_ident::transVec; - // for adding information to the protocol debug trace std::stringstream ${ident}_transitionComment; @@ -644,8 +638,9 @@ $c_ident::$c_ident(const Params &p) { m_machineID.type = MachineType_${ident}; m_machineID.num = m_version; - m_num_controllers++; + 
p.ruby_system->m_num_controllers[MachineType_${ident}]++; p.ruby_system->registerAbstractController(this); + m_ruby_system = p.ruby_system; m_in_ports = $num_in_ports; """ @@ -699,7 +694,7 @@ void $c_ident::initNetQueues() { MachineType machine_type = string_to_MachineType("${{self.ident}}"); - [[maybe_unused]] int base = MachineType_base_number(machine_type); + [[maybe_unused]] int base = m_ruby_system->MachineType_base_number(machine_type); """ ) @@ -776,6 +771,17 @@ $c_ident::init() comment = f"Type {vtype.ident} default" code('*$vid = ${{vtype["default"]}}; // $comment') + # For objects that require knowing the cache line size, + # set the value here. + if vtype.c_ident in ("TBETable"): + block_size_func = "m_ruby_system->getBlockSizeBytes()" + code(f"(*{vid}).setBlockSize({block_size_func});") + + for param in self.config_parameters: + if param.type_ast.type.ident == "CacheMemory": + assert param.pointer + code(f"m_{param.ident}_ptr->setRubySystem(m_ruby_system);") + # Set the prefetchers code() for prefetcher in self.prefetchers: @@ -942,7 +948,9 @@ $c_ident::regStats() "${c_ident}." + ${ident}_Event_to_string(event); statistics::Vector *t = new statistics::Vector(profilerStatsPtr, stat_name.c_str()); - t->init(m_num_controllers); + int num_controllers = + m_ruby_system->m_num_controllers[MachineType_${ident}]; + t->init(num_controllers); t->flags(statistics::pdf | statistics::total | statistics::oneline | statistics::nozero); @@ -961,7 +969,9 @@ $c_ident::regStats() "." 
+ ${ident}_Event_to_string(event); statistics::Vector *t = new statistics::Vector( profilerStatsPtr, stat_name.c_str()); - t->init(m_num_controllers); + int num_controllers = + m_ruby_system->m_num_controllers[MachineType_${ident}]; + t->init(num_controllers); t->flags(statistics::pdf | statistics::total | statistics::oneline | statistics::nozero); transVec[state].push_back(t); @@ -1062,9 +1072,12 @@ $c_ident::regStats() void $c_ident::collateStats() { + int num_controllers = + m_ruby_system->m_num_controllers[MachineType_${ident}]; + for (${ident}_Event event = ${ident}_Event_FIRST; event < ${ident}_Event_NUM; ++event) { - for (unsigned int i = 0; i < m_num_controllers; ++i) { + for (unsigned int i = 0; i < num_controllers; ++i) { RubySystem *rs = params().ruby_system; std::map::iterator it = rs->m_abstract_controls[MachineType_${ident}].find(i); @@ -1080,7 +1093,7 @@ $c_ident::collateStats() for (${ident}_Event event = ${ident}_Event_FIRST; event < ${ident}_Event_NUM; ++event) { - for (unsigned int i = 0; i < m_num_controllers; ++i) { + for (unsigned int i = 0; i < num_controllers; ++i) { RubySystem *rs = params().ruby_system; std::map::iterator it = rs->m_abstract_controls[MachineType_${ident}].find(i); @@ -1125,12 +1138,6 @@ $c_ident::getTransitionCount(${ident}_State state, return m_counters[state][event]; } -int -$c_ident::getNumControllers() -{ - return m_num_controllers; -} - MessageBuffer* $c_ident::getMandatoryQueue() const { @@ -1181,6 +1188,7 @@ void $c_ident::set_cache_entry(${{self.EntryType.c_ident}}*& m_cache_entry_ptr, AbstractCacheEntry* m_new_cache_entry) { m_cache_entry_ptr = (${{self.EntryType.c_ident}}*)m_new_cache_entry; + m_cache_entry_ptr->setRubySystem(m_ruby_system); } void @@ -1200,6 +1208,7 @@ void $c_ident::set_tbe(${{self.TBEType.c_ident}}*& m_tbe_ptr, ${{self.TBEType.c_ident}}* m_new_tbe) { m_tbe_ptr = m_new_tbe; + m_tbe_ptr->setRubySystem(m_ruby_system); } void diff --git a/src/mem/slicc/symbols/Type.py 
b/src/mem/slicc/symbols/Type.py index 535a4165b3..53c8ff877e 100644 --- a/src/mem/slicc/symbols/Type.py +++ b/src/mem/slicc/symbols/Type.py @@ -119,6 +119,10 @@ class Type(Symbol): def isMessage(self): return "message" in self + @property + def isTBE(self): + return "tbe" in self + @property def isBuffer(self): return "buffer" in self @@ -250,18 +254,54 @@ namespace gem5 namespace ruby { +class RubySystem; + $klass ${{self.c_ident}}$parent { public: - ${{self.c_ident}} """, klass="class", ) if self.isMessage: - code("(Tick curTime) : %s(curTime) {" % self["interface"]) + code( + "${{self.c_ident}}(Tick curTime, int blockSize, RubySystem* rs) : %s(curTime, blockSize, rs)" + % self["interface"] + ) + + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + code(f"\t\t, m_{dm.ident}(blockSize)") + + code("{") + elif self.isTBE: + code("${{self.c_ident}}(int block_size)") + + ctor_count = 0 + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + if ctor_count == 0: + code("\t:") + else: + code("\t, ") + code(f"\t\tm_{dm.ident}(block_size)") + ctor_count += 1 + + code("{") else: - code("()\n\t\t{") + code("${{self.c_ident}}()") + + ctor_count = 0 + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + if ctor_count == 0: + code("\t:") + else: + code("\t, ") + code(f"\t\tm_{dm.ident}(0)") + ctor_count += 1 + + code("{") code.indent() if not self.isGlobal: @@ -280,6 +320,12 @@ $klass ${{self.c_ident}}$parent code(" // default value of $tid") else: code("// m_$ident has no default") + + # These parts of Messages need RubySystem pointers. For things + # like Entry which only store NetDest, RubySystem is not needed. 
+ if self.isMessage and dm.real_c_type == "NetDest": + code("// m_$ident requires RubySystem") + code("m_$ident.setRubySystem(rs);") code.dedent() code("}") @@ -300,21 +346,45 @@ $klass ${{self.c_ident}}$parent params = ", ".join(params) if self.isMessage: - params = "const Tick curTime, " + params + params = ( + "const Tick curTime, const int blockSize, const RubySystem *rs, " + + params + ) code("${{self.c_ident}}($params)") # Call superclass constructor if "interface" in self: if self.isMessage: - code(' : ${{self["interface"]}}(curTime)') + code( + ' : ${{self["interface"]}}(curTime, blockSize, rs)' + ) + + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + code(f"\t\t, m_{dm.ident}(blockSize)") else: code(' : ${{self["interface"]}}()') + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + code(f"\t\t, m_{dm.ident}(local_{dm.ident})") + else: + ctor_count = 0 + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + if ctor_count == 0: + code("\t:") + else: + code("\t, ") + code(f"\t\tm_{dm.ident}(local_{dm.ident})") + ctor_count += 1 + code("{") code.indent() for dm in self.data_members.values(): - code("m_${{dm.ident}} = local_${{dm.ident}};") + if not dm.real_c_type in ("DataBlock", "WriteMask"): + code("m_${{dm.ident}} = local_${{dm.ident}};") code.dedent() code("}") @@ -342,6 +412,35 @@ clone() const ) if not self.isGlobal: + # Block size setter for fields that require block size + # Intentionally do not begin function name with "set" in case + # the user has a field named BlockSize which would conflict + # with the method generated below. 
+ code("\nvoid initBlockSize(int block_size)") + code("{") + code("\tblock_size_bits = floorLog2(block_size);") + + needs_block_size = ( + "DataBlock", + "WriteMask", + "PersistentTable", + "TimerTable", + "PerfectCacheMemory", + ) + + for dm in self.data_members.values(): + if dm.real_c_type in needs_block_size: + code(f"\tm_{dm.ident}.setBlockSize(block_size);") + code("}\n") + + code("\nvoid setRubySystem(RubySystem *ruby_system)") + code("{") + for dm in self.data_members.values(): + if dm.real_c_type in ("NetDest"): + code(f"// m_{dm.ident} requires RubySystem") + code(f"\tm_{dm.ident}.setRubySystem(ruby_system);") + code("}\n") + # const Get methods for each field code("// Const accessors methods for each field") for dm in self.data_members.values(): @@ -393,6 +492,9 @@ set${{dm.ident}}(const ${{dm.real_c_type}}& local_${{dm.ident}}) code(" //private:") code.indent() + # block_size_bits for print methods + code("int block_size_bits = 0;") + # Data members for each field for dm in self.data_members.values(): if "abstract" not in dm: @@ -473,7 +575,7 @@ ${{self.c_ident}}::print(std::ostream& out) const if dm.type.c_ident == "Addr": code( """ -out << "${{dm.ident}} = " << printAddress(m_${{dm.ident}}) << " ";""" +out << "${{dm.ident}} = " << printAddress(m_${{dm.ident}}, block_size_bits) << " ";""" ) else: code('out << "${{dm.ident}} = " << m_${{dm.ident}} << " ";' "") @@ -846,7 +948,7 @@ ${{self.c_ident}}_from_base_level(int type) * \\return the base number of components for each machine */ int -${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) +RubySystem::${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) { int base = 0; switch(obj) { @@ -860,7 +962,7 @@ ${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) # Check if there is a defined machine with this type if enum.primary: code( - " base += ${{enum.ident}}_Controller::getNumControllers();" + "\tbase += m_num_controllers[${{self.c_ident}}_${{enum.ident}}];" ) else: code(" base += 
0;") @@ -882,7 +984,7 @@ ${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) * \\return the total number of components for each machine */ int -${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj) +RubySystem::${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj) { switch(obj) { """ @@ -893,7 +995,7 @@ ${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj) code("case ${{self.c_ident}}_${{enum.ident}}:") if enum.primary: code( - "return ${{enum.ident}}_Controller::getNumControllers();" + "return m_num_controllers[${{self.c_ident}}_${{enum.ident}}];" ) else: code("return 0;") diff --git a/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py index 29df2a969c..a469fead61 100644 --- a/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py @@ -137,7 +137,9 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) def _create_core_cluster( @@ -167,12 +169,16 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): ) cluster.icache.sequencer = RubySequencer( - version=core_num, dcache=NULL, clk_domain=cluster.icache.clk_domain + version=core_num, + dcache=NULL, + clk_domain=cluster.icache.clk_domain, + ruby_system=self.ruby_system, ) cluster.dcache.sequencer = RubySequencer( version=core_num, dcache=cluster.dcache.cache, clk_domain=cluster.dcache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -223,7 +229,11 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): board.get_clock_domain(), ) version = len(board.get_processor().get_cores()) + i - ctrl.sequencer = RubySequencer(version=version, in_ports=port) + ctrl.sequencer = RubySequencer( + version=version, + in_ports=port, + ruby_system=self.ruby_system, + ) ctrl.sequencer.dcache = NULL ctrl.ruby_system = self.ruby_system diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py index 4840e3b264..d0c54840fc 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py @@ -37,7 +37,7 @@ class Directory(AbstractDirectory): def __init__(self, network, cache_line_size, mem_range, port): super().__init__(network, cache_line_size) self.addr_ranges = [mem_range] - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory(block_size=cache_line_size) # Connect this directory to the memory side. 
self.memory_out_port = port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py index 6d203f978a..ef90ac79f6 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py @@ -80,7 +80,7 @@ class L1Cache(L0Cache_Controller): replacement_policy=LRURP(), ) self.clk_domain = clk_domain - self.prefetcher = RubyPrefetcher() + self.prefetcher = RubyPrefetcher(block_size=cache_line_size) self.send_evictions = core.requires_send_evicts() self.transitions_per_cycle = 32 self.enable_prefetch = False diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py index ff2b8e3dd9..7c473f8be9 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py @@ -75,7 +75,7 @@ class L2Cache(L1Cache_Controller): self.l2_select_num_bits = int(math.log(num_l3Caches, 2)) self.cluster_id = cluster_id self.clk_domain = clk_domain - self.prefetcher = RubyPrefetcher() + self.prefetcher = RubyPrefetcher(block_size=cache_line_size) self.transitions_per_cycle = 32 # l1_request_latency, l1_response_latency, to_l2_latency are # ruby backend terminology. 
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py index 4840e3b264..d0c54840fc 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py @@ -37,7 +37,7 @@ class Directory(AbstractDirectory): def __init__(self, network, cache_line_size, mem_range, port): super().__init__(network, cache_line_size) self.addr_ranges = [mem_range] - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory(block_size=cache_line_size) # Connect this directory to the memory side. self.memory_out_port = port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py index 7787644c9b..13625beea7 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py @@ -73,7 +73,7 @@ class L1Cache(AbstractL1Cache): ) self.l2_select_num_bits = int(math.log(num_l2Caches, 2)) self.clk_domain = clk_domain - self.prefetcher = RubyPrefetcher() + self.prefetcher = RubyPrefetcher(block_size=cache_line_size) self.send_evictions = core.requires_send_evicts() self.transitions_per_cycle = 4 self.enable_prefetch = False diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py index 3d1ae54104..79e40e9e01 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py @@ -41,7 +41,7 @@ class Directory(AbstractDirectory): def __init__(self, network, cache_line_size, 
mem_range, port): super().__init__(network, cache_line_size) self.addr_ranges = [mem_range] - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory(block_size=cache_line_size) # Connect this directory to the memory side. self.memory_out_port = port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py index 9aa0dc4a36..212c06c4c3 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py @@ -143,6 +143,7 @@ class CoreComplex(SubSystem, RubyNetworkComponent): version=core_id, dcache=cluster.l1_cache.Dcache, clk_domain=cluster.l1_cache.clk_domain, + ruby_system=self._ruby_system, ) if self._board.has_io_bus(): diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py index f7d4d63de1..83137ce15a 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py @@ -151,7 +151,9 @@ class OctopiCache( # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) def _create_directory_controllers(self, board): @@ -228,7 +230,11 @@ class OctopiCache( if board.has_dma_ports(): self.ruby_system.dma_controllers = [ DMAController( - dma_sequencer=DMASequencer(version=i + 1, in_ports=port), + dma_sequencer=DMASequencer( + version=i + 1, + in_ports=port, + ruby_system=self.ruby_system, + ), ruby_system=self.ruby_system, ) for i, port in enumerate(board.get_dma_ports()) diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py index 66fea95636..92e8860a24 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py @@ -118,6 +118,7 @@ class MESIThreeLevelCacheHierarchy( version=core_idx, dcache=l1_cache.Dcache, clk_domain=l1_cache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -196,7 +197,12 @@ class MESIThreeLevelCacheHierarchy( dma_ports = board.get_dma_ports() for i, port in enumerate(dma_ports): ctrl = DMAController( - DMASequencer(version=i, in_ports=port), self.ruby_system + DMASequencer( + version=i, + in_ports=port, + ruby_system=self.ruby_system, + ), + self.ruby_system, ) self._dma_controllers.append(ctrl) @@ -223,5 +229,7 @@ class MESIThreeLevelCacheHierarchy( # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py index 004c2ff9d2..efe714c23c 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py @@ -109,7 +109,10 @@ class MESITwoLevelCacheHierarchy( ) cache.sequencer = RubySequencer( - version=i, dcache=cache.L1Dcache, clk_domain=cache.clk_domain + version=i, + dcache=cache.L1Dcache, + clk_domain=cache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -163,7 +166,11 @@ class MESITwoLevelCacheHierarchy( dma_ports = board.get_dma_ports() for i, port in enumerate(dma_ports): ctrl = DMAController(self.ruby_system.network, cache_line_size) - ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port) + ctrl.dma_sequencer = DMASequencer( + version=i, + in_ports=port, + ruby_system=self.ruby_system, + ) self._dma_controllers.append(ctrl) ctrl.ruby_system = self.ruby_system @@ -188,5 +195,7 @@ class MESITwoLevelCacheHierarchy( # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) diff --git a/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py index 478c793560..56e620ff0c 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py @@ -95,6 +95,7 @@ class MIExampleCacheHierarchy(AbstractRubyCacheHierarchy): version=i, dcache=cache.cacheMemory, clk_domain=cache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -140,7 +141,11 @@ class MIExampleCacheHierarchy(AbstractRubyCacheHierarchy): ctrl = DMAController( self.ruby_system.network, board.get_cache_line_size() ) - ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port) + ctrl.dma_sequencer = DMASequencer( + version=i, + in_ports=port, + ruby_system=self.ruby_system, + ) ctrl.ruby_system = self.ruby_system ctrl.dma_sequencer.ruby_system = self.ruby_system @@ -167,5 +172,7 @@ class MIExampleCacheHierarchy(AbstractRubyCacheHierarchy): # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. - self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports)