diff --git a/configs/example/ruby_gpu_random_test.py b/configs/example/ruby_gpu_random_test.py index bfcd2c953d..eb7dd3acbd 100644 --- a/configs/example/ruby_gpu_random_test.py +++ b/configs/example/ruby_gpu_random_test.py @@ -371,6 +371,7 @@ for dma_idx in range(n_DMAs): num_lanes=1, clk_domain=thread_clock, deadlock_threshold=tester_deadlock_threshold, + cache_line_size=system.cache_line_size, ) ) g_thread_idx += 1 @@ -393,6 +394,7 @@ for cu_idx in range(n_CUs): num_lanes=args.wf_size, clk_domain=thread_clock, deadlock_threshold=tester_deadlock_threshold, + cache_line_size=system.cache_line_size, ) ) g_thread_idx += 1 diff --git a/configs/learning_gem5/part3/msi_caches.py b/configs/learning_gem5/part3/msi_caches.py index c198662c5e..b719c7ab60 100644 --- a/configs/learning_gem5/part3/msi_caches.py +++ b/configs/learning_gem5/part3/msi_caches.py @@ -84,6 +84,7 @@ class MyCacheSystem(RubySystem): # I/D cache is combined and grab from ctrl dcache=self.controllers[i].cacheMemory, clk_domain=self.controllers[i].clk_domain, + ruby_system=self, ) for i in range(len(cpus)) ] @@ -191,7 +192,9 @@ class DirController(Directory_Controller): self.version = self.versionCount() self.addr_ranges = ranges self.ruby_system = ruby_system - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) # Connect this directory to the memory side. 
self.memory = mem_ctrls[0].port self.connectQueues(ruby_system) diff --git a/configs/learning_gem5/part3/ruby_caches_MI_example.py b/configs/learning_gem5/part3/ruby_caches_MI_example.py index baee120bb9..583041a674 100644 --- a/configs/learning_gem5/part3/ruby_caches_MI_example.py +++ b/configs/learning_gem5/part3/ruby_caches_MI_example.py @@ -84,6 +84,7 @@ class MyCacheSystem(RubySystem): # I/D cache is combined and grab from ctrl dcache=self.controllers[i].cacheMemory, clk_domain=self.controllers[i].clk_domain, + ruby_system=self, ) for i in range(len(cpus)) ] @@ -180,7 +181,9 @@ class DirController(Directory_Controller): self.version = self.versionCount() self.addr_ranges = ranges self.ruby_system = ruby_system - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) # Connect this directory to the memory side. self.memory = mem_ctrls[0].port self.connectQueues(ruby_system) diff --git a/configs/learning_gem5/part3/test_caches.py b/configs/learning_gem5/part3/test_caches.py index 4e8e8febda..be2d46253e 100644 --- a/configs/learning_gem5/part3/test_caches.py +++ b/configs/learning_gem5/part3/test_caches.py @@ -79,6 +79,7 @@ class TestCacheSystem(RubySystem): # I/D cache is combined and grab from ctrl dcache=self.controllers[i].cacheMemory, clk_domain=self.clk_domain, + ruby_system=self, ) for i in range(num_testers) ] diff --git a/configs/ruby/AMD_Base_Constructor.py b/configs/ruby/AMD_Base_Constructor.py index ff4246a7e0..7d40862517 100644 --- a/configs/ruby/AMD_Base_Constructor.py +++ b/configs/ruby/AMD_Base_Constructor.py @@ -84,14 +84,14 @@ class CPCntrl(AMD_Base_Controller, CntrlBase): self.L2cache = L2Cache() self.L2cache.create(options.l2_size, options.l2_assoc, options) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1D0cache self.sequencer.ruby_system = ruby_system 
self.sequencer.coreid = 0 self.sequencer.is_cpu_sequencer = True - self.sequencer1 = RubySequencer() + self.sequencer1 = RubySequencer(ruby_system=ruby_system) self.sequencer1.version = self.seqCount() self.sequencer1.dcache = self.L1D1cache self.sequencer1.ruby_system = ruby_system diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py index 313d1d514a..15108bb674 100644 --- a/configs/ruby/GPU_VIPER.py +++ b/configs/ruby/GPU_VIPER.py @@ -114,14 +114,14 @@ class CPCntrl(CorePair_Controller, CntrlBase): self.L2cache = L2Cache() self.L2cache.create(options.l2_size, options.l2_assoc, options) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1D0cache self.sequencer.ruby_system = ruby_system self.sequencer.coreid = 0 self.sequencer.is_cpu_sequencer = True - self.sequencer1 = RubySequencer() + self.sequencer1 = RubySequencer(ruby_system=ruby_system) self.sequencer1.version = self.seqCount() self.sequencer1.dcache = self.L1D1cache self.sequencer1.ruby_system = ruby_system @@ -169,7 +169,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): # TCP_Controller inherits this from RubyController self.mandatory_queue_latency = options.mandatory_queue_latency - self.coalescer = VIPERCoalescer() + self.coalescer = VIPERCoalescer(ruby_system=ruby_system) self.coalescer.version = self.seqCount() self.coalescer.icache = self.L1cache self.coalescer.dcache = self.L1cache @@ -182,7 +182,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): options.max_coalesces_per_cycle ) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1cache self.sequencer.ruby_system = ruby_system @@ -211,7 +211,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): self.L1cache.create(options) self.issue_latency = 1 - self.coalescer = VIPERCoalescer() + self.coalescer = 
VIPERCoalescer(ruby_system=ruby_system) self.coalescer.version = self.seqCount() self.coalescer.icache = self.L1cache self.coalescer.dcache = self.L1cache @@ -219,7 +219,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): self.coalescer.support_inst_reqs = False self.coalescer.is_cpu_sequencer = False - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1cache self.sequencer.ruby_system = ruby_system @@ -387,7 +387,9 @@ class DirCntrl(Directory_Controller, CntrlBase): self.response_latency = 30 self.addr_ranges = dir_ranges - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) self.L3CacheMemory = L3Cache() self.L3CacheMemory.create(options, ruby_system, system) @@ -686,7 +688,7 @@ def construct_gpudirs(options, system, ruby_system, network): dir_cntrl.addr_ranges = dram_intf.range # Append - exec("system.ruby.gpu_dir_cntrl%d = dir_cntrl" % i) + exec("ruby_system.gpu_dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) mem_ctrls.append(mem_ctrl) diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py index e0de4e0636..9054fefc01 100644 --- a/configs/ruby/MESI_Three_Level.py +++ b/configs/ruby/MESI_Three_Level.py @@ -148,6 +148,7 @@ def create_system( train_misses=5, num_startup_pfs=4, cross_page=True, + block_size=options.cacheline_size, ) l0_cntrl = L0Cache_Controller( diff --git a/configs/ruby/MESI_Three_Level_HTM.py b/configs/ruby/MESI_Three_Level_HTM.py index e6c4e81f91..d7ad3bdc04 100644 --- a/configs/ruby/MESI_Three_Level_HTM.py +++ b/configs/ruby/MESI_Three_Level_HTM.py @@ -148,6 +148,7 @@ def create_system( train_misses=5, num_startup_pfs=4, cross_page=True, + block_size=options.cacheline_size, ) l0_cntrl = L0Cache_Controller( diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py index 500afbc199..6e1e0b97f3 100644 --- 
a/configs/ruby/MESI_Two_Level.py +++ b/configs/ruby/MESI_Two_Level.py @@ -94,7 +94,7 @@ def create_system( is_icache=False, ) - prefetcher = RubyPrefetcher() + prefetcher = RubyPrefetcher(block_size=options.cacheline_size) clk_domain = cpus[i].clk_domain diff --git a/configs/ruby/MOESI_AMD_Base.py b/configs/ruby/MOESI_AMD_Base.py index aeab96a85f..1095defc57 100644 --- a/configs/ruby/MOESI_AMD_Base.py +++ b/configs/ruby/MOESI_AMD_Base.py @@ -112,14 +112,14 @@ class CPCntrl(CorePair_Controller, CntrlBase): self.L2cache = L2Cache() self.L2cache.create(options) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1D0cache self.sequencer.ruby_system = ruby_system self.sequencer.coreid = 0 self.sequencer.is_cpu_sequencer = True - self.sequencer1 = RubySequencer() + self.sequencer1 = RubySequencer(ruby_system=ruby_system) self.sequencer1.version = self.seqCount() self.sequencer1.dcache = self.L1D1cache self.sequencer1.ruby_system = ruby_system @@ -194,7 +194,9 @@ class DirCntrl(Directory_Controller, CntrlBase): self.response_latency = 30 self.addr_ranges = dir_ranges - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) self.L3CacheMemory = L3Cache() self.L3CacheMemory.create(options, ruby_system, system) diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py index e427a39de8..0a6671aa4b 100644 --- a/configs/ruby/Ruby.py +++ b/configs/ruby/Ruby.py @@ -308,7 +308,9 @@ def create_directories(options, bootmem, ruby_system, system): for i in range(options.num_dirs): dir_cntrl = Directory_Controller() dir_cntrl.version = i - dir_cntrl.directory = RubyDirectoryMemory() + dir_cntrl.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) dir_cntrl.ruby_system = ruby_system exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) @@ -316,7 +318,9 @@ def create_directories(options, 
bootmem, ruby_system, system): if bootmem is not None: rom_dir_cntrl = Directory_Controller() - rom_dir_cntrl.directory = RubyDirectoryMemory() + rom_dir_cntrl.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) rom_dir_cntrl.ruby_system = ruby_system rom_dir_cntrl.version = i + 1 rom_dir_cntrl.memory = bootmem.port diff --git a/src/cpu/testers/gpu_ruby_test/TesterThread.py b/src/cpu/testers/gpu_ruby_test/TesterThread.py index 49388a76e1..6ddfc66ddc 100644 --- a/src/cpu/testers/gpu_ruby_test/TesterThread.py +++ b/src/cpu/testers/gpu_ruby_test/TesterThread.py @@ -41,3 +41,4 @@ class TesterThread(ClockedObject): thread_id = Param.Int("Unique TesterThread ID") num_lanes = Param.Int("Number of lanes this thread has") deadlock_threshold = Param.Cycles(1000000000, "Deadlock threshold") + cache_line_size = Param.UInt32("Size of cache line in cache") diff --git a/src/cpu/testers/gpu_ruby_test/address_manager.cc b/src/cpu/testers/gpu_ruby_test/address_manager.cc index a0c0670a8f..83d8a1a277 100644 --- a/src/cpu/testers/gpu_ruby_test/address_manager.cc +++ b/src/cpu/testers/gpu_ruby_test/address_manager.cc @@ -64,7 +64,9 @@ AddressManager::AddressManager(int n_atomic_locs, int n_normal_locs_per_atomic) std::shuffle( randAddressMap.begin(), randAddressMap.end(), - std::default_random_engine(random_mt.random(0,UINT_MAX)) + // TODO: This is a bug unrelated to this draft PR but the GPU tester is + // useful for testing this PR. 
+ std::default_random_engine(random_mt.random(0,UINT_MAX-1)) ); // initialize atomic locations diff --git a/src/cpu/testers/gpu_ruby_test/dma_thread.cc b/src/cpu/testers/gpu_ruby_test/dma_thread.cc index 1d6f46c44b..2c4c610c51 100644 --- a/src/cpu/testers/gpu_ruby_test/dma_thread.cc +++ b/src/cpu/testers/gpu_ruby_test/dma_thread.cc @@ -70,7 +70,7 @@ DmaThread::issueLoadOps() Addr address = addrManager->getAddress(location); DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - ruby::printAddress(address)); + printAddress(address)); int load_size = sizeof(Value); @@ -127,7 +127,7 @@ DmaThread::issueStoreOps() DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - " "Value %d\n", this->getName(), - curEpisode->getEpisodeId(), ruby::printAddress(address), + curEpisode->getEpisodeId(), printAddress(address), new_value); auto req = std::make_shared(address, sizeof(Value), @@ -211,7 +211,7 @@ DmaThread::hitCallback(PacketPtr pkt) DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s -" " Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - resp_cmd.toString(), ruby::printAddress(addr)); + resp_cmd.toString(), printAddress(addr)); if (resp_cmd == MemCmd::SwapResp) { // response to a pending atomic diff --git a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc index ae4078ee6c..516e77ddae 100644 --- a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc +++ b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc @@ -67,7 +67,7 @@ GpuWavefront::issueLoadOps() Addr address = addrManager->getAddress(location); DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - ruby::printAddress(address)); + printAddress(address)); int load_size = sizeof(Value); @@ -124,7 +124,7 @@ GpuWavefront::issueStoreOps() DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - " "Value %d\n", this->getName(), - 
curEpisode->getEpisodeId(), ruby::printAddress(address), + curEpisode->getEpisodeId(), printAddress(address), new_value); auto req = std::make_shared(address, sizeof(Value), @@ -178,7 +178,7 @@ GpuWavefront::issueAtomicOps() DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - ruby::printAddress(address)); + printAddress(address)); // must be aligned with store size assert(address % sizeof(Value) == 0); @@ -268,7 +268,7 @@ GpuWavefront::hitCallback(PacketPtr pkt) DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - " "Addr %s\n", this->getName(), curEpisode->getEpisodeId(), resp_cmd.toString(), - ruby::printAddress(addr)); + printAddress(addr)); // whether the transaction is done after this hitCallback bool isTransactionDone = true; diff --git a/src/cpu/testers/gpu_ruby_test/tester_thread.cc b/src/cpu/testers/gpu_ruby_test/tester_thread.cc index ce3a1bccc6..dbcfba8c3c 100644 --- a/src/cpu/testers/gpu_ruby_test/tester_thread.cc +++ b/src/cpu/testers/gpu_ruby_test/tester_thread.cc @@ -43,6 +43,7 @@ TesterThread::TesterThread(const Params &p) : ClockedObject(p), threadEvent(this, "TesterThread tick"), deadlockCheckEvent(this), + cacheLineSize(p.cache_line_size), threadId(p.thread_id), numLanes(p.num_lanes), tester(nullptr), addrManager(nullptr), port(nullptr), @@ -383,7 +384,7 @@ TesterThread::validateAtomicResp(Location loc, int lane, Value ret_val) ss << threadName << ": Atomic Op returned unexpected value\n" << "\tEpisode " << curEpisode->getEpisodeId() << "\n" << "\tLane ID " << lane << "\n" - << "\tAddress " << ruby::printAddress(addr) << "\n" + << "\tAddress " << printAddress(addr) << "\n" << "\tAtomic Op's return value " << ret_val << "\n"; // print out basic info @@ -409,7 +410,7 @@ TesterThread::validateLoadResp(Location loc, int lane, Value ret_val) << "\tTesterThread " << threadId << "\n" << "\tEpisode " << curEpisode->getEpisodeId() << "\n" << "\tLane ID " << lane << "\n" - << 
"\tAddress " << ruby::printAddress(addr) << "\n" + << "\tAddress " << printAddress(addr) << "\n" << "\tLoaded value " << ret_val << "\n" << "\tLast writer " << addrManager->printLastWriter(loc) << "\n"; @@ -467,7 +468,7 @@ TesterThread::printOutstandingReqs(const OutstandingReqTable& table, for (const auto& m : table) { for (const auto& req : m.second) { - ss << "\t\t\tAddr " << ruby::printAddress(m.first) + ss << "\t\t\tAddr " << printAddress(m.first) << ": delta (curCycle - issueCycle) = " << (cur_cycle - req.issueCycle) << std::endl; } @@ -488,4 +489,10 @@ TesterThread::printAllOutstandingReqs(std::stringstream& ss) const << pendingFenceCount << std::endl; } +std::string +TesterThread::printAddress(Addr addr) const +{ + return ruby::printAddress(addr, cacheLineSize * 8); +} + } // namespace gem5 diff --git a/src/cpu/testers/gpu_ruby_test/tester_thread.hh b/src/cpu/testers/gpu_ruby_test/tester_thread.hh index 9877d63c24..f31a5a3dea 100644 --- a/src/cpu/testers/gpu_ruby_test/tester_thread.hh +++ b/src/cpu/testers/gpu_ruby_test/tester_thread.hh @@ -132,6 +132,7 @@ class TesterThread : public ClockedObject {} }; + int cacheLineSize; // the unique global id of this thread int threadId; // width of this thread (1 for cpu thread & wf size for gpu wavefront) @@ -204,6 +205,7 @@ class TesterThread : public ClockedObject void printOutstandingReqs(const OutstandingReqTable& table, std::stringstream& ss) const; + std::string printAddress(Addr addr) const; }; } // namespace gem5 diff --git a/src/cpu/testers/rubytest/Check.cc b/src/cpu/testers/rubytest/Check.cc index 5a83d9ca27..b9c777526a 100644 --- a/src/cpu/testers/rubytest/Check.cc +++ b/src/cpu/testers/rubytest/Check.cc @@ -124,7 +124,8 @@ Check::initiatePrefetch() // push the subblock onto the sender state. 
The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(m_address, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "successfully initiated prefetch.\n"); @@ -161,7 +162,8 @@ Check::initiateFlush() // push the subblock onto the sender state. The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(m_address, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "initiating Flush - successful\n"); @@ -207,7 +209,8 @@ Check::initiateAction() // push the subblock onto the sender state. The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(writeAddr, req->getSize()); + pkt->senderState = new SenderState(writeAddr, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "initiating action - successful\n"); @@ -261,7 +264,8 @@ Check::initiateCheck() // push the subblock onto the sender state.
The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(m_address, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "initiating check - successful\n"); @@ -291,7 +295,9 @@ Check::performCallback(ruby::NodeID proc, ruby::SubBlock* data, Cycles curTime) // This isn't exactly right since we now have multi-byte checks // assert(getAddress() == address); - assert(ruby::makeLineAddress(m_address) == ruby::makeLineAddress(address)); + int block_size_bits = CACHE_LINE_BITS; + assert(ruby::makeLineAddress(m_address, block_size_bits) == + ruby::makeLineAddress(address, block_size_bits)); assert(data != NULL); DPRINTF(RubyTest, "RubyTester Callback\n"); @@ -342,7 +348,7 @@ Check::performCallback(ruby::NodeID proc, ruby::SubBlock* data, Cycles curTime) } DPRINTF(RubyTest, "proc: %d, Address: 0x%x\n", proc, - ruby::makeLineAddress(m_address)); + ruby::makeLineAddress(m_address, block_size_bits)); DPRINTF(RubyTest, "Callback done\n"); debugPrint(); } diff --git a/src/cpu/testers/rubytest/Check.hh b/src/cpu/testers/rubytest/Check.hh index 78e2bda77e..0270b800d7 100644 --- a/src/cpu/testers/rubytest/Check.hh +++ b/src/cpu/testers/rubytest/Check.hh @@ -47,6 +47,7 @@ class SubBlock; const int CHECK_SIZE_BITS = 2; const int CHECK_SIZE = (1 << CHECK_SIZE_BITS); +const int CACHE_LINE_BITS = 6; class Check { diff --git a/src/cpu/testers/rubytest/RubyTester.hh b/src/cpu/testers/rubytest/RubyTester.hh index 9397126180..d306c405ef 100644 --- a/src/cpu/testers/rubytest/RubyTester.hh +++ b/src/cpu/testers/rubytest/RubyTester.hh @@ -90,7 +90,9 @@ class RubyTester : public ClockedObject { ruby::SubBlock subBlock; - SenderState(Addr addr, int size) : subBlock(addr, size) {} + SenderState(Addr addr, int size, int cl_size) + : subBlock(addr, size, cl_size) + {} }; diff --git a/src/mem/ruby/common/Address.cc b/src/mem/ruby/common/Address.cc index 
fcf291af51..8b120324c7 100644 --- a/src/mem/ruby/common/Address.cc +++ b/src/mem/ruby/common/Address.cc @@ -51,37 +51,33 @@ maskLowOrderBits(Addr addr, unsigned int number) } Addr -getOffset(Addr addr) +getOffset(Addr addr, int cacheLineBits) { - return bitSelect(addr, 0, RubySystem::getBlockSizeBits() - 1); -} - -Addr -makeLineAddress(Addr addr) -{ - return mbits(addr, 63, RubySystem::getBlockSizeBits()); + assert(cacheLineBits < 64); + return bitSelect(addr, 0, cacheLineBits - 1); } Addr makeLineAddress(Addr addr, int cacheLineBits) { + assert(cacheLineBits < 64); return maskLowOrderBits(addr, cacheLineBits); } // returns the next stride address based on line address Addr -makeNextStrideAddress(Addr addr, int stride) +makeNextStrideAddress(Addr addr, int stride, int cacheLineBytes) { - return makeLineAddress(addr) + - static_cast(RubySystem::getBlockSizeBytes()) * stride; + return makeLineAddress(addr, floorLog2(cacheLineBytes)) + + cacheLineBytes * stride; } std::string -printAddress(Addr addr) +printAddress(Addr addr, int cacheLineBits) { std::stringstream out; out << "[" << std::hex << "0x" << addr << "," << " line 0x" - << makeLineAddress(addr) << std::dec << "]"; + << makeLineAddress(addr, cacheLineBits) << std::dec << "]"; return out.str(); } diff --git a/src/mem/ruby/common/Address.hh b/src/mem/ruby/common/Address.hh index 565c3c1fb7..51e0b5417a 100644 --- a/src/mem/ruby/common/Address.hh +++ b/src/mem/ruby/common/Address.hh @@ -33,6 +33,7 @@ #include #include +#include "base/intmath.hh" #include "base/types.hh" namespace gem5 @@ -44,11 +45,10 @@ namespace ruby // selects bits inclusive Addr bitSelect(Addr addr, unsigned int small, unsigned int big); Addr maskLowOrderBits(Addr addr, unsigned int number); -Addr getOffset(Addr addr); -Addr makeLineAddress(Addr addr); +Addr getOffset(Addr addr, int cacheLineBits); Addr makeLineAddress(Addr addr, int cacheLineBits); -Addr makeNextStrideAddress(Addr addr, int stride); -std::string printAddress(Addr addr); +Addr 
makeNextStrideAddress(Addr addr, int stride, int cacheLineBytes); +std::string printAddress(Addr addr, int cacheLineBits); } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/common/DataBlock.cc b/src/mem/ruby/common/DataBlock.cc index 8f47d0026b..bbc0fd21c8 100644 --- a/src/mem/ruby/common/DataBlock.cc +++ b/src/mem/ruby/common/DataBlock.cc @@ -40,8 +40,8 @@ #include "mem/ruby/common/DataBlock.hh" +#include "mem/ruby/common/Address.hh" #include "mem/ruby/common/WriteMask.hh" -#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -51,17 +51,21 @@ namespace ruby DataBlock::DataBlock(const DataBlock &cp) { + assert(cp.isAlloc()); + assert(cp.getBlockSize() > 0); + assert(!m_alloc); + uint8_t *block_update; - size_t block_bytes = RubySystem::getBlockSizeBytes(); - m_data = new uint8_t[block_bytes]; - memcpy(m_data, cp.m_data, block_bytes); + m_block_size = cp.getBlockSize(); + m_data = new uint8_t[m_block_size]; + memcpy(m_data, cp.m_data, m_block_size); m_alloc = true; // If this data block is involved in an atomic operation, the effect // of applying the atomic operations on the data block are recorded in // m_atomicLog.
If so, we must copy over every entry in the change log for (size_t i = 0; i < cp.m_atomicLog.size(); i++) { - block_update = new uint8_t[block_bytes]; - memcpy(block_update, cp.m_atomicLog[i], block_bytes); + block_update = new uint8_t[m_block_size]; + memcpy(block_update, cp.m_atomicLog[i], m_block_size); m_atomicLog.push_back(block_update); } } @@ -69,21 +74,44 @@ DataBlock::DataBlock(const DataBlock &cp) void DataBlock::alloc() { - m_data = new uint8_t[RubySystem::getBlockSizeBytes()]; + assert(!m_alloc); + + if (!m_block_size) { + return; + } + + m_data = new uint8_t[m_block_size]; m_alloc = true; clear(); } +void +DataBlock::realloc(int blk_size) +{ + m_block_size = blk_size; + assert(m_block_size > 0); + + if (m_alloc) { + delete [] m_data; + m_alloc = false; + } + alloc(); +} + void DataBlock::clear() { - memset(m_data, 0, RubySystem::getBlockSizeBytes()); + assert(m_alloc); + assert(m_block_size > 0); + memset(m_data, 0, m_block_size); } bool DataBlock::equal(const DataBlock& obj) const { - size_t block_bytes = RubySystem::getBlockSizeBytes(); + assert(m_alloc); + assert(m_block_size > 0); + size_t block_bytes = m_block_size; // Check that the block contents match if (memcmp(m_data, obj.m_data, block_bytes)) { return false; @@ -102,7 +130,9 @@ DataBlock::equal(const DataBlock& obj) const void DataBlock::copyPartial(const DataBlock &dblk, const WriteMask &mask) { - for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) { + assert(m_alloc); + assert(m_block_size > 0); + for (int i = 0; i < m_block_size; i++) { if (mask.getMask(i, 1)) { m_data[i] = dblk.m_data[i]; } @@ -113,7 +143,9 @@ void DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask, bool isAtomicNoReturn) { - for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) { + assert(m_alloc); + assert(m_block_size > 0); + for (int i = 0; i < m_block_size; i++) { m_data[i] = dblk.m_data[i]; } mask.performAtomic(m_data, m_atomicLog, isAtomicNoReturn); @@ -122,7 +154,9 @@ 
DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask, void DataBlock::print(std::ostream& out) const { - int size = RubySystem::getBlockSizeBytes(); + assert(m_alloc); + assert(m_block_size > 0); + int size = m_block_size; out << "[ "; for (int i = 0; i < size; i++) { out << std::setw(2) << std::setfill('0') << std::hex @@ -147,6 +181,7 @@ DataBlock::popAtomicLogEntryFront() void DataBlock::clearAtomicLogEntries() { + assert(m_alloc); for (auto log : m_atomicLog) { delete [] log; } @@ -156,35 +191,59 @@ DataBlock::clearAtomicLogEntries() const uint8_t* DataBlock::getData(int offset, int len) const { - assert(offset + len <= RubySystem::getBlockSizeBytes()); + assert(m_alloc); + assert(m_block_size > 0); + assert(offset + len <= m_block_size); return &m_data[offset]; } uint8_t* DataBlock::getDataMod(int offset) { + assert(m_alloc); return &m_data[offset]; } void DataBlock::setData(const uint8_t *data, int offset, int len) { + assert(m_alloc); memcpy(&m_data[offset], data, len); } void DataBlock::setData(PacketPtr pkt) { - int offset = getOffset(pkt->getAddr()); - assert(offset + pkt->getSize() <= RubySystem::getBlockSizeBytes()); + assert(m_alloc); + assert(m_block_size > 0); + int offset = getOffset(pkt->getAddr(), floorLog2(m_block_size)); + assert(offset + pkt->getSize() <= m_block_size); pkt->writeData(&m_data[offset]); } DataBlock & DataBlock::operator=(const DataBlock & obj) { + // Reallocate if needed + if (m_alloc && m_block_size != obj.getBlockSize()) { + delete [] m_data; + m_block_size = obj.getBlockSize(); + alloc(); + } else if (!m_alloc) { + m_block_size = obj.getBlockSize(); + alloc(); + + // Assume this will be realloc'd later if zero. 
+ if (m_block_size == 0) { + return *this; + } + } else { + assert(m_alloc && m_block_size == obj.getBlockSize()); + } + assert(m_block_size > 0); + uint8_t *block_update; - size_t block_bytes = RubySystem::getBlockSizeBytes(); + size_t block_bytes = m_block_size; // Copy entire block contents from obj to current block memcpy(m_data, obj.m_data, block_bytes); // If this data block is involved in an atomic operation, the effect diff --git a/src/mem/ruby/common/DataBlock.hh b/src/mem/ruby/common/DataBlock.hh index 7456a25f3f..ebfa7d1383 100644 --- a/src/mem/ruby/common/DataBlock.hh +++ b/src/mem/ruby/common/DataBlock.hh @@ -61,8 +61,14 @@ class WriteMask; class DataBlock { public: - DataBlock() + // Ideally this should not be called. We allow default so that protocols + // do not need to be changed. + DataBlock() = default; + + DataBlock(int blk_size) { + assert(!m_alloc); + m_block_size = blk_size; alloc(); } @@ -101,10 +107,16 @@ class DataBlock bool equal(const DataBlock& obj) const; void print(std::ostream& out) const; + int getBlockSize() const { return m_block_size; } + void setBlockSize(int block_size) { realloc(block_size); } + bool isAlloc() const { return m_alloc; } + void realloc(int blk_size); + private: void alloc(); - uint8_t *m_data; - bool m_alloc; + uint8_t *m_data = nullptr; + bool m_alloc = false; + int m_block_size = 0; // Tracks block changes when atomic ops are applied std::deque m_atomicLog; @@ -124,18 +136,21 @@ DataBlock::assign(uint8_t *data) inline uint8_t DataBlock::getByte(int whichByte) const { + assert(m_alloc); return m_data[whichByte]; } inline void DataBlock::setByte(int whichByte, uint8_t data) { + assert(m_alloc); m_data[whichByte] = data; } inline void DataBlock::copyPartial(const DataBlock & dblk, int offset, int len) { + assert(m_alloc); setData(&dblk.m_data[offset], offset, len); } diff --git a/src/mem/ruby/common/NetDest.cc b/src/mem/ruby/common/NetDest.cc index ba64f2febd..944315b97f 100644 ---
a/src/mem/ruby/common/NetDest.cc +++ b/src/mem/ruby/common/NetDest.cc @@ -30,6 +30,8 @@ #include +#include "mem/ruby/system/RubySystem.hh" + namespace gem5 { @@ -38,12 +40,18 @@ namespace ruby NetDest::NetDest() { - resize(); +} + +NetDest::NetDest(RubySystem *ruby_system) + : m_ruby_system(ruby_system) +{ + resize(); } void NetDest::add(MachineID newElement) { + assert(m_bits.size() > 0); assert(bitIndex(newElement.num) < m_bits[vecIndex(newElement)].getSize()); m_bits[vecIndex(newElement)].add(bitIndex(newElement.num)); } @@ -51,6 +59,7 @@ NetDest::add(MachineID newElement) void NetDest::addNetDest(const NetDest& netDest) { + assert(m_bits.size() > 0); assert(m_bits.size() == netDest.getSize()); for (int i = 0; i < m_bits.size(); i++) { m_bits[i].addSet(netDest.m_bits[i]); @@ -60,6 +69,8 @@ NetDest::addNetDest(const NetDest& netDest) void NetDest::setNetDest(MachineType machine, const Set& set) { + assert(m_ruby_system != nullptr); + // assure that there is only one set of destinations for this machine assert(MachineType_base_level((MachineType)(machine + 1)) - MachineType_base_level(machine) == 1); @@ -69,12 +80,14 @@ NetDest::setNetDest(MachineType machine, const Set& set) void NetDest::remove(MachineID oldElement) { + assert(m_bits.size() > 0); m_bits[vecIndex(oldElement)].remove(bitIndex(oldElement.num)); } void NetDest::removeNetDest(const NetDest& netDest) { + assert(m_bits.size() > 0); assert(m_bits.size() == netDest.getSize()); for (int i = 0; i < m_bits.size(); i++) { m_bits[i].removeSet(netDest.m_bits[i]); @@ -84,6 +97,7 @@ NetDest::removeNetDest(const NetDest& netDest) void NetDest::clear() { + assert(m_bits.size() > 0); for (int i = 0; i < m_bits.size(); i++) { m_bits[i].clear(); } @@ -101,6 +115,8 @@ NetDest::broadcast() void NetDest::broadcast(MachineType machineType) { + assert(m_ruby_system != nullptr); + for (NodeID i = 0; i < MachineType_base_count(machineType); i++) { MachineID mach = {machineType, i}; add(mach); @@ -111,6 +127,9 @@ 
NetDest::broadcast(MachineType machineType) std::vector NetDest::getAllDest() { + assert(m_ruby_system != nullptr); + assert(m_bits.size() > 0); + std::vector dest; dest.clear(); for (int i = 0; i < m_bits.size(); i++) { @@ -127,6 +146,8 @@ NetDest::getAllDest() int NetDest::count() const { + assert(m_bits.size() > 0); + int counter = 0; for (int i = 0; i < m_bits.size(); i++) { counter += m_bits[i].count(); @@ -137,12 +158,14 @@ NetDest::count() const NodeID NetDest::elementAt(MachineID index) { + assert(m_bits.size() > 0); return m_bits[vecIndex(index)].elementAt(bitIndex(index.num)); } MachineID NetDest::smallestElement() const { + assert(m_bits.size() > 0); assert(count() > 0); for (int i = 0; i < m_bits.size(); i++) { for (NodeID j = 0; j < m_bits[i].getSize(); j++) { @@ -158,6 +181,9 @@ NetDest::smallestElement() const MachineID NetDest::smallestElement(MachineType machine) const { + assert(m_bits.size() > 0); + assert(m_ruby_system != nullptr); + int size = m_bits[MachineType_base_level(machine)].getSize(); for (NodeID j = 0; j < size; j++) { if (m_bits[MachineType_base_level(machine)].isElement(j)) { @@ -173,6 +199,7 @@ NetDest::smallestElement(MachineType machine) const bool NetDest::isBroadcast() const { + assert(m_bits.size() > 0); for (int i = 0; i < m_bits.size(); i++) { if (!m_bits[i].isBroadcast()) { return false; @@ -185,6 +212,7 @@ NetDest::isBroadcast() const bool NetDest::isEmpty() const { + assert(m_bits.size() > 0); for (int i = 0; i < m_bits.size(); i++) { if (!m_bits[i].isEmpty()) { return false; @@ -197,8 +225,9 @@ NetDest::isEmpty() const NetDest NetDest::OR(const NetDest& orNetDest) const { + assert(m_bits.size() > 0); assert(m_bits.size() == orNetDest.getSize()); - NetDest result; + NetDest result(m_ruby_system); for (int i = 0; i < m_bits.size(); i++) { result.m_bits[i] = m_bits[i].OR(orNetDest.m_bits[i]); } @@ -209,8 +238,9 @@ NetDest::OR(const NetDest& orNetDest) const NetDest NetDest::AND(const NetDest& andNetDest) const { + 
assert(m_bits.size() > 0); assert(m_bits.size() == andNetDest.getSize()); - NetDest result; + NetDest result(m_ruby_system); for (int i = 0; i < m_bits.size(); i++) { result.m_bits[i] = m_bits[i].AND(andNetDest.m_bits[i]); } @@ -221,6 +251,7 @@ NetDest::AND(const NetDest& andNetDest) const bool NetDest::intersectionIsNotEmpty(const NetDest& other_netDest) const { + assert(m_bits.size() > 0); assert(m_bits.size() == other_netDest.getSize()); for (int i = 0; i < m_bits.size(); i++) { if (!m_bits[i].intersectionIsEmpty(other_netDest.m_bits[i])) { @@ -233,6 +264,7 @@ NetDest::intersectionIsNotEmpty(const NetDest& other_netDest) const bool NetDest::isSuperset(const NetDest& test) const { + assert(m_bits.size() > 0); assert(m_bits.size() == test.getSize()); for (int i = 0; i < m_bits.size(); i++) { @@ -246,12 +278,15 @@ NetDest::isSuperset(const NetDest& test) const bool NetDest::isElement(MachineID element) const { + assert(m_bits.size() > 0); return ((m_bits[vecIndex(element)])).isElement(bitIndex(element.num)); } void NetDest::resize() { + assert(m_ruby_system != nullptr); + m_bits.resize(MachineType_base_level(MachineType_NUM)); assert(m_bits.size() == MachineType_NUM); @@ -263,6 +298,7 @@ NetDest::resize() void NetDest::print(std::ostream& out) const { + assert(m_bits.size() > 0); out << "[NetDest (" << m_bits.size() << ") "; for (int i = 0; i < m_bits.size(); i++) { @@ -277,6 +313,7 @@ NetDest::print(std::ostream& out) const bool NetDest::isEqual(const NetDest& n) const { + assert(m_bits.size() > 0); assert(m_bits.size() == n.m_bits.size()); for (unsigned int i = 0; i < m_bits.size(); ++i) { if (!m_bits[i].isEqual(n.m_bits[i])) @@ -285,5 +322,19 @@ NetDest::isEqual(const NetDest& n) const return true; } +int +NetDest::MachineType_base_count(const MachineType& obj) +{ + assert(m_ruby_system != nullptr); + return m_ruby_system->MachineType_base_count(obj); +} + +int +NetDest::MachineType_base_number(const MachineType& obj) +{ + assert(m_ruby_system != nullptr); + 
return m_ruby_system->MachineType_base_number(obj); +} + } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/common/NetDest.hh b/src/mem/ruby/common/NetDest.hh index e71b876754..83f340a478 100644 --- a/src/mem/ruby/common/NetDest.hh +++ b/src/mem/ruby/common/NetDest.hh @@ -41,6 +41,8 @@ namespace gem5 namespace ruby { +class RubySystem; + // NetDest specifies the network destination of a Message class NetDest { @@ -48,6 +50,7 @@ class NetDest // Constructors // creates and empty set NetDest(); + NetDest(RubySystem *ruby_system); explicit NetDest(int bit_size); NetDest& operator=(const Set& obj); @@ -98,6 +101,8 @@ class NetDest void print(std::ostream& out) const; + void setRubySystem(RubySystem *rs) { m_ruby_system = rs; resize(); } + private: // returns a value >= MachineType_base_level("this machine") // and < MachineType_base_level("next highest machine") @@ -112,6 +117,12 @@ class NetDest NodeID bitIndex(NodeID index) const { return index; } std::vector m_bits; // a vector of bit vectors - i.e. 
Sets + + // Needed to call MachineType_base_count/number + RubySystem *m_ruby_system = nullptr; + + int MachineType_base_count(const MachineType& obj); + int MachineType_base_number(const MachineType& obj); }; inline std::ostream& diff --git a/src/mem/ruby/common/SubBlock.cc b/src/mem/ruby/common/SubBlock.cc index 92cfd8b633..be0adc1233 100644 --- a/src/mem/ruby/common/SubBlock.cc +++ b/src/mem/ruby/common/SubBlock.cc @@ -38,13 +38,14 @@ namespace ruby using stl_helpers::operator<<; -SubBlock::SubBlock(Addr addr, int size) +SubBlock::SubBlock(Addr addr, int size, int cl_bits) { m_address = addr; resize(size); for (int i = 0; i < size; i++) { setByte(i, 0); } + m_cache_line_bits = cl_bits; } void @@ -52,7 +53,7 @@ SubBlock::internalMergeFrom(const DataBlock& data) { int size = getSize(); assert(size > 0); - int offset = getOffset(m_address); + int offset = getOffset(m_address, m_cache_line_bits); for (int i = 0; i < size; i++) { this->setByte(i, data.getByte(offset + i)); } @@ -63,7 +64,7 @@ internalMergeTo(DataBlock& data) const { int size = getSize(); assert(size > 0); - int offset = getOffset(m_address); + int offset = getOffset(m_address, m_cache_line_bits); for (int i = 0; i < size; i++) { // This will detect crossing a cache line boundary data.setByte(offset + i, this->getByte(i)); diff --git a/src/mem/ruby/common/SubBlock.hh b/src/mem/ruby/common/SubBlock.hh index e1a83600c2..3790bbac58 100644 --- a/src/mem/ruby/common/SubBlock.hh +++ b/src/mem/ruby/common/SubBlock.hh @@ -45,7 +45,7 @@ class SubBlock { public: SubBlock() { } - SubBlock(Addr addr, int size); + SubBlock(Addr addr, int size, int cl_bits); ~SubBlock() { } Addr getAddress() const { return m_address; } @@ -74,6 +74,7 @@ class SubBlock // Data Members (m_ prefix) Addr m_address; std::vector m_data; + int m_cache_line_bits; }; inline std::ostream& diff --git a/src/mem/ruby/common/WriteMask.cc b/src/mem/ruby/common/WriteMask.cc index 1fa03c951e..f176aec9fc 100644 ---
a/src/mem/ruby/common/WriteMask.cc +++ b/src/mem/ruby/common/WriteMask.cc @@ -39,13 +39,13 @@ namespace ruby { WriteMask::WriteMask() - : mSize(RubySystem::getBlockSizeBytes()), mMask(mSize, false), - mAtomic(false) + : mSize(0), mMask(mSize, false), mAtomic(false) {} void WriteMask::print(std::ostream& out) const { + assert(mSize > 0); std::string str(mSize,'0'); for (int i = 0; i < mSize; i++) { str[i] = mMask[i] ? ('1') : ('0'); @@ -59,6 +59,7 @@ void WriteMask::performAtomic(uint8_t * p, std::deque& log, bool isAtomicNoReturn) const { + assert(mSize > 0); int offset; uint8_t *block_update; // Here, operations occur in FIFO order from the mAtomicOp diff --git a/src/mem/ruby/common/WriteMask.hh b/src/mem/ruby/common/WriteMask.hh index 8c6b8ce976..e620997cd8 100644 --- a/src/mem/ruby/common/WriteMask.hh +++ b/src/mem/ruby/common/WriteMask.hh @@ -78,6 +78,17 @@ class WriteMask ~WriteMask() {} + int getBlockSize() const { return mSize; } + void + setBlockSize(int size) + { + // This should only be used once if the default ctor was used. Probably + // by src/mem/ruby/protocol/RubySlicc_MemControl.sm. 
+ assert(mSize == 0); + assert(size > 0); + mSize = size; + } + void clear() { @@ -87,6 +98,7 @@ class WriteMask bool test(int offset) const { + assert(mSize > 0); assert(offset < mSize); return mMask[offset]; } @@ -94,6 +106,7 @@ class WriteMask void setMask(int offset, int len, bool val = true) { + assert(mSize > 0); assert(mSize >= (offset + len)); for (int i = 0; i < len; i++) { mMask[offset + i] = val; @@ -102,6 +115,7 @@ class WriteMask void fillMask() { + assert(mSize > 0); for (int i = 0; i < mSize; i++) { mMask[i] = true; } @@ -111,6 +125,7 @@ class WriteMask getMask(int offset, int len) const { bool tmp = true; + assert(mSize > 0); assert(mSize >= (offset + len)); for (int i = 0; i < len; i++) { tmp = tmp & mMask.at(offset + i); @@ -122,6 +137,7 @@ class WriteMask isOverlap(const WriteMask &readMask) const { bool tmp = false; + assert(mSize > 0); assert(mSize == readMask.mSize); for (int i = 0; i < mSize; i++) { if (readMask.mMask.at(i)) { @@ -135,6 +151,7 @@ class WriteMask containsMask(const WriteMask &readMask) const { bool tmp = true; + assert(mSize > 0); assert(mSize == readMask.mSize); for (int i = 0; i < mSize; i++) { if (readMask.mMask.at(i)) { @@ -146,6 +163,7 @@ class WriteMask bool isEmpty() const { + assert(mSize > 0); for (int i = 0; i < mSize; i++) { if (mMask.at(i)) { return false; @@ -157,6 +175,7 @@ class WriteMask bool isFull() const { + assert(mSize > 0); for (int i = 0; i < mSize; i++) { if (!mMask.at(i)) { return false; @@ -168,6 +187,7 @@ class WriteMask void andMask(const WriteMask & writeMask) { + assert(mSize > 0); assert(mSize == writeMask.mSize); for (int i = 0; i < mSize; i++) { mMask[i] = (mMask.at(i)) && (writeMask.mMask.at(i)); @@ -182,6 +202,7 @@ class WriteMask void orMask(const WriteMask & writeMask) { + assert(mSize > 0); assert(mSize == writeMask.mSize); for (int i = 0; i < mSize; i++) { mMask[i] = (mMask.at(i)) || (writeMask.mMask.at(i)); @@ -196,6 +217,7 @@ class WriteMask void setInvertedMask(const WriteMask & 
writeMask) { + assert(mSize > 0); assert(mSize == writeMask.mSize); for (int i = 0; i < mSize; i++) { mMask[i] = !writeMask.mMask.at(i); @@ -205,6 +227,7 @@ class WriteMask int firstBitSet(bool val, int offset = 0) const { + assert(mSize > 0); for (int i = offset; i < mSize; ++i) if (mMask[i] == val) return i; @@ -214,6 +237,7 @@ class WriteMask int count(int offset = 0) const { + assert(mSize > 0); int count = 0; for (int i = offset; i < mSize; ++i) count += mMask[i]; diff --git a/src/mem/ruby/network/MessageBuffer.cc b/src/mem/ruby/network/MessageBuffer.cc index 9a4439a538..8b3a724469 100644 --- a/src/mem/ruby/network/MessageBuffer.cc +++ b/src/mem/ruby/network/MessageBuffer.cc @@ -47,7 +47,6 @@ #include "base/random.hh" #include "base/stl_helpers.hh" #include "debug/RubyQueue.hh" -#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -216,6 +215,7 @@ random_time() void MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, + bool ruby_is_random, bool ruby_warmup, bool bypassStrictFIFO) { // record current time incase we have a pop that also adjusts my size @@ -237,7 +237,7 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, // is turned on and this buffer allows it if ((m_randomization == MessageRandomization::disabled) || ((m_randomization == MessageRandomization::ruby_system) && - !RubySystem::getRandomization())) { + !ruby_is_random)) { // No randomization arrival_time = current_time + delta; } else { @@ -265,7 +265,7 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, } // If running a cache trace, don't worry about the last arrival checks - if (!RubySystem::getWarmupEnabled()) { + if (!ruby_warmup) { m_last_arrival_time = arrival_time; } @@ -447,7 +447,6 @@ MessageBuffer::stallMessage(Addr addr, Tick current_time) { DPRINTF(RubyQueue, "Stalling due to %#x\n", addr); assert(isReady(current_time)); - assert(getOffset(addr) == 0); MsgPtr message = m_prio_heap.front(); // Since the message will 
just be moved to stall map, indicate that the @@ -479,7 +478,8 @@ MessageBuffer::deferEnqueueingMessage(Addr addr, MsgPtr message) } void -MessageBuffer::enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay) +MessageBuffer::enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay, + bool ruby_is_random, bool ruby_warmup) { assert(!isDeferredMsgMapEmpty(addr)); std::vector& msg_vec = m_deferred_msg_map[addr]; @@ -487,7 +487,7 @@ MessageBuffer::enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay) // enqueue all deferred messages associated with this address for (MsgPtr m : msg_vec) { - enqueue(m, curTime, delay); + enqueue(m, curTime, delay, ruby_is_random, ruby_warmup); } msg_vec.clear(); diff --git a/src/mem/ruby/network/MessageBuffer.hh b/src/mem/ruby/network/MessageBuffer.hh index 03a0454433..b45e531d11 100644 --- a/src/mem/ruby/network/MessageBuffer.hh +++ b/src/mem/ruby/network/MessageBuffer.hh @@ -90,13 +90,14 @@ class MessageBuffer : public SimObject Tick readyTime() const; void - delayHead(Tick current_time, Tick delta) + delayHead(Tick current_time, Tick delta, bool ruby_is_random, + bool ruby_warmup) { MsgPtr m = m_prio_heap.front(); std::pop_heap(m_prio_heap.begin(), m_prio_heap.end(), std::greater()); m_prio_heap.pop_back(); - enqueue(m, current_time, delta); + enqueue(m, current_time, delta, ruby_is_random, ruby_warmup); } bool areNSlotsAvailable(unsigned int n, Tick curTime); @@ -124,6 +125,7 @@ class MessageBuffer : public SimObject const MsgPtr &peekMsgPtr() const { return m_prio_heap.front(); } void enqueue(MsgPtr message, Tick curTime, Tick delta, + bool ruby_is_random, bool ruby_warmup, bool bypassStrictFIFO = false); // Defer enqueueing a message to a later cycle by putting it aside and not @@ -135,7 +137,8 @@ class MessageBuffer : public SimObject // enqueue all previously deferred messages that are associated with the // input address - void enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay); + void 
enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay, + bool ruby_is_random, bool ruby_warmup); bool isDeferredMsgMapEmpty(Addr addr) const; //! Updates the delay cycles of the message at the head of the queue, diff --git a/src/mem/ruby/network/Network.cc b/src/mem/ruby/network/Network.cc index 757ed9498e..480b5bcef0 100644 --- a/src/mem/ruby/network/Network.cc +++ b/src/mem/ruby/network/Network.cc @@ -65,7 +65,8 @@ Network::Network(const Params &p) "%s: data message size > cache line size", name()); m_data_msg_size = p.data_msg_size + m_control_msg_size; - params().ruby_system->registerNetwork(this); + m_ruby_system = p.ruby_system; + m_ruby_system->registerNetwork(this); // Populate localNodeVersions with the version of each MachineType in // this network. This will be used to compute a global to local ID. @@ -102,7 +103,8 @@ Network::Network(const Params &p) m_topology_ptr = new Topology(m_nodes, p.routers.size(), m_virtual_networks, - p.ext_links, p.int_links); + p.ext_links, p.int_links, + m_ruby_system); // Allocate to and from queues // Queues that are getting messages from protocol @@ -246,7 +248,7 @@ Network::addressToNodeID(Addr addr, MachineType mtype) } } } - return MachineType_base_count(mtype); + return m_ruby_system->MachineType_base_count(mtype); } NodeID @@ -256,5 +258,23 @@ Network::getLocalNodeID(NodeID global_id) const return globalToLocalMap.at(global_id); } +bool +Network::getRandomization() const +{ + return m_ruby_system->getRandomization(); +} + +bool +Network::getWarmupEnabled() const +{ + return m_ruby_system->getWarmupEnabled(); +} + +int +Network::MachineType_base_number(const MachineType& obj) +{ + return m_ruby_system->MachineType_base_number(obj); +} + } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/network/Network.hh b/src/mem/ruby/network/Network.hh index 8ca68a0279..c0d21af240 100644 --- a/src/mem/ruby/network/Network.hh +++ b/src/mem/ruby/network/Network.hh @@ -78,6 +78,7 @@ namespace ruby class NetDest; 
class MessageBuffer; +class RubySystem; class Network : public ClockedObject { @@ -147,6 +148,10 @@ class Network : public ClockedObject NodeID getLocalNodeID(NodeID global_id) const; + bool getRandomization() const; + bool getWarmupEnabled() const; + RubySystem *getRubySystem() const { return m_ruby_system; } + protected: // Private copy constructor and assignment operator Network(const Network& obj); @@ -176,6 +181,12 @@ class Network : public ClockedObject // Global NodeID to local node map. If there are not multiple networks in // the same RubySystem, this is a one-to-one mapping of global to local. std::unordered_map globalToLocalMap; + + // For accessing whether randomization/warmup are turned on. We cannot store + // those values in the constructor in case we are constructed first. + RubySystem *m_ruby_system = nullptr; + + int MachineType_base_number(const MachineType& obj); }; inline std::ostream& diff --git a/src/mem/ruby/network/Topology.cc b/src/mem/ruby/network/Topology.cc index 39444c9023..b2cd7897f8 100644 --- a/src/mem/ruby/network/Topology.cc +++ b/src/mem/ruby/network/Topology.cc @@ -37,6 +37,7 @@ #include "mem/ruby/network/BasicLink.hh" #include "mem/ruby/network/Network.hh" #include "mem/ruby/slicc_interface/AbstractController.hh" +#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -56,10 +57,12 @@ const int INFINITE_LATENCY = 10000; // Yes, this is a big hack Topology::Topology(uint32_t num_nodes, uint32_t num_routers, uint32_t num_vnets, const std::vector &ext_links, - const std::vector &int_links) - : m_nodes(MachineType_base_number(MachineType_NUM)), + const std::vector &int_links, + RubySystem *ruby_system) + : m_nodes(ruby_system->MachineType_base_number(MachineType_NUM)), m_number_of_switches(num_routers), m_vnets(num_vnets), - m_ext_link_vector(ext_links), m_int_link_vector(int_links) + m_ext_link_vector(ext_links), m_int_link_vector(int_links), + m_ruby_system(ruby_system) { // Total nodes/controllers in network assert(m_nodes >
1); @@ -78,7 +81,8 @@ Topology::Topology(uint32_t num_nodes, uint32_t num_routers, AbstractController *abs_cntrl = ext_link->params().ext_node; BasicRouter *router = ext_link->params().int_node; - int machine_base_idx = MachineType_base_number(abs_cntrl->getType()); + int machine_base_idx = + ruby_system->MachineType_base_number(abs_cntrl->getType()); int ext_idx1 = machine_base_idx + abs_cntrl->getVersion(); int ext_idx2 = ext_idx1 + m_nodes; int int_idx = router->params().router_id + 2*m_nodes; @@ -189,7 +193,7 @@ Topology::createLinks(Network *net) for (int i = 0; i < topology_weights[0].size(); i++) { for (int j = 0; j < topology_weights[0][i].size(); j++) { std::vector routingMap; - routingMap.resize(m_vnets); + routingMap.resize(m_vnets, m_ruby_system); // Not all sources and destinations are connected // by direct links. We only construct the links @@ -264,7 +268,7 @@ Topology::makeLink(Network *net, SwitchID src, SwitchID dest, for (int l = 0; l < links.size(); l++) { link_entry = links[l]; std::vector linkRoute; - linkRoute.resize(m_vnets); + linkRoute.resize(m_vnets, m_ruby_system); BasicLink *link = link_entry.link; if (link->mVnets.size() == 0) { net->makeExtInLink(src, dest - (2 * m_nodes), link, @@ -287,7 +291,7 @@ Topology::makeLink(Network *net, SwitchID src, SwitchID dest, for (int l = 0; l < links.size(); l++) { link_entry = links[l]; std::vector linkRoute; - linkRoute.resize(m_vnets); + linkRoute.resize(m_vnets, m_ruby_system); BasicLink *link = link_entry.link; if (link->mVnets.size() == 0) { net->makeExtOutLink(src - (2 * m_nodes), node, link, @@ -309,7 +313,7 @@ Topology::makeLink(Network *net, SwitchID src, SwitchID dest, for (int l = 0; l < links.size(); l++) { link_entry = links[l]; std::vector linkRoute; - linkRoute.resize(m_vnets); + linkRoute.resize(m_vnets, m_ruby_system); BasicLink *link = link_entry.link; if (link->mVnets.size() == 0) { net->makeInternalLink(src - (2 * m_nodes), @@ -413,16 +417,17 @@ 
Topology::shortest_path_to_node(SwitchID src, SwitchID next, const Matrix &weights, const Matrix &dist, int vnet) { - NetDest result; + NetDest result(m_ruby_system); int d = 0; int machines; int max_machines; machines = MachineType_NUM; - max_machines = MachineType_base_number(MachineType_NUM); + max_machines = m_ruby_system->MachineType_base_number(MachineType_NUM); for (int m = 0; m < machines; m++) { - for (NodeID i = 0; i < MachineType_base_count((MachineType)m); i++) { + for (NodeID i = 0; + i < m_ruby_system->MachineType_base_count((MachineType)m); i++) { // we use "d+max_machines" below since the "destination" // switches for the machines are numbered // [MachineType_base_number(MachineType_NUM)... diff --git a/src/mem/ruby/network/Topology.hh b/src/mem/ruby/network/Topology.hh index 301811e6ab..7ab395762a 100644 --- a/src/mem/ruby/network/Topology.hh +++ b/src/mem/ruby/network/Topology.hh @@ -80,7 +80,8 @@ class Topology public: Topology(uint32_t num_nodes, uint32_t num_routers, uint32_t num_vnets, const std::vector &ext_links, - const std::vector &int_links); + const std::vector &int_links, + RubySystem *ruby_system); uint32_t numSwitches() const { return m_number_of_switches; } void createLinks(Network *net); @@ -108,7 +109,7 @@ class Topology const Matrix &weights, const Matrix &dist, int vnet); - const uint32_t m_nodes; + uint32_t m_nodes; const uint32_t m_number_of_switches; int m_vnets; @@ -116,6 +117,8 @@ class Topology std::vector m_int_link_vector; LinkMap m_link_map; + + RubySystem *m_ruby_system = nullptr; }; inline std::ostream& diff --git a/src/mem/ruby/network/garnet/NetworkInterface.cc b/src/mem/ruby/network/garnet/NetworkInterface.cc index 31d625c4d5..8564baca6d 100644 --- a/src/mem/ruby/network/garnet/NetworkInterface.cc +++ b/src/mem/ruby/network/garnet/NetworkInterface.cc @@ -41,6 +41,7 @@ #include "mem/ruby/network/garnet/Credit.hh" #include "mem/ruby/network/garnet/flitBuffer.hh" #include "mem/ruby/slicc_interface/Message.hh" +#include 
"mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -244,7 +245,9 @@ NetworkInterface::wakeup() outNode_ptr[vnet]->areNSlotsAvailable(1, curTime)) { // Space is available. Enqueue to protocol buffer. outNode_ptr[vnet]->enqueue(t_flit->get_msg_ptr(), curTime, - cyclesToTicks(Cycles(1))); + cyclesToTicks(Cycles(1)), + m_net_ptr->getRandomization(), + m_net_ptr->getWarmupEnabled()); // Simply send a credit back since we are not buffering // this flit in the NI @@ -332,7 +335,9 @@ NetworkInterface::checkStallQueue() if (outNode_ptr[vnet]->areNSlotsAvailable(1, curTime)) { outNode_ptr[vnet]->enqueue(stallFlit->get_msg_ptr(), - curTime, cyclesToTicks(Cycles(1))); + curTime, cyclesToTicks(Cycles(1)), + m_net_ptr->getRandomization(), + m_net_ptr->getWarmupEnabled()); // Send back a credit with free signal now that the // VC is no longer stalled. @@ -699,6 +704,12 @@ NetworkInterface::functionalWrite(Packet *pkt) return num_functional_writes; } +int +NetworkInterface::MachineType_base_number(const MachineType& obj) +{ + return m_net_ptr->getRubySystem()->MachineType_base_number(obj); +} + } // namespace garnet } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/network/garnet/NetworkInterface.hh b/src/mem/ruby/network/garnet/NetworkInterface.hh index d42db5ee2a..cd7bb3b171 100644 --- a/src/mem/ruby/network/garnet/NetworkInterface.hh +++ b/src/mem/ruby/network/garnet/NetworkInterface.hh @@ -306,6 +306,8 @@ class NetworkInterface : public ClockedObject, public Consumer InputPort *getInportForVnet(int vnet); OutputPort *getOutportForVnet(int vnet); + + int MachineType_base_number(const MachineType& obj); }; } // namespace garnet diff --git a/src/mem/ruby/network/simple/PerfectSwitch.cc b/src/mem/ruby/network/simple/PerfectSwitch.cc index 74d78e3aae..20d57f04be 100644 --- a/src/mem/ruby/network/simple/PerfectSwitch.cc +++ b/src/mem/ruby/network/simple/PerfectSwitch.cc @@ -268,7 +268,8 @@ PerfectSwitch::operateMessageBuffer(MessageBuffer *buffer, int vnet) 
buffer->getIncomingLink(), vnet, outgoing, vnet); out_port.buffers[vnet]->enqueue(msg_ptr, current_time, - out_port.latency); + out_port.latency, m_switch->getNetPtr()->getRandomization(), + m_switch->getNetPtr()->getWarmupEnabled()); } } } diff --git a/src/mem/ruby/network/simple/Switch.hh b/src/mem/ruby/network/simple/Switch.hh index 86abfda871..e6e22022bc 100644 --- a/src/mem/ruby/network/simple/Switch.hh +++ b/src/mem/ruby/network/simple/Switch.hh @@ -104,6 +104,7 @@ class Switch : public BasicRouter void print(std::ostream& out) const; void init_net_ptr(SimpleNetwork* net_ptr) { m_network_ptr = net_ptr; } + SimpleNetwork* getNetPtr() const { return m_network_ptr; } bool functionalRead(Packet *); bool functionalRead(Packet *, WriteMask&); diff --git a/src/mem/ruby/network/simple/Throttle.cc b/src/mem/ruby/network/simple/Throttle.cc index 20cebccabb..fc5649330f 100644 --- a/src/mem/ruby/network/simple/Throttle.cc +++ b/src/mem/ruby/network/simple/Throttle.cc @@ -199,7 +199,9 @@ Throttle::operateVnet(int vnet, int channel, int &total_bw_remaining, // Move the message in->dequeue(current_time); out->enqueue(msg_ptr, current_time, - m_switch->cyclesToTicks(m_link_latency)); + m_switch->cyclesToTicks(m_link_latency), + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); // Count the message (*(throttleStats. 
diff --git a/src/mem/ruby/profiler/AddressProfiler.cc b/src/mem/ruby/profiler/AddressProfiler.cc index 05fc486c63..ce40c35a9f 100644 --- a/src/mem/ruby/profiler/AddressProfiler.cc +++ b/src/mem/ruby/profiler/AddressProfiler.cc @@ -34,6 +34,7 @@ #include "base/stl_helpers.hh" #include "mem/ruby/profiler/Profiler.hh" #include "mem/ruby/protocol/RubyRequest.hh" +#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -307,7 +308,8 @@ AddressProfiler::addTraceSample(Addr data_addr, Addr pc_addr, } // record data address trace info - data_addr = makeLineAddress(data_addr); + int block_size_bits = m_profiler->m_ruby_system->getBlockSizeBits(); + data_addr = makeLineAddress(data_addr, block_size_bits); lookupTraceForAddress(data_addr, m_dataAccessTrace). update(type, access_mode, id, sharing_miss); diff --git a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm index ca606a5921..43fb96c375 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm @@ -95,7 +95,7 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)") } TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; void set_cache_entry(AbstractCacheEntry b); void unset_cache_entry(); diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm index 5d98a73041..d1e1ffb7b0 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm @@ -121,7 +121,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") } TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; int WTcnt, default="0"; int Fcnt, default="0"; bool inFlush, default="false"; diff --git a/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm 
b/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm index bcf99ff362..ed5e40cfa1 100644 --- a/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm +++ b/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm @@ -167,7 +167,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Cycles ticksToCycles(Tick t); diff --git a/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm b/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm index 2b5935dee5..29f6d8e87d 100644 --- a/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm +++ b/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm @@ -167,7 +167,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Cycles ticksToCycles(Tick t); diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm index 5d85ad2fc6..bac7fd1b12 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm @@ -181,7 +181,7 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol") // Stores only region addresses TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -195,8 +195,8 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol") Cycles curCycle(); MachineID mapAddressToMachine(Addr addr, MachineType mtype); - int blockBits, default="RubySystem::getBlockSizeBits()"; - int 
blockBytes, default="RubySystem::getBlockSizeBytes()"; + int blockBits, default="m_ruby_system->getBlockSizeBits()"; + int blockBytes, default="m_ruby_system->getBlockSizeBytes()"; int regionBits, default="log2(m_blocksPerRegion)"; // Functions diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm index 2464e038ff..3f1ba2540f 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm @@ -155,7 +155,7 @@ machine(MachineType:RegionDir, "Region Directory for AMD_Base-like protocol") // Stores only region addresses TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -169,8 +169,8 @@ machine(MachineType:RegionDir, "Region Directory for AMD_Base-like protocol") Cycles curCycle(); MachineID mapAddressToMachine(Addr addr, MachineType mtype); - int blockBits, default="RubySystem::getBlockSizeBits()"; - int blockBytes, default="RubySystem::getBlockSizeBytes()"; + int blockBits, default="m_ruby_system->getBlockSizeBits()"; + int blockBytes, default="m_ruby_system->getBlockSizeBytes()"; int regionBits, default="log2(m_blocksPerRegion)"; // Functions diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm index 17a92f5f90..5b5ab3148a 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm @@ -183,7 +183,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm 
b/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm index 4e9e9597aa..b53ebe8ee2 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm @@ -192,7 +192,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm index 4a513d6d3f..b6410d12e7 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm @@ -143,7 +143,7 @@ machine(MachineType:Directory, "Directory protocol") bool isPresent(Addr); } - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // ** OBJECTS ** TBETable TBEs, template="", constructor="m_number_of_TBEs"; diff --git a/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm index 865fce4e3c..24f8146a02 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm @@ -198,7 +198,7 @@ machine(MachineType:L1Cache, "Token protocol") TBETable L1_TBEs, template="", constructor="m_number_of_TBEs"; bool starving, default="false"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; PersistentTable persistentTable; TimerTable useTimerTable; diff --git a/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm b/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm index 7f2bdf94e0..8d035a61bb 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm @@ -171,7 +171,7 @@ machine(MachineType:Directory, 
"Token protocol") TBETable TBEs, template="", constructor="m_number_of_TBEs"; bool starving, default="false"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick clockEdge(Cycles c); diff --git a/src/mem/ruby/protocol/RubySlicc_Exports.sm b/src/mem/ruby/protocol/RubySlicc_Exports.sm index 8f0341f328..97770e3516 100644 --- a/src/mem/ruby/protocol/RubySlicc_Exports.sm +++ b/src/mem/ruby/protocol/RubySlicc_Exports.sm @@ -72,6 +72,8 @@ structure(WriteMask, external="yes", desc="...") { int count(); int count(int); bool test(int); + int getBlockSize(); + void setBlockSize(int); } structure(DataBlock, external = "yes", desc="..."){ diff --git a/src/mem/ruby/protocol/RubySlicc_MemControl.sm b/src/mem/ruby/protocol/RubySlicc_MemControl.sm index 012b169dea..848ada4d12 100644 --- a/src/mem/ruby/protocol/RubySlicc_MemControl.sm +++ b/src/mem/ruby/protocol/RubySlicc_MemControl.sm @@ -89,7 +89,9 @@ structure(MemoryMsg, desc="...", interface="Message") { if ((MessageSize == MessageSizeType:Response_Data) || (MessageSize == MessageSizeType:Writeback_Data)) { WriteMask read_mask; - read_mask.setMask(addressOffset(addr, makeLineAddress(addr)), Len, true); + read_mask.setBlockSize(mask.getBlockSize()); + read_mask.setMask(addressOffset(addr, + makeLineAddress(addr, mask.getBlockSize())), Len, true); if (MessageSize != MessageSizeType:Writeback_Data) { read_mask.setInvertedMask(mask); } diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm index 4e0e4f4511..848d16491d 100644 --- a/src/mem/ruby/protocol/RubySlicc_Types.sm +++ b/src/mem/ruby/protocol/RubySlicc_Types.sm @@ -94,7 +94,7 @@ structure (Set, external = "yes", non_obj="yes") { NodeID smallestElement(); } -structure (NetDest, external = "yes", non_obj="yes") { +structure (NetDest, external = "yes", non_obj="yes", implicit_ctor="m_ruby_system") { void setSize(int); void 
setSize(int, int); void add(NodeID); diff --git a/src/mem/ruby/protocol/RubySlicc_Util.sm b/src/mem/ruby/protocol/RubySlicc_Util.sm index 104c7c034c..93976bc4e1 100644 --- a/src/mem/ruby/protocol/RubySlicc_Util.sm +++ b/src/mem/ruby/protocol/RubySlicc_Util.sm @@ -52,6 +52,7 @@ Addr intToAddress(int addr); int addressOffset(Addr addr, Addr base); int max_tokens(); Addr makeLineAddress(Addr addr); +Addr makeLineAddress(Addr addr, int cacheLineBits); int getOffset(Addr addr); int mod(int val, int mod); Addr bitSelect(Addr addr, int small, int big); diff --git a/src/mem/ruby/protocol/chi/CHI-cache.sm b/src/mem/ruby/protocol/chi/CHI-cache.sm index dcd142ea47..a644bbe506 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache.sm @@ -574,7 +574,7 @@ machine(MachineType:Cache, "Cache coherency protocol") : //////////////////////////////////////////////////////////////////////////// // Cache block size - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // CacheEntry structure(CacheEntry, interface="AbstractCacheEntry") { diff --git a/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm b/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm index aa27c40964..f7616e9ec4 100644 --- a/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm +++ b/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm @@ -192,7 +192,7 @@ machine(MachineType:MiscNode, "CHI Misc Node for handling and distrbuting DVM op //////////////////////////////////////////////////////////////////////////// // Cache block size - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // Helper class for tracking expected response and data messages structure(ExpectedMap, external ="yes") { diff --git a/src/mem/ruby/protocol/chi/CHI-mem.sm b/src/mem/ruby/protocol/chi/CHI-mem.sm index 46f57456a5..58f22d2007 100644 --- a/src/mem/ruby/protocol/chi/CHI-mem.sm +++ 
b/src/mem/ruby/protocol/chi/CHI-mem.sm @@ -157,7 +157,7 @@ machine(MachineType:Memory, "Memory controller interface") : //////////////////////////////////////////////////////////////////////////// // Cache block size - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // TBE fields structure(TBE, desc="...") { diff --git a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh index 0e00a60c28..1305deddce 100644 --- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh +++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh @@ -59,6 +59,8 @@ namespace gem5 namespace ruby { +class RubySystem; + class AbstractCacheEntry : public ReplaceableEntry { private: @@ -78,16 +80,15 @@ class AbstractCacheEntry : public ReplaceableEntry // The methods below are those called by ruby runtime, add when it // is absolutely necessary and should all be virtual function. - virtual DataBlock& + [[noreturn]] virtual DataBlock& getDataBlk() { panic("getDataBlk() not implemented!"); - - // Dummy return to appease the compiler - static DataBlock b; - return b; } + virtual void initBlockSize(int block_size) { }; + virtual void setRubySystem(RubySystem *rs) { }; + int validBlocks; virtual int& getNumValidBlocks() { diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index 36092387ac..0bcc662629 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -89,6 +89,9 @@ AbstractController::init() getMemReqQueue()->setConsumer(this); } + downstreamDestinations.setRubySystem(m_ruby_system); + upstreamDestinations.setRubySystem(m_ruby_system); + // Initialize the addr->downstream machine mappings. Multiple machines // in downstream_destinations can have the same address range if they have // different types. 
If this is the case, mapAddressToDownstreamMachine @@ -268,7 +271,7 @@ AbstractController::serviceMemoryQueue() } const MemoryMsg *mem_msg = (const MemoryMsg*)mem_queue->peek(); - unsigned int req_size = RubySystem::getBlockSizeBytes(); + unsigned int req_size = m_ruby_system->getBlockSizeBytes(); if (mem_msg->m_Len > 0) { req_size = mem_msg->m_Len; } @@ -294,7 +297,7 @@ AbstractController::serviceMemoryQueue() SenderState *s = new SenderState(mem_msg->m_Sender); pkt->pushSenderState(s); - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { // Use functional rather than timing accesses during warmup mem_queue->dequeue(clockEdge()); memoryPort.sendFunctional(pkt); @@ -382,7 +385,10 @@ AbstractController::recvTimingResp(PacketPtr pkt) return false; } - std::shared_ptr msg = std::make_shared(clockEdge()); + int blk_size = m_ruby_system->getBlockSizeBytes(); + + std::shared_ptr msg = + std::make_shared(clockEdge(), blk_size, m_ruby_system); (*msg).m_addr = pkt->getAddr(); (*msg).m_Sender = m_machineID; @@ -396,7 +402,7 @@ AbstractController::recvTimingResp(PacketPtr pkt) // Copy data from the packet (*msg).m_DataBlk.setData(pkt->getPtr(), 0, - RubySystem::getBlockSizeBytes()); + m_ruby_system->getBlockSizeBytes()); } else if (pkt->isWrite()) { (*msg).m_Type = MemoryRequestType_MEMORY_WB; (*msg).m_MessageSize = MessageSizeType_Writeback_Control; @@ -404,7 +410,8 @@ AbstractController::recvTimingResp(PacketPtr pkt) panic("Incorrect packet type received from memory controller!"); } - memRspQueue->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); + memRspQueue->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); delete pkt; return true; } @@ -471,6 +478,45 @@ AbstractController::sendRetryRespToMem() { } } +Addr +AbstractController::getOffset(Addr addr) const +{ + return ruby::getOffset(addr, m_ruby_system->getBlockSizeBits()); +} + +Addr 
+AbstractController::makeLineAddress(Addr addr) const +{ + return ruby::makeLineAddress(addr, m_ruby_system->getBlockSizeBits()); +} + +std::string +AbstractController::printAddress(Addr addr) const +{ + return ruby::printAddress(addr, m_ruby_system->getBlockSizeBits()); +} + +NetDest +AbstractController::broadcast(MachineType type) +{ + assert(m_ruby_system != nullptr); + NodeID type_count = m_ruby_system->MachineType_base_count(type); + + NetDest dest; + for (NodeID i = 0; i < type_count; i++) { + MachineID mach = {type, i}; + dest.add(mach); + } + return dest; +} + +int +AbstractController::machineCount(MachineType machType) +{ + assert(m_ruby_system != nullptr); + return m_ruby_system->MachineType_base_count(machType); +} + bool AbstractController::MemoryPort::recvTimingResp(PacketPtr pkt) { diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index ce6a6972af..79f67073a6 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -72,6 +72,7 @@ namespace ruby class Network; class GPUCoalescer; class DMASequencer; +class RubySystem; // used to communicate that an in_port peeked the wrong message type class RejectException: public std::exception @@ -229,6 +230,11 @@ class AbstractController : public ClockedObject, public Consumer /** List of upstream destinations (towards the CPU) */ const NetDest& allUpstreamDest() const { return upstreamDestinations; } + // Helper methods for commonly used functions called in common/address.hh + Addr getOffset(Addr addr) const; + Addr makeLineAddress(Addr addr) const; + std::string printAddress(Addr addr) const; + protected: //! Profiles original cache requests including PUTs void profileRequest(const std::string &request); @@ -452,6 +458,13 @@ class AbstractController : public ClockedObject, public Consumer {} }; + RubySystem *m_ruby_system = nullptr; + + // Formerly in RubySlicc_ComponentMapping.hh. 
Moved here to access + // RubySystem pointer. + NetDest broadcast(MachineType type); + int machineCount(MachineType machType); + private: /** The address range to which the controller responds on the CPU side. */ const AddrRangeList addrRanges; diff --git a/src/mem/ruby/slicc_interface/Message.hh b/src/mem/ruby/slicc_interface/Message.hh index 5c824c4a38..31fb5e8e92 100644 --- a/src/mem/ruby/slicc_interface/Message.hh +++ b/src/mem/ruby/slicc_interface/Message.hh @@ -62,10 +62,12 @@ typedef std::shared_ptr MsgPtr; class Message { public: - Message(Tick curTime) - : m_time(curTime), + Message(Tick curTime, int block_size, const RubySystem *rs) + : m_block_size(block_size), + m_time(curTime), m_LastEnqueueTime(curTime), - m_DelayedTicks(0), m_msg_counter(0) + m_DelayedTicks(0), m_msg_counter(0), + p_ruby_system(rs) { } Message(const Message &other) = default; @@ -121,6 +123,9 @@ class Message int getVnet() const { return vnet; } void setVnet(int net) { vnet = net; } + protected: + int m_block_size = 0; + private: Tick m_time; Tick m_LastEnqueueTime; // my last enqueue time @@ -130,6 +135,9 @@ class Message // Variables for required network traversal int incoming_link; int vnet; + + // Needed to call MacheinType_base_count/level + const RubySystem *p_ruby_system = nullptr; }; inline bool diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh index a258a18f9a..58eae229be 100644 --- a/src/mem/ruby/slicc_interface/RubyRequest.hh +++ b/src/mem/ruby/slicc_interface/RubyRequest.hh @@ -86,11 +86,12 @@ class RubyRequest : public Message bool m_isSLCSet; bool m_isSecure; - RubyRequest(Tick curTime, uint64_t _paddr, int _len, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, + uint64_t _paddr, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No, ContextID _proc_id = 100, ContextID _core_id = 99) - : Message(curTime), + : Message(curTime, block_size, 
rs), m_PhysicalAddress(_paddr), m_Type(_type), m_ProgramCounter(_pc), @@ -99,13 +100,16 @@ class RubyRequest : public Message m_Prefetch(_pb), m_pkt(_pkt), m_contextId(_core_id), + m_writeMask(block_size), + m_WTData(block_size), m_htmFromTransaction(false), m_htmTransactionUid(0), m_isTlbi(false), m_tlbiTransactionUid(0), m_isSecure(m_pkt ? m_pkt->req->isSecure() : false) { - m_LineAddress = makeLineAddress(m_PhysicalAddress); + int block_size_bits = floorLog2(block_size); + m_LineAddress = makeLineAddress(m_PhysicalAddress, block_size_bits); if (_pkt) { m_isGLCSet = m_pkt->req->isGLCSet(); m_isSLCSet = m_pkt->req->isSLCSet(); @@ -116,10 +120,10 @@ class RubyRequest : public Message } /** RubyRequest for memory management commands */ - RubyRequest(Tick curTime, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, ContextID _proc_id, ContextID _core_id) - : Message(curTime), + : Message(curTime, block_size, rs), m_PhysicalAddress(0), m_Type(_type), m_ProgramCounter(_pc), @@ -128,6 +132,8 @@ class RubyRequest : public Message m_Prefetch(PrefetchBit_No), m_pkt(_pkt), m_contextId(_core_id), + m_writeMask(block_size), + m_WTData(block_size), m_htmFromTransaction(false), m_htmTransactionUid(0), m_isTlbi(false), @@ -144,14 +150,14 @@ class RubyRequest : public Message } } - RubyRequest(Tick curTime, uint64_t _paddr, int _len, - uint64_t _pc, RubyRequestType _type, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, + uint64_t _paddr, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb, unsigned _proc_id, unsigned _core_id, int _wm_size, std::vector & _wm_mask, DataBlock & _Data, uint64_t _instSeqNum = 0) - : Message(curTime), + : Message(curTime, block_size, rs), m_PhysicalAddress(_paddr), m_Type(_type), m_ProgramCounter(_pc), @@ -170,7 +176,8 @@ class RubyRequest : public Message m_tlbiTransactionUid(0), 
m_isSecure(m_pkt->req->isSecure()) { - m_LineAddress = makeLineAddress(m_PhysicalAddress); + int block_size_bits = floorLog2(block_size); + m_LineAddress = makeLineAddress(m_PhysicalAddress, block_size_bits); if (_pkt) { m_isGLCSet = m_pkt->req->isGLCSet(); m_isSLCSet = m_pkt->req->isSLCSet(); @@ -180,15 +187,15 @@ class RubyRequest : public Message } } - RubyRequest(Tick curTime, uint64_t _paddr, int _len, - uint64_t _pc, RubyRequestType _type, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, + uint64_t _paddr, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb, unsigned _proc_id, unsigned _core_id, int _wm_size, std::vector & _wm_mask, DataBlock & _Data, std::vector< std::pair > _atomicOps, uint64_t _instSeqNum = 0) - : Message(curTime), + : Message(curTime, block_size, rs), m_PhysicalAddress(_paddr), m_Type(_type), m_ProgramCounter(_pc), @@ -207,7 +214,8 @@ class RubyRequest : public Message m_tlbiTransactionUid(0), m_isSecure(m_pkt->req->isSecure()) { - m_LineAddress = makeLineAddress(m_PhysicalAddress); + int block_size_bits = floorLog2(block_size); + m_LineAddress = makeLineAddress(m_PhysicalAddress, block_size_bits); if (_pkt) { m_isGLCSet = m_pkt->req->isGLCSet(); m_isSLCSet = m_pkt->req->isSLCSet(); @@ -218,7 +226,12 @@ class RubyRequest : public Message } } - RubyRequest(Tick curTime) : Message(curTime) {} + RubyRequest(Tick curTime, int block_size, RubySystem *rs) + : Message(curTime, block_size, rs), + m_writeMask(block_size), + m_WTData(block_size) + { + } MsgPtr clone() const { return std::shared_ptr(new RubyRequest(*this)); } diff --git a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh index 9a433d1cee..1195089fc3 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh @@ -41,17 +41,6 @@ namespace gem5 namespace ruby { -inline 
NetDest -broadcast(MachineType type) -{ - NetDest dest; - for (NodeID i = 0; i < MachineType_base_count(type); i++) { - MachineID mach = {type, i}; - dest.add(mach); - } - return dest; -} - inline MachineID mapAddressToRange(Addr addr, MachineType type, int low_bit, int num_bits, int cluster_id = 0) @@ -77,12 +66,6 @@ machineIDToMachineType(MachineID machID) return machID.type; } -inline int -machineCount(MachineType machType) -{ - return MachineType_base_count(machType); -} - inline MachineID createMachineID(MachineType type, NodeID id) { diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh index 8df56c7013..f4a49463a8 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh @@ -233,8 +233,9 @@ addressOffset(Addr addr, Addr base) inline bool testAndRead(Addr addr, DataBlock& blk, Packet *pkt) { - Addr pktLineAddr = makeLineAddress(pkt->getAddr()); - Addr lineAddr = makeLineAddress(addr); + int block_size_bits = floorLog2(blk.getBlockSize()); + Addr pktLineAddr = makeLineAddress(pkt->getAddr(), block_size_bits); + Addr lineAddr = makeLineAddress(addr, block_size_bits); if (pktLineAddr == lineAddr) { uint8_t *data = pkt->getPtr(); @@ -259,8 +260,10 @@ testAndRead(Addr addr, DataBlock& blk, Packet *pkt) inline bool testAndReadMask(Addr addr, DataBlock& blk, WriteMask& mask, Packet *pkt) { - Addr pktLineAddr = makeLineAddress(pkt->getAddr()); - Addr lineAddr = makeLineAddress(addr); + assert(blk.getBlockSize() == mask.getBlockSize()); + int block_size_bits = floorLog2(blk.getBlockSize()); + Addr pktLineAddr = makeLineAddress(pkt->getAddr(), block_size_bits); + Addr lineAddr = makeLineAddress(addr, block_size_bits); if (pktLineAddr == lineAddr) { uint8_t *data = pkt->getPtr(); @@ -288,8 +291,9 @@ testAndReadMask(Addr addr, DataBlock& blk, WriteMask& mask, Packet *pkt) inline bool testAndWrite(Addr addr, DataBlock& blk, Packet *pkt) { - Addr pktLineAddr = 
makeLineAddress(pkt->getAddr()); - Addr lineAddr = makeLineAddress(addr); + int block_size_bits = floorLog2(blk.getBlockSize()); + Addr pktLineAddr = makeLineAddress(pkt->getAddr(), block_size_bits); + Addr lineAddr = makeLineAddress(addr, block_size_bits); if (pktLineAddr == lineAddr) { const uint8_t *data = pkt->getConstPtr(); diff --git a/src/mem/ruby/structures/ALUFreeListArray.cc b/src/mem/ruby/structures/ALUFreeListArray.cc index 87b5cbfbd2..3e25e5b599 100644 --- a/src/mem/ruby/structures/ALUFreeListArray.cc +++ b/src/mem/ruby/structures/ALUFreeListArray.cc @@ -57,10 +57,10 @@ namespace ruby * - The same line has been accessed in the past accessLatency ticks */ -ALUFreeListArray::ALUFreeListArray(unsigned int num_ALUs, Tick access_latency) +ALUFreeListArray::ALUFreeListArray(unsigned int num_ALUs, Cycles access_clocks) { this->numALUs = num_ALUs; - this->accessLatency = access_latency; + this->accessClocks = access_clocks; } bool ALUFreeListArray::tryAccess(Addr addr) @@ -85,7 +85,7 @@ bool ALUFreeListArray::tryAccess(Addr addr) } // Block access if the line is already being used - if (record.lineAddr == makeLineAddress(addr)) { + if (record.lineAddr == makeLineAddress(addr, m_block_size_bits)) { return false; } } @@ -99,7 +99,9 @@ void ALUFreeListArray::reserve(Addr addr) // the access is valid // Add record to queue - accessQueue.push_front(AccessRecord(makeLineAddress(addr), curTick())); + accessQueue.push_front( + AccessRecord(makeLineAddress(addr, m_block_size_bits), curTick()) + ); } } // namespace ruby diff --git a/src/mem/ruby/structures/ALUFreeListArray.hh b/src/mem/ruby/structures/ALUFreeListArray.hh index bed1b00b5c..5c4fdd95f9 100644 --- a/src/mem/ruby/structures/ALUFreeListArray.hh +++ b/src/mem/ruby/structures/ALUFreeListArray.hh @@ -32,6 +32,7 @@ #include +#include "base/intmath.hh" #include "mem/ruby/common/TypeDefines.hh" #include "sim/cur_tick.hh" @@ -45,7 +46,8 @@ class ALUFreeListArray { private: unsigned int numALUs; - Tick accessLatency; 
+ Cycles accessClocks; + Tick accessLatency = 0; class AccessRecord { @@ -62,14 +64,33 @@ class ALUFreeListArray // Queue of accesses from past accessLatency cycles std::deque accessQueue; + int m_block_size_bits = 0; + public: - ALUFreeListArray(unsigned int num_ALUs, Tick access_latency); + ALUFreeListArray(unsigned int num_ALUs, Cycles access_clocks); bool tryAccess(Addr addr); void reserve(Addr addr); - Tick getLatency() const { return accessLatency; } + Tick + getLatency() const + { + assert(accessLatency > 0); + return accessLatency; + } + + void + setClockPeriod(Tick clockPeriod) + { + accessLatency = accessClocks * clockPeriod; + } + + void + setBlockSize(int block_size) + { + m_block_size_bits = floorLog2(block_size); + } }; } // namespace ruby diff --git a/src/mem/ruby/structures/BankedArray.cc b/src/mem/ruby/structures/BankedArray.cc index 0f01d5c396..2c2202dec5 100644 --- a/src/mem/ruby/structures/BankedArray.cc +++ b/src/mem/ruby/structures/BankedArray.cc @@ -42,8 +42,7 @@ namespace ruby { BankedArray::BankedArray(unsigned int banks, Cycles accessLatency, - unsigned int startIndexBit, RubySystem *rs) - : m_ruby_system(rs) + unsigned int startIndexBit) { this->banks = banks; this->accessLatency = accessLatency; @@ -78,6 +77,8 @@ BankedArray::reserve(int64_t idx) if (accessLatency == 0) return; + assert(clockPeriod > 0); + unsigned int bank = mapIndexToBank(idx); assert(bank < banks); @@ -95,7 +96,7 @@ BankedArray::reserve(int64_t idx) busyBanks[bank].idx = idx; busyBanks[bank].startAccess = curTick(); busyBanks[bank].endAccess = curTick() + - (accessLatency-1) * m_ruby_system->clockPeriod(); + (accessLatency-1) * clockPeriod; } unsigned int diff --git a/src/mem/ruby/structures/BankedArray.hh b/src/mem/ruby/structures/BankedArray.hh index c757759296..ecc984a617 100644 --- a/src/mem/ruby/structures/BankedArray.hh +++ b/src/mem/ruby/structures/BankedArray.hh @@ -48,6 +48,7 @@ class BankedArray private: unsigned int banks; Cycles accessLatency; + Tick 
clockPeriod = 0; unsigned int bankBits; unsigned int startIndexBit; RubySystem *m_ruby_system; @@ -69,7 +70,7 @@ class BankedArray public: BankedArray(unsigned int banks, Cycles accessLatency, - unsigned int startIndexBit, RubySystem *rs); + unsigned int startIndexBit); // Note: We try the access based on the cache index, not the address // This is so we don't get aliasing on blocks being replaced @@ -78,6 +79,8 @@ class BankedArray void reserve(int64_t idx); Cycles getLatency() const { return accessLatency; } + + void setClockPeriod(Tick _clockPeriod) { clockPeriod = _clockPeriod; } }; } // namespace ruby diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc index 90d67fb29b..6bc35bac7d 100644 --- a/src/mem/ruby/structures/CacheMemory.cc +++ b/src/mem/ruby/structures/CacheMemory.cc @@ -69,12 +69,9 @@ operator<<(std::ostream& out, const CacheMemory& obj) CacheMemory::CacheMemory(const Params &p) : SimObject(p), - dataArray(p.dataArrayBanks, p.dataAccessLatency, - p.start_index_bit, p.ruby_system), - tagArray(p.tagArrayBanks, p.tagAccessLatency, - p.start_index_bit, p.ruby_system), - atomicALUArray(p.atomicALUs, p.atomicLatency * - p.ruby_system->clockPeriod()), + dataArray(p.dataArrayBanks, p.dataAccessLatency, p.start_index_bit), + tagArray(p.tagArrayBanks, p.tagAccessLatency, p.start_index_bit), + atomicALUArray(p.atomicALUs, p.atomicLatency), cacheMemoryStats(this) { m_cache_size = p.size; @@ -88,12 +85,25 @@ CacheMemory::CacheMemory(const Params &p) m_replacementPolicy_ptr) ? 
true : false; } +void +CacheMemory::setRubySystem(RubySystem* rs) +{ + dataArray.setClockPeriod(rs->clockPeriod()); + tagArray.setClockPeriod(rs->clockPeriod()); + atomicALUArray.setClockPeriod(rs->clockPeriod()); + atomicALUArray.setBlockSize(rs->getBlockSizeBytes()); + + if (m_block_size == 0) { + m_block_size = rs->getBlockSizeBytes(); + } + + m_ruby_system = rs; +} + void CacheMemory::init() { - if (m_block_size == 0) { - m_block_size = RubySystem::getBlockSizeBytes(); - } + assert(m_block_size != 0); m_cache_num_sets = (m_cache_size / m_cache_assoc) / m_block_size; assert(m_cache_num_sets > 1); m_cache_num_set_bits = floorLog2(m_cache_num_sets); @@ -286,6 +296,9 @@ CacheMemory::allocate(Addr address, AbstractCacheEntry *entry) assert(cacheAvail(address)); DPRINTF(RubyCache, "allocating address: %#x\n", address); + entry->initBlockSize(m_block_size); + entry->setRubySystem(m_ruby_system); + // Find the first open slot int64_t cacheSet = addressToCacheSet(address); std::vector &set = m_cache[cacheSet]; diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh index de7c327f63..912ae22d1f 100644 --- a/src/mem/ruby/structures/CacheMemory.hh +++ b/src/mem/ruby/structures/CacheMemory.hh @@ -154,6 +154,8 @@ class CacheMemory : public SimObject void htmAbortTransaction(); void htmCommitTransaction(); + void setRubySystem(RubySystem* rs); + public: int getCacheSize() const { return m_cache_size; } int getCacheAssoc() const { return m_cache_assoc; } @@ -213,6 +215,14 @@ class CacheMemory : public SimObject */ bool m_use_occupancy; + RubySystem *m_ruby_system = nullptr; + + Addr + makeLineAddress(Addr addr) const + { + return ruby::makeLineAddress(addr, floorLog2(m_block_size)); + } + private: struct CacheMemoryStats : public statistics::Group { diff --git a/src/mem/ruby/structures/DirectoryMemory.cc b/src/mem/ruby/structures/DirectoryMemory.cc index 620254b82c..7469f72451 100644 --- a/src/mem/ruby/structures/DirectoryMemory.cc +++ 
b/src/mem/ruby/structures/DirectoryMemory.cc @@ -64,12 +64,14 @@ DirectoryMemory::DirectoryMemory(const Params &p) } m_size_bits = floorLog2(m_size_bytes); m_num_entries = 0; + m_block_size = p.block_size; + m_ruby_system = p.ruby_system; } void DirectoryMemory::init() { - m_num_entries = m_size_bytes / RubySystem::getBlockSizeBytes(); + m_num_entries = m_size_bytes / m_block_size; m_entries = new AbstractCacheEntry*[m_num_entries]; for (int i = 0; i < m_num_entries; i++) m_entries[i] = NULL; @@ -108,7 +110,7 @@ DirectoryMemory::mapAddressToLocalIdx(Addr address) } ret += r.size(); } - return ret >> RubySystem::getBlockSizeBits(); + return ret >> (floorLog2(m_block_size)); } AbstractCacheEntry* @@ -133,6 +135,8 @@ DirectoryMemory::allocate(Addr address, AbstractCacheEntry *entry) assert(idx < m_num_entries); assert(m_entries[idx] == NULL); entry->changePermission(AccessPermission_Read_Only); + entry->initBlockSize(m_block_size); + entry->setRubySystem(m_ruby_system); m_entries[idx] = entry; return entry; diff --git a/src/mem/ruby/structures/DirectoryMemory.hh b/src/mem/ruby/structures/DirectoryMemory.hh index 8a4532864d..6e77e2a4ca 100644 --- a/src/mem/ruby/structures/DirectoryMemory.hh +++ b/src/mem/ruby/structures/DirectoryMemory.hh @@ -104,6 +104,9 @@ class DirectoryMemory : public SimObject uint64_t m_size_bytes; uint64_t m_size_bits; uint64_t m_num_entries; + uint32_t m_block_size; + + RubySystem *m_ruby_system = nullptr; /** * The address range for which the directory responds. Normally diff --git a/src/mem/ruby/structures/DirectoryMemory.py b/src/mem/ruby/structures/DirectoryMemory.py index 85f05367cf..202617bceb 100644 --- a/src/mem/ruby/structures/DirectoryMemory.py +++ b/src/mem/ruby/structures/DirectoryMemory.py @@ -49,3 +49,7 @@ class RubyDirectoryMemory(SimObject): addr_ranges = VectorParam.AddrRange( Parent.addr_ranges, "Address range this directory responds to" ) + block_size = Param.UInt32( + "Size of a block in bytes. 
Usually same as cache line size." + ) + ruby_system = Param.RubySystem(Parent.any, "") diff --git a/src/mem/ruby/structures/PerfectCacheMemory.hh b/src/mem/ruby/structures/PerfectCacheMemory.hh index 664d10f202..0966ca80d2 100644 --- a/src/mem/ruby/structures/PerfectCacheMemory.hh +++ b/src/mem/ruby/structures/PerfectCacheMemory.hh @@ -74,6 +74,8 @@ class PerfectCacheMemory public: PerfectCacheMemory(); + void setBlockSize(const int block_size) { m_block_size = block_size; } + // tests to see if an address is present in the cache bool isTagPresent(Addr address) const; @@ -108,6 +110,8 @@ class PerfectCacheMemory // Data Members (m_prefix) std::unordered_map > m_map; + + int m_block_size = 0; }; template @@ -130,7 +134,7 @@ template inline bool PerfectCacheMemory::isTagPresent(Addr address) const { - return m_map.count(makeLineAddress(address)) > 0; + return m_map.count(makeLineAddress(address, floorLog2(m_block_size))) > 0; } template @@ -149,7 +153,8 @@ PerfectCacheMemory::allocate(Addr address) PerfectCacheLineState line_state; line_state.m_permission = AccessPermission_Invalid; line_state.m_entry = ENTRY(); - m_map[makeLineAddress(address)] = line_state; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + m_map.emplace(line_addr, line_state); } // deallocate entry @@ -157,7 +162,8 @@ template inline void PerfectCacheMemory::deallocate(Addr address) { - [[maybe_unused]] auto num_erased = m_map.erase(makeLineAddress(address)); + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + [[maybe_unused]] auto num_erased = m_map.erase(line_addr); assert(num_erased == 1); } @@ -175,7 +181,8 @@ template inline ENTRY* PerfectCacheMemory::lookup(Addr address) { - return &m_map[makeLineAddress(address)].m_entry; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + return &m_map[line_addr].m_entry; } // looks an address up in the cache @@ -183,14 +190,16 @@ template inline const ENTRY* PerfectCacheMemory::lookup(Addr 
address) const { - return &m_map[makeLineAddress(address)].m_entry; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + return &m_map[line_addr].m_entry; } template inline AccessPermission PerfectCacheMemory::getPermission(Addr address) const { - return m_map[makeLineAddress(address)].m_permission; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + return m_map[line_addr].m_permission; } template @@ -198,8 +207,8 @@ inline void PerfectCacheMemory::changePermission(Addr address, AccessPermission new_perm) { - Addr line_address = makeLineAddress(address); - PerfectCacheLineState& line_state = m_map[line_address]; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + PerfectCacheLineState& line_state = m_map[line_addr]; line_state.m_permission = new_perm; } diff --git a/src/mem/ruby/structures/PersistentTable.hh b/src/mem/ruby/structures/PersistentTable.hh index 5382269273..1162e1dda1 100644 --- a/src/mem/ruby/structures/PersistentTable.hh +++ b/src/mem/ruby/structures/PersistentTable.hh @@ -63,6 +63,12 @@ class PersistentTable // Destructor ~PersistentTable(); + void + setBlockSize(int block_size) + { + m_block_size_bits = floorLog2(block_size); + } + // Public Methods void persistentRequestLock(Addr address, MachineID locker, AccessType type); @@ -82,9 +88,17 @@ class PersistentTable PersistentTable(const PersistentTable& obj); PersistentTable& operator=(const PersistentTable& obj); + int m_block_size_bits = 0; + // Data Members (m_prefix) typedef std::unordered_map AddressMap; AddressMap m_map; + + Addr + makeLineAddress(Addr addr) const + { + return ruby::makeLineAddress(addr, m_block_size_bits); + } }; inline std::ostream& diff --git a/src/mem/ruby/structures/RubyCache.py b/src/mem/ruby/structures/RubyCache.py index 2f457f5c4a..4b1023fc61 100644 --- a/src/mem/ruby/structures/RubyCache.py +++ b/src/mem/ruby/structures/RubyCache.py @@ -54,4 +54,3 @@ class RubyCache(SimObject): dataAccessLatency = 
Param.Cycles(1, "cycles for a data array access") tagAccessLatency = Param.Cycles(1, "cycles for a tag array access") resourceStalls = Param.Bool(False, "stall if there is a resource failure") - ruby_system = Param.RubySystem(Parent.any, "") diff --git a/src/mem/ruby/structures/RubyPrefetcher.cc b/src/mem/ruby/structures/RubyPrefetcher.cc index e45eff2c2f..bffcfe2327 100644 --- a/src/mem/ruby/structures/RubyPrefetcher.cc +++ b/src/mem/ruby/structures/RubyPrefetcher.cc @@ -56,13 +56,15 @@ namespace ruby RubyPrefetcher::RubyPrefetcher(const Params &p) : SimObject(p), m_num_streams(p.num_streams), - m_array(p.num_streams), m_train_misses(p.train_misses), + m_array(p.num_streams, p.block_size), m_train_misses(p.train_misses), m_num_startup_pfs(p.num_startup_pfs), unitFilter(p.unit_filter), negativeFilter(p.unit_filter), nonUnitFilter(p.nonunit_filter), m_prefetch_cross_pages(p.cross_page), pageShift(p.page_shift), + m_block_size_bits(floorLog2(p.block_size)), + m_block_size_bytes(p.block_size), rubyPrefetcherStats(this) { assert(m_num_streams > 0); @@ -90,7 +92,7 @@ void RubyPrefetcher::observeMiss(Addr address, const RubyRequestType& type) { DPRINTF(RubyPrefetcher, "Observed miss for %#x\n", address); - Addr line_addr = makeLineAddress(address); + Addr line_addr = makeLineAddress(address, m_block_size_bits); rubyPrefetcherStats.numMissObserved++; // check to see if we have already issued a prefetch for this block @@ -214,7 +216,7 @@ RubyPrefetcher::initializeStream(Addr address, int stride, // initialize the stream prefetcher PrefetchEntry *mystream = &(m_array[index]); - mystream->m_address = makeLineAddress(address); + mystream->m_address = makeLineAddress(address, m_block_size_bits); mystream->m_stride = stride; mystream->m_use_time = m_controller->curCycle(); mystream->m_is_valid = true; @@ -222,7 +224,7 @@ RubyPrefetcher::initializeStream(Addr address, int stride, // create a number of initial prefetches for this stream Addr page_addr = 
pageAddress(mystream->m_address); - Addr line_addr = makeLineAddress(mystream->m_address); + Addr line_addr = makeLineAddress(mystream->m_address, m_block_size_bits); // insert a number of prefetches into the prefetch table for (int k = 0; k < m_num_startup_pfs; k++) { @@ -312,8 +314,7 @@ RubyPrefetcher::accessNonunitFilter(Addr line_addr, // This stride HAS to be the multiplicative constant of // dataBlockBytes (bc makeNextStrideAddress is // calculated based on this multiplicative constant!) - const int stride = entry.stride / - RubySystem::getBlockSizeBytes(); + const int stride = entry.stride / m_block_size_bytes; // clear this filter entry entry.clear(); diff --git a/src/mem/ruby/structures/RubyPrefetcher.hh b/src/mem/ruby/structures/RubyPrefetcher.hh index 51e1b3c480..5627410713 100644 --- a/src/mem/ruby/structures/RubyPrefetcher.hh +++ b/src/mem/ruby/structures/RubyPrefetcher.hh @@ -68,10 +68,10 @@ class PrefetchEntry { public: /// constructor - PrefetchEntry() + PrefetchEntry(int block_size) { // default: 1 cache-line stride - m_stride = (1 << RubySystem::getBlockSizeBits()); + m_stride = (1 << floorLog2(block_size)); m_use_time = Cycles(0); m_is_valid = false; } @@ -239,6 +239,16 @@ class RubyPrefetcher : public SimObject const unsigned pageShift; + int m_block_size_bits = 0; + int m_block_size_bytes = 0; + + Addr + makeNextStrideAddress(Addr addr, int stride) const + { + return ruby::makeNextStrideAddress(addr, stride, + m_block_size_bytes); + } + struct RubyPrefetcherStats : public statistics::Group { RubyPrefetcherStats(statistics::Group *parent); diff --git a/src/mem/ruby/structures/RubyPrefetcher.py b/src/mem/ruby/structures/RubyPrefetcher.py index d4189ae7d5..155b7c314d 100644 --- a/src/mem/ruby/structures/RubyPrefetcher.py +++ b/src/mem/ruby/structures/RubyPrefetcher.py @@ -62,6 +62,9 @@ class RubyPrefetcher(SimObject): page_shift = Param.UInt32( 12, "Number of bits to mask to get a page number" ) + block_size = Param.UInt32( + "Size of block to 
prefetch, usually cache line size" + ) class Prefetcher(RubyPrefetcher): diff --git a/src/mem/ruby/structures/RubyPrefetcherProxy.cc b/src/mem/ruby/structures/RubyPrefetcherProxy.cc index 2a29fbc88e..a6fed8258c 100644 --- a/src/mem/ruby/structures/RubyPrefetcherProxy.cc +++ b/src/mem/ruby/structures/RubyPrefetcherProxy.cc @@ -66,7 +66,7 @@ RubyPrefetcherProxy::RubyPrefetcherProxy(AbstractController* _parent, prefetcher->setParentInfo( cacheCntrl->params().system, cacheCntrl->getProbeManager(), - RubySystem::getBlockSizeBytes()); + cacheCntrl->m_ruby_system->getBlockSizeBytes()); } } @@ -112,7 +112,7 @@ RubyPrefetcherProxy::issuePrefetch() if (pkt) { DPRINTF(HWPrefetch, "Next prefetch ready %s\n", pkt->print()); - unsigned blk_size = RubySystem::getBlockSizeBytes(); + unsigned blk_size = cacheCntrl->m_ruby_system->getBlockSizeBytes(); Addr line_addr = pkt->getBlockAddr(blk_size); if (issuedPfPkts.count(line_addr) == 0) { @@ -126,6 +126,8 @@ RubyPrefetcherProxy::issuePrefetch() std::shared_ptr msg = std::make_shared(cacheCntrl->clockEdge(), + blk_size, + cacheCntrl->m_ruby_system, pkt->getAddr(), blk_size, 0, // pc @@ -136,7 +138,10 @@ RubyPrefetcherProxy::issuePrefetch() // enqueue request into prefetch queue to the cache pfQueue->enqueue(msg, cacheCntrl->clockEdge(), - cacheCntrl->cyclesToTicks(Cycles(1))); + cacheCntrl->cyclesToTicks(Cycles(1)), + cacheCntrl->m_ruby_system->getRandomization(), + cacheCntrl->m_ruby_system->getWarmupEnabled() + ); // track all pending PF requests issuedPfPkts[line_addr] = pkt; @@ -230,5 +235,19 @@ RubyPrefetcherProxy::regProbePoints() cacheCntrl->getProbeManager(), "Data Update"); } +Addr +RubyPrefetcherProxy::makeLineAddress(Addr addr) const +{ + return ruby::makeLineAddress(addr, + cacheCntrl->m_ruby_system->getBlockSizeBits()); +} + +Addr +RubyPrefetcherProxy::getOffset(Addr addr) const +{ + return ruby::getOffset(addr, + cacheCntrl->m_ruby_system->getBlockSizeBits()); +} + } // namespace ruby } // namespace gem5 diff --git 
a/src/mem/ruby/structures/RubyPrefetcherProxy.hh b/src/mem/ruby/structures/RubyPrefetcherProxy.hh index 34c40154b6..e7c044edf8 100644 --- a/src/mem/ruby/structures/RubyPrefetcherProxy.hh +++ b/src/mem/ruby/structures/RubyPrefetcherProxy.hh @@ -142,6 +142,9 @@ class RubyPrefetcherProxy : public CacheAccessor, public Named */ ProbePointArg *ppDataUpdate; + Addr makeLineAddress(Addr addr) const; + Addr getOffset(Addr addr) const; + public: /** Accessor functions */ diff --git a/src/mem/ruby/structures/TBETable.hh b/src/mem/ruby/structures/TBETable.hh index 9030d52d9f..72770ce42f 100644 --- a/src/mem/ruby/structures/TBETable.hh +++ b/src/mem/ruby/structures/TBETable.hh @@ -70,6 +70,8 @@ class TBETable return (m_number_of_TBEs - m_map.size()) >= n; } + void setBlockSize(const int block_size) { m_block_size = block_size; } + ENTRY *getNullEntry(); ENTRY *lookup(Addr address); @@ -85,7 +87,8 @@ class TBETable std::unordered_map m_map; private: - int m_number_of_TBEs; + int m_number_of_TBEs = 0; + int m_block_size = 0; }; template @@ -101,7 +104,7 @@ template inline bool TBETable::isPresent(Addr address) const { - assert(address == makeLineAddress(address)); + assert(address == makeLineAddress(address, floorLog2(m_block_size))); assert(m_map.size() <= m_number_of_TBEs); return !!m_map.count(address); } @@ -112,7 +115,8 @@ TBETable::allocate(Addr address) { assert(!isPresent(address)); assert(m_map.size() < m_number_of_TBEs); - m_map[address] = ENTRY(); + assert(m_block_size > 0); + m_map.emplace(address, ENTRY(m_block_size)); } template diff --git a/src/mem/ruby/structures/TimerTable.cc b/src/mem/ruby/structures/TimerTable.cc index f8f24dbfc0..a9ce92252e 100644 --- a/src/mem/ruby/structures/TimerTable.cc +++ b/src/mem/ruby/structures/TimerTable.cc @@ -70,7 +70,7 @@ TimerTable::nextAddress() const void TimerTable::set(Addr address, Tick ready_time) { - assert(address == makeLineAddress(address)); + assert(address == makeLineAddress(address, m_block_size_bits)); 
assert(!m_map.count(address)); m_map[address] = ready_time; @@ -87,7 +87,7 @@ TimerTable::set(Addr address, Tick ready_time) void TimerTable::unset(Addr address) { - assert(address == makeLineAddress(address)); + assert(address == makeLineAddress(address, m_block_size_bits)); assert(m_map.count(address)); m_map.erase(address); diff --git a/src/mem/ruby/structures/TimerTable.hh b/src/mem/ruby/structures/TimerTable.hh index e676359fd4..92c485ab57 100644 --- a/src/mem/ruby/structures/TimerTable.hh +++ b/src/mem/ruby/structures/TimerTable.hh @@ -48,6 +48,12 @@ class TimerTable public: TimerTable(); + void + setBlockSize(int block_size) + { + m_block_size_bits = floorLog2(block_size); + } + void setConsumer(Consumer* consumer_ptr) { @@ -88,6 +94,8 @@ class TimerTable //! Consumer to signal a wakeup() Consumer* m_consumer_ptr; + int m_block_size_bits = 0; + std::string m_name; }; diff --git a/src/mem/ruby/structures/WireBuffer.cc b/src/mem/ruby/structures/WireBuffer.cc index a839fe7cc7..3ebbe2a305 100644 --- a/src/mem/ruby/structures/WireBuffer.cc +++ b/src/mem/ruby/structures/WireBuffer.cc @@ -36,7 +36,6 @@ #include "base/cprintf.hh" #include "base/stl_helpers.hh" -#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -74,7 +73,8 @@ WireBuffer::~WireBuffer() } void -WireBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta) +WireBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, + bool /*ruby_is_random*/, bool /*ruby_warmup*/) { m_msg_counter++; Tick arrival_time = current_time + delta; diff --git a/src/mem/ruby/structures/WireBuffer.hh b/src/mem/ruby/structures/WireBuffer.hh index b26043b09a..75dfc154c8 100644 --- a/src/mem/ruby/structures/WireBuffer.hh +++ b/src/mem/ruby/structures/WireBuffer.hh @@ -78,7 +78,10 @@ class WireBuffer : public SimObject void setDescription(const std::string& name) { m_description = name; }; std::string getDescription() { return m_description; }; - void enqueue(MsgPtr message, Tick current_time, Tick delta); + // 
ruby_is_random and ruby_warmup are not used, but this method signature + // must match that of MessageBuffer. + void enqueue(MsgPtr message, Tick current_time, Tick delta, + bool ruby_is_random = false, bool ruby_warmup = false); void dequeue(Tick current_time); const Message* peek(); void recycle(Tick current_time, Tick recycle_latency); diff --git a/src/mem/ruby/structures/WireBuffer.py b/src/mem/ruby/structures/WireBuffer.py index ca67e7cb31..8cb2cfe4d6 100644 --- a/src/mem/ruby/structures/WireBuffer.py +++ b/src/mem/ruby/structures/WireBuffer.py @@ -35,5 +35,3 @@ class RubyWireBuffer(SimObject): type = "RubyWireBuffer" cxx_class = "gem5::ruby::WireBuffer" cxx_header = "mem/ruby/structures/WireBuffer.hh" - - ruby_system = Param.RubySystem(Parent.any, "") diff --git a/src/mem/ruby/system/CacheRecorder.cc b/src/mem/ruby/system/CacheRecorder.cc index 3326856849..426c604cb0 100644 --- a/src/mem/ruby/system/CacheRecorder.cc +++ b/src/mem/ruby/system/CacheRecorder.cc @@ -49,31 +49,25 @@ TraceRecord::print(std::ostream& out) const << m_type << ", Time: " << m_time << "]"; } -CacheRecorder::CacheRecorder() - : m_uncompressed_trace(NULL), - m_uncompressed_trace_size(0), - m_block_size_bytes(RubySystem::getBlockSizeBytes()) -{ -} - CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace, uint64_t uncompressed_trace_size, std::vector& ruby_port_map, - uint64_t block_size_bytes) + uint64_t trace_block_size_bytes, + uint64_t system_block_size_bytes) : m_uncompressed_trace(uncompressed_trace), m_uncompressed_trace_size(uncompressed_trace_size), m_ruby_port_map(ruby_port_map), m_bytes_read(0), m_records_read(0), m_records_flushed(0), - m_block_size_bytes(block_size_bytes) + m_block_size_bytes(trace_block_size_bytes) { if (m_uncompressed_trace != NULL) { - if (m_block_size_bytes < RubySystem::getBlockSizeBytes()) { + if (m_block_size_bytes < system_block_size_bytes) { // Block sizes larger than when the trace was recorded are not // supported, as we cannot reliably turn 
accesses to smaller blocks // into larger ones. panic("Recorded cache block size (%d) < current block size (%d) !!", - m_block_size_bytes, RubySystem::getBlockSizeBytes()); + m_block_size_bytes, system_block_size_bytes); } } } @@ -125,7 +119,7 @@ CacheRecorder::enqueueNextFetchRequest() DPRINTF(RubyCacheTrace, "Issuing %s\n", *traceRecord); for (int rec_bytes_read = 0; rec_bytes_read < m_block_size_bytes; - rec_bytes_read += RubySystem::getBlockSizeBytes()) { + rec_bytes_read += m_block_size_bytes) { RequestPtr req; MemCmd::Command requestType; @@ -133,19 +127,19 @@ CacheRecorder::enqueueNextFetchRequest() requestType = MemCmd::ReadReq; req = std::make_shared( traceRecord->m_data_address + rec_bytes_read, - RubySystem::getBlockSizeBytes(), 0, + m_block_size_bytes, 0, Request::funcRequestorId); } else if (traceRecord->m_type == RubyRequestType_IFETCH) { requestType = MemCmd::ReadReq; req = std::make_shared( traceRecord->m_data_address + rec_bytes_read, - RubySystem::getBlockSizeBytes(), + m_block_size_bytes, Request::INST_FETCH, Request::funcRequestorId); } else { requestType = MemCmd::WriteReq; req = std::make_shared( traceRecord->m_data_address + rec_bytes_read, - RubySystem::getBlockSizeBytes(), 0, + m_block_size_bytes, 0, Request::funcRequestorId); } diff --git a/src/mem/ruby/system/CacheRecorder.hh b/src/mem/ruby/system/CacheRecorder.hh index 021da6a4da..982e8b0592 100644 --- a/src/mem/ruby/system/CacheRecorder.hh +++ b/src/mem/ruby/system/CacheRecorder.hh @@ -73,13 +73,15 @@ class TraceRecord class CacheRecorder { public: - CacheRecorder(); - ~CacheRecorder(); - + // Construction requires block size. 
+ CacheRecorder() = delete; CacheRecorder(uint8_t* uncompressed_trace, uint64_t uncompressed_trace_size, std::vector& ruby_port_map, - uint64_t block_size_bytes); + uint64_t trace_block_size_bytes, + uint64_t system_block_size_bytes); + ~CacheRecorder(); + void addRecord(int cntrl, Addr data_addr, Addr pc_addr, RubyRequestType type, Tick time, DataBlock& data); diff --git a/src/mem/ruby/system/DMASequencer.cc b/src/mem/ruby/system/DMASequencer.cc index aa3fc66814..cd9d62d12a 100644 --- a/src/mem/ruby/system/DMASequencer.cc +++ b/src/mem/ruby/system/DMASequencer.cc @@ -73,7 +73,7 @@ void DMASequencer::init() { RubyPort::init(); - m_data_block_mask = mask(RubySystem::getBlockSizeBits()); + m_data_block_mask = mask(m_ruby_system->getBlockSizeBits()); } RequestStatus @@ -110,8 +110,10 @@ DMASequencer::makeRequest(PacketPtr pkt) DPRINTF(RubyDma, "DMA req created: addr %p, len %d\n", line_addr, len); + int blk_size = m_ruby_system->getBlockSizeBytes(); + std::shared_ptr msg = - std::make_shared(clockEdge()); + std::make_shared(clockEdge(), blk_size, m_ruby_system); msg->getPhysicalAddress() = paddr; msg->getLineAddress() = line_addr; @@ -145,8 +147,8 @@ DMASequencer::makeRequest(PacketPtr pkt) int offset = paddr & m_data_block_mask; - msg->getLen() = (offset + len) <= RubySystem::getBlockSizeBytes() ? - len : RubySystem::getBlockSizeBytes() - offset; + msg->getLen() = (offset + len) <= m_ruby_system->getBlockSizeBytes() ? 
+ len : m_ruby_system->getBlockSizeBytes() - offset; if (write && (data != NULL)) { if (active_request.data != NULL) { @@ -157,7 +159,8 @@ DMASequencer::makeRequest(PacketPtr pkt) m_outstanding_count++; assert(m_mandatory_q_ptr != NULL); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); active_request.bytes_issued += msg->getLen(); return RequestStatus_Issued; @@ -183,8 +186,10 @@ DMASequencer::issueNext(const Addr& address) return; } + int blk_size = m_ruby_system->getBlockSizeBytes(); + std::shared_ptr msg = - std::make_shared(clockEdge()); + std::make_shared(clockEdge(), blk_size, m_ruby_system); msg->getPhysicalAddress() = active_request.start_paddr + active_request.bytes_completed; @@ -196,9 +201,9 @@ DMASequencer::issueNext(const Addr& address) msg->getLen() = (active_request.len - - active_request.bytes_completed < RubySystem::getBlockSizeBytes() ? + active_request.bytes_completed < m_ruby_system->getBlockSizeBytes() ? active_request.len - active_request.bytes_completed : - RubySystem::getBlockSizeBytes()); + m_ruby_system->getBlockSizeBytes()); if (active_request.write) { msg->getDataBlk(). 
@@ -207,7 +212,8 @@ DMASequencer::issueNext(const Addr& address) } assert(m_mandatory_q_ptr != NULL); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); active_request.bytes_issued += msg->getLen(); DPRINTF(RubyDma, "DMA request bytes issued %d, bytes completed %d, total len %d\n", diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc index 072c63efd7..4d66dc6c1b 100644 --- a/src/mem/ruby/system/GPUCoalescer.cc +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -142,8 +142,8 @@ UncoalescedTable::updateResources() // are accessed directly using the makeRequest() command // instead of accessing through the port. This makes // sending tokens through the port unnecessary - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!coalescer->getRubySystem()->getWarmupEnabled() && + !coalescer->getRubySystem()->getCooldownEnabled()) { if (reqTypeMap[seq_num] != RubyRequestType_FLUSH) { DPRINTF(GPUCoalescer, "Returning token seqNum %d\n", seq_num); @@ -177,7 +177,7 @@ UncoalescedTable::printRequestTable(std::stringstream& ss) ss << "Listing pending packets from " << instMap.size() << " instructions"; for (auto& inst : instMap) { - ss << "\tAddr: " << printAddress(inst.first) << " with " + ss << "\tAddr: " << coalescer->printAddress(inst.first) << " with " << inst.second.size() << " pending packets" << std::endl; } } @@ -590,7 +590,7 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest, // When the Ruby system is cooldown phase, the requests come from // the cache recorder. These requests do not get coalesced and // do not return valid data. 
- if (RubySystem::getCooldownEnabled()) + if (m_ruby_system->getCooldownEnabled()) continue; if (pkt->getPtr()) { @@ -700,8 +700,8 @@ GPUCoalescer::makeRequest(PacketPtr pkt) // When Ruby is in warmup or cooldown phase, the requests come from // the cache recorder. There is no dynamic instruction associated // with these requests either - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!m_ruby_system->getWarmupEnabled() + && !m_ruby_system->getCooldownEnabled()) { if (!m_usingRubyTester) { num_packets = 0; for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) { @@ -985,8 +985,8 @@ GPUCoalescer::completeHitCallback(std::vector & mylist) // When Ruby is in warmup or cooldown phase, the requests come // from the cache recorder. They do not track which port to use // and do not need to send the response back - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!m_ruby_system->getWarmupEnabled() + && !m_ruby_system->getCooldownEnabled()) { RubyPort::SenderState *ss = safe_cast(pkt->senderState); MemResponsePort *port = ss->port; @@ -1015,9 +1015,9 @@ GPUCoalescer::completeHitCallback(std::vector & mylist) } RubySystem *rs = m_ruby_system; - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { rs->m_cache_recorder->enqueueNextFetchRequest(); - } else if (RubySystem::getCooldownEnabled()) { + } else if (m_ruby_system->getCooldownEnabled()) { rs->m_cache_recorder->enqueueNextFlushRequest(); } else { testDrainComplete(); diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh index 42efe41cb7..08412baad1 100644 --- a/src/mem/ruby/system/GPUCoalescer.hh +++ b/src/mem/ruby/system/GPUCoalescer.hh @@ -341,6 +341,8 @@ class GPUCoalescer : public RubyPort void insertKernel(int wavefront_id, PacketPtr pkt); + RubySystem *getRubySystem() { return m_ruby_system; } + GMTokenPort& getGMTokenPort() { return gmTokenPort; } statistics::Histogram& 
getOutstandReqHist() { return m_outstandReqHist; } diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index 2630a6a27c..127f3c7802 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -326,6 +326,8 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) panic("Ruby supports atomic accesses only in noncaching mode\n"); } + RubySystem *rs = owner.m_ruby_system; + // Check for pio requests and directly send them to the dedicated // pio port. if (pkt->cmd != MemCmd::MemSyncReq) { @@ -343,12 +345,11 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) return owner.ticksToCycles(req_ticks); } - assert(getOffset(pkt->getAddr()) + pkt->getSize() <= - RubySystem::getBlockSizeBytes()); + assert(owner.getOffset(pkt->getAddr()) + pkt->getSize() <= + rs->getBlockSizeBytes()); } // Find the machine type of memory controller interface - RubySystem *rs = owner.m_ruby_system; static int mem_interface_type = -1; if (mem_interface_type == -1) { if (rs->m_abstract_controls[MachineType_Directory].size() != 0) { @@ -404,7 +405,7 @@ RubyPort::MemResponsePort::recvFunctional(PacketPtr pkt) } assert(pkt->getAddr() + pkt->getSize() <= - makeLineAddress(pkt->getAddr()) + RubySystem::getBlockSizeBytes()); + owner.makeLineAddress(pkt->getAddr()) + rs->getBlockSizeBytes()); if (access_backing_store) { // The attached physmem contains the official version of data. @@ -501,7 +502,7 @@ RubyPort::ruby_stale_translation_callback(Addr txnId) // assumed they will not be modified or deleted by receivers. // TODO: should this really be using funcRequestorId? 
auto request = std::make_shared( - 0, RubySystem::getBlockSizeBytes(), Request::TLBI_EXT_SYNC, + 0, m_ruby_system->getBlockSizeBytes(), Request::TLBI_EXT_SYNC, Request::funcRequestorId); // Store the txnId in extraData instead of the address request->setExtraData(txnId); @@ -701,7 +702,7 @@ RubyPort::ruby_eviction_callback(Addr address) // assumed they will not be modified or deleted by receivers. // TODO: should this really be using funcRequestorId? auto request = std::make_shared( - address, RubySystem::getBlockSizeBytes(), 0, + address, m_ruby_system->getBlockSizeBytes(), 0, Request::funcRequestorId); // Use a single packet to signal all snooping ports of the invalidation. @@ -739,5 +740,23 @@ RubyPort::functionalWrite(Packet *func_pkt) return num_written; } +Addr +RubyPort::getOffset(Addr addr) const +{ + return ruby::getOffset(addr, m_ruby_system->getBlockSizeBits()); +} + +Addr +RubyPort::makeLineAddress(Addr addr) const +{ + return ruby::makeLineAddress(addr, m_ruby_system->getBlockSizeBits()); +} + +std::string +RubyPort::printAddress(Addr addr) const +{ + return ruby::printAddress(addr, m_ruby_system->getBlockSizeBits()); +} + } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh index 66fe0a7686..39535930b3 100644 --- a/src/mem/ruby/system/RubyPort.hh +++ b/src/mem/ruby/system/RubyPort.hh @@ -181,6 +181,11 @@ class RubyPort : public ClockedObject virtual int functionalWrite(Packet *func_pkt); + // Helper methods for commonly used functions called in common/address.hh + Addr getOffset(Addr addr) const; + Addr makeLineAddress(Addr addr) const; + std::string printAddress(Addr addr) const; + protected: void trySendRetries(); void ruby_hit_callback(PacketPtr pkt); diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc index 21062eac14..fd7b262cb1 100644 --- a/src/mem/ruby/system/RubySystem.cc +++ b/src/mem/ruby/system/RubySystem.cc @@ -66,15 +66,8 @@ namespace gem5 
namespace ruby { -bool RubySystem::m_randomization; -uint32_t RubySystem::m_block_size_bytes; -uint32_t RubySystem::m_block_size_bits; -uint32_t RubySystem::m_memory_size_bits; -bool RubySystem::m_warmup_enabled = false; // To look forward to allowing multiple RubySystem instances, track the number // of RubySystems that need to be warmed up on checkpoint restore. -unsigned RubySystem::m_systems_to_warmup = 0; -bool RubySystem::m_cooldown_enabled = false; RubySystem::RubySystem(const Params &p) : ClockedObject(p), m_access_backing_store(p.access_backing_store), @@ -212,8 +205,8 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace, // Create the CacheRecorder and record the cache trace m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, - ruby_port_map, - block_size_bytes); + ruby_port_map, block_size_bytes, + m_block_size_bytes); } void @@ -331,7 +324,7 @@ RubySystem::serialize(CheckpointOut &cp) const // Store the cache-block size, so we are able to restore on systems // with a different cache-block size. CacheRecorder depends on the // correct cache-block size upon unserializing. - uint64_t block_size_bytes = getBlockSizeBytes(); + uint64_t block_size_bytes = m_block_size_bytes; SERIALIZE_SCALAR(block_size_bytes); // Check that there's a valid trace to use. If not, then memory won't @@ -416,7 +409,6 @@ RubySystem::unserialize(CheckpointIn &cp) readCompressedTrace(cache_trace_file, uncompressed_trace, cache_trace_size); m_warmup_enabled = true; - m_systems_to_warmup++; // Create the cache recorder that will hang around until startup. 
makeCacheRecorder(uncompressed_trace, cache_trace_size, block_size_bytes); @@ -467,10 +459,7 @@ RubySystem::startup() delete m_cache_recorder; m_cache_recorder = NULL; - m_systems_to_warmup--; - if (m_systems_to_warmup == 0) { - m_warmup_enabled = false; - } + m_warmup_enabled = false; // Restore eventq head eventq->replaceHead(eventq_head); @@ -509,7 +498,7 @@ bool RubySystem::functionalRead(PacketPtr pkt) { Addr address(pkt->getAddr()); - Addr line_address = makeLineAddress(address); + Addr line_address = makeLineAddress(address, m_block_size_bits); AccessPermission access_perm = AccessPermission_NotPresent; @@ -625,7 +614,7 @@ bool RubySystem::functionalRead(PacketPtr pkt) { Addr address(pkt->getAddr()); - Addr line_address = makeLineAddress(address); + Addr line_address = makeLineAddress(address, m_block_size_bits); DPRINTF(RubySystem, "Functional Read request for %#x\n", address); @@ -726,7 +715,7 @@ bool RubySystem::functionalWrite(PacketPtr pkt) { Addr addr(pkt->getAddr()); - Addr line_addr = makeLineAddress(addr); + Addr line_addr = makeLineAddress(addr, m_block_size_bits); AccessPermission access_perm = AccessPermission_NotPresent; DPRINTF(RubySystem, "Functional Write request for %#x\n", addr); diff --git a/src/mem/ruby/system/RubySystem.hh b/src/mem/ruby/system/RubySystem.hh index e16d699204..7e18770230 100644 --- a/src/mem/ruby/system/RubySystem.hh +++ b/src/mem/ruby/system/RubySystem.hh @@ -68,12 +68,12 @@ class RubySystem : public ClockedObject ~RubySystem(); // config accessors - static int getRandomization() { return m_randomization; } - static uint32_t getBlockSizeBytes() { return m_block_size_bytes; } - static uint32_t getBlockSizeBits() { return m_block_size_bits; } - static uint32_t getMemorySizeBits() { return m_memory_size_bits; } - static bool getWarmupEnabled() { return m_warmup_enabled; } - static bool getCooldownEnabled() { return m_cooldown_enabled; } + int getRandomization() { return m_randomization; } + uint32_t getBlockSizeBytes() { 
return m_block_size_bytes; } + uint32_t getBlockSizeBits() { return m_block_size_bits; } + uint32_t getMemorySizeBits() { return m_memory_size_bits; } + bool getWarmupEnabled() { return m_warmup_enabled; } + bool getCooldownEnabled() { return m_cooldown_enabled; } memory::SimpleMemory *getPhysMem() { return m_phys_mem; } Cycles getStartCycle() { return m_start_cycle; } @@ -134,14 +134,13 @@ class RubySystem : public ClockedObject void processRubyEvent(); private: // configuration parameters - static bool m_randomization; - static uint32_t m_block_size_bytes; - static uint32_t m_block_size_bits; - static uint32_t m_memory_size_bits; + bool m_randomization; + uint32_t m_block_size_bytes; + uint32_t m_block_size_bits; + uint32_t m_memory_size_bits; - static bool m_warmup_enabled; - static unsigned m_systems_to_warmup; - static bool m_cooldown_enabled; + bool m_warmup_enabled = false; + bool m_cooldown_enabled = false; memory::SimpleMemory *m_phys_mem; const bool m_access_backing_store; @@ -158,6 +157,11 @@ class RubySystem : public ClockedObject Profiler* m_profiler; CacheRecorder* m_cache_recorder; std::vector > m_abstract_controls; + std::map m_num_controllers; + + // These are auto-generated by SLICC based on the built protocol. 
+ int MachineType_base_count(const MachineType& obj); + int MachineType_base_number(const MachineType& obj); }; } // namespace ruby diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 4b0c6a239c..e2f49f5dff 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -73,6 +73,8 @@ Sequencer::Sequencer(const Params &p) { m_outstanding_count = 0; + m_ruby_system = p.ruby_system; + m_dataCache_ptr = p.dcache; m_max_outstanding_requests = p.max_outstanding_requests; m_deadlock_threshold = p.deadlock_threshold; @@ -726,7 +728,7 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, printAddress(request_address)); // update the data unless it is a non-data-carrying flush - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { data.setData(pkt); } else if (!pkt->isFlush()) { if ((type == RubyRequestType_LD) || @@ -782,11 +784,11 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, } RubySystem *rs = m_ruby_system; - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { assert(pkt->req); delete pkt; rs->m_cache_recorder->enqueueNextFetchRequest(); - } else if (RubySystem::getCooldownEnabled()) { + } else if (m_ruby_system->getCooldownEnabled()) { delete pkt; rs->m_cache_recorder->enqueueNextFlushRequest(); } else { @@ -852,8 +854,8 @@ Sequencer::completeHitCallback(std::vector & mylist) // When Ruby is in warmup or cooldown phase, the requests come // from the cache recorder. 
They do not track which port to use // and do not need to send the response back - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!m_ruby_system->getWarmupEnabled() + && !m_ruby_system->getCooldownEnabled()) { RubyPort::SenderState *ss = safe_cast(pkt->senderState); MemResponsePort *port = ss->port; @@ -873,9 +875,9 @@ Sequencer::completeHitCallback(std::vector & mylist) } RubySystem *rs = m_ruby_system; - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { rs->m_cache_recorder->enqueueNextFetchRequest(); - } else if (RubySystem::getCooldownEnabled()) { + } else if (m_ruby_system->getCooldownEnabled()) { rs->m_cache_recorder->enqueueNextFlushRequest(); } else { testDrainComplete(); @@ -910,14 +912,16 @@ Sequencer::invL1() // Evict Read-only data RubyRequestType request_type = RubyRequestType_REPLACEMENT; std::shared_ptr msg = std::make_shared( - clockEdge(), addr, 0, 0, - request_type, RubyAccessMode_Supervisor, + clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system, + addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr); DPRINTF(RubySequencer, "Evicting addr 0x%x\n", addr); assert(m_mandatory_q_ptr != NULL); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(request_type)); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); m_num_pending_invs++; } DPRINTF(RubySequencer, @@ -1080,11 +1084,14 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) pc = pkt->req->getPC(); } + int blk_size = m_ruby_system->getBlockSizeBytes(); + // check if the packet has data as for example prefetch and flush // requests do not std::shared_ptr msg; if (pkt->req->isMemMgmt()) { - msg = std::make_shared(clockEdge(), + msg = std::make_shared(clockEdge(), blk_size, + m_ruby_system, pc, secondary_type, RubyAccessMode_Supervisor, pkt, 
proc_id, core_id); @@ -1111,8 +1118,10 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) msg->m_tlbiTransactionUid); } } else { - msg = std::make_shared(clockEdge(), pkt->getAddr(), - pkt->getSize(), pc, secondary_type, + msg = std::make_shared(clockEdge(), blk_size, + m_ruby_system, + pkt->getAddr(), pkt->getSize(), + pc, secondary_type, RubyAccessMode_Supervisor, pkt, PrefetchBit_No, proc_id, core_id); @@ -1147,7 +1156,9 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) assert(latency > 0); assert(m_mandatory_q_ptr != NULL); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); } template @@ -1194,7 +1205,7 @@ Sequencer::incrementUnaddressedTransactionCnt() // Limit m_unaddressedTransactionCnt to 32 bits, // top 32 bits should always be zeroed out uint64_t aligned_txid = \ - m_unaddressedTransactionCnt << RubySystem::getBlockSizeBits(); + m_unaddressedTransactionCnt << m_ruby_system->getBlockSizeBits(); if (aligned_txid > 0xFFFFFFFFull) { m_unaddressedTransactionCnt = 0; @@ -1206,7 +1217,7 @@ Sequencer::getCurrentUnaddressedTransactionID() const { return ( uint64_t(m_version & 0xFFFFFFFF) << 32) | - (m_unaddressedTransactionCnt << RubySystem::getBlockSizeBits() + (m_unaddressedTransactionCnt << m_ruby_system->getBlockSizeBits() ); } diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index 1f60d2638f..ee16d2fe2e 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -254,6 +254,8 @@ class Sequencer : public RubyPort RubyRequestType primary_type, RubyRequestType secondary_type); + RubySystem *m_ruby_system; + private: int m_max_outstanding_requests; diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py index 3f570fb952..0994bb4afe 100644 --- a/src/mem/ruby/system/Sequencer.py +++ 
b/src/mem/ruby/system/Sequencer.py @@ -83,7 +83,7 @@ class RubyPort(ClockedObject): using_ruby_tester = Param.Bool(False, "") no_retry_on_stall = Param.Bool(False, "") - ruby_system = Param.RubySystem(Parent.any, "") + ruby_system = Param.RubySystem("Parent RubySystem object") system = Param.System(Parent.any, "system object") support_data_reqs = Param.Bool(True, "data cache requests supported") support_inst_reqs = Param.Bool(True, "inst cache requests supported") diff --git a/src/mem/ruby/system/VIPERCoalescer.cc b/src/mem/ruby/system/VIPERCoalescer.cc index 47ceced3a7..67dd88fb2e 100644 --- a/src/mem/ruby/system/VIPERCoalescer.cc +++ b/src/mem/ruby/system/VIPERCoalescer.cc @@ -135,9 +135,9 @@ VIPERCoalescer::issueRequest(CoalescedRequest* crequest) // Creating WriteMask that records written bytes // and atomic operations. This enables partial writes // and partial reads of those writes - DataBlock dataBlock; + uint32_t blockSize = m_ruby_system->getBlockSizeBytes(); + DataBlock dataBlock(blockSize); dataBlock.clear(); - uint32_t blockSize = RubySystem::getBlockSizeBytes(); std::vector accessMask(blockSize,false); std::vector< std::pair > atomicOps; uint32_t tableSize = crequest->getPackets().size(); @@ -159,15 +159,17 @@ VIPERCoalescer::issueRequest(CoalescedRequest* crequest) } std::shared_ptr msg; if (pkt->isAtomicOp()) { - msg = std::make_shared(clockEdge(), pkt->getAddr(), - pkt->getSize(), pc, crequest->getRubyType(), + msg = std::make_shared(clockEdge(), blockSize, + m_ruby_system, pkt->getAddr(), pkt->getSize(), + pc, crequest->getRubyType(), RubyAccessMode_Supervisor, pkt, PrefetchBit_No, proc_id, 100, blockSize, accessMask, dataBlock, atomicOps, crequest->getSeqNum()); } else { - msg = std::make_shared(clockEdge(), pkt->getAddr(), - pkt->getSize(), pc, crequest->getRubyType(), + msg = std::make_shared(clockEdge(), blockSize, + m_ruby_system, pkt->getAddr(), pkt->getSize(), + pc, crequest->getRubyType(), RubyAccessMode_Supervisor, pkt, PrefetchBit_No, 
proc_id, 100, blockSize, accessMask, @@ -195,7 +197,9 @@ VIPERCoalescer::issueRequest(CoalescedRequest* crequest) assert(m_mandatory_q_ptr); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(crequest->getRubyType())); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); } void @@ -241,7 +245,7 @@ VIPERCoalescer::writeCompleteCallback(Addr addr, uint64_t instSeqNum) std::remove_if( m_writeCompletePktMap[key].begin(), m_writeCompletePktMap[key].end(), - [addr](PacketPtr writeCompletePkt) -> bool { + [this,addr](PacketPtr writeCompletePkt) -> bool { if (makeLineAddress(writeCompletePkt->getAddr()) == addr) { RubyPort::SenderState *ss = safe_cast @@ -296,14 +300,15 @@ VIPERCoalescer::invTCP() // Evict Read-only data RubyRequestType request_type = RubyRequestType_REPLACEMENT; std::shared_ptr msg = std::make_shared( - clockEdge(), addr, 0, 0, - request_type, RubyAccessMode_Supervisor, - nullptr); + clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system, + addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr); DPRINTF(GPUCoalescer, "Evicting addr 0x%x\n", addr); assert(m_mandatory_q_ptr != NULL); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(request_type)); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); m_num_pending_invs++; } DPRINTF(GPUCoalescer, @@ -343,16 +348,17 @@ VIPERCoalescer::invTCC(PacketPtr pkt) RubyRequestType request_type = RubyRequestType_InvL2; std::shared_ptr msg = std::make_shared( - clockEdge(), addr, 0, 0, - request_type, RubyAccessMode_Supervisor, - nullptr); + clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system, + addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr); DPRINTF(GPUCoalescer, "Sending L2 
invalidate to 0x%x\n", addr); assert(m_mandatory_q_ptr); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(request_type)); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); m_pending_invl2s[addr].push_back(pkt); } diff --git a/src/mem/ruby/system/VIPERSequencer.cc b/src/mem/ruby/system/VIPERSequencer.cc index ac840777d4..b8b806aa9c 100644 --- a/src/mem/ruby/system/VIPERSequencer.cc +++ b/src/mem/ruby/system/VIPERSequencer.cc @@ -81,8 +81,8 @@ VIPERSequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, // subBlock with the recieved data. The tester will later access // this state. assert(!m_usingRubyTester); - assert(!RubySystem::getWarmupEnabled()); - assert(!RubySystem::getCooldownEnabled()); + assert(!m_ruby_system->getWarmupEnabled()); + assert(!m_ruby_system->getCooldownEnabled()); ruby_hit_callback(pkt); testDrainComplete(); } diff --git a/src/mem/slicc/ast/CheckProbeStatementAST.py b/src/mem/slicc/ast/CheckProbeStatementAST.py index 10945cfc30..14f6f7e4fa 100644 --- a/src/mem/slicc/ast/CheckProbeStatementAST.py +++ b/src/mem/slicc/ast/CheckProbeStatementAST.py @@ -49,7 +49,8 @@ class CheckProbeStatementAST(StatementAST): if (m_is_blocking && (m_block_map.count($address_code) == 1) && (m_block_map[$address_code] == &$in_port_code)) { - $in_port_code.delayHead(clockEdge(), cyclesToTicks(Cycles(1))); + $in_port_code.delayHead(clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); continue; } """ diff --git a/src/mem/slicc/ast/DeferEnqueueingStatementAST.py b/src/mem/slicc/ast/DeferEnqueueingStatementAST.py index 14b2e48cd3..4bb446aee2 100644 --- a/src/mem/slicc/ast/DeferEnqueueingStatementAST.py +++ b/src/mem/slicc/ast/DeferEnqueueingStatementAST.py @@ -68,7 +68,8 @@ class DeferEnqueueingStatementAST(StatementAST): # Declare message code( 
"std::shared_ptr<${{msg_type.c_ident}}> out_msg = " - "std::make_shared<${{msg_type.c_ident}}>(clockEdge());" + "std::make_shared<${{msg_type.c_ident}}>(clockEdge()," + " m_ruby_system->getBlockSizeBytes(), m_ruby_system);" ) # The other statements diff --git a/src/mem/slicc/ast/EnqueueStatementAST.py b/src/mem/slicc/ast/EnqueueStatementAST.py index c2d47af9ce..b026f6e7a9 100644 --- a/src/mem/slicc/ast/EnqueueStatementAST.py +++ b/src/mem/slicc/ast/EnqueueStatementAST.py @@ -76,7 +76,8 @@ class EnqueueStatementAST(StatementAST): # Declare message code( "std::shared_ptr<${{msg_type.c_ident}}> out_msg = " - "std::make_shared<${{msg_type.c_ident}}>(clockEdge());" + "std::make_shared<${{msg_type.c_ident}}>(clockEdge(), " + " m_ruby_system->getBlockSizeBytes(), m_ruby_system);" ) # The other statements @@ -89,17 +90,21 @@ class EnqueueStatementAST(StatementAST): bypass_strict_fifo_code = self.bypass_strict_fifo.inline(False) code( "(${{self.queue_name.var.code}}).enqueue(" - "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), $bypass_strict_fifo_code);" + "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), " + "m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled(), " + "$bypass_strict_fifo_code);" ) else: code( "(${{self.queue_name.var.code}}).enqueue(" - "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)));" + "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), " + "m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled());" ) else: code( "(${{self.queue_name.var.code}}).enqueue(out_msg, " - "clockEdge(), cyclesToTicks(Cycles(1)));" + "clockEdge(), cyclesToTicks(Cycles(1))," + "m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled());" ) # End scope diff --git a/src/mem/slicc/ast/LocalVariableAST.py b/src/mem/slicc/ast/LocalVariableAST.py index b4ac8f446b..43ab110a67 100644 --- a/src/mem/slicc/ast/LocalVariableAST.py +++ b/src/mem/slicc/ast/LocalVariableAST.py @@ -73,6 +73,8 @@ class LocalVariableAST(StatementAST): ) ): 
code += f"{type.c_ident}* {ident}" + elif "implicit_ctor" in type: + code += f"{type.c_ident} {ident}({type['implicit_ctor']})" else: code += f"{type.c_ident} {ident}" return type diff --git a/src/mem/slicc/ast/PeekStatementAST.py b/src/mem/slicc/ast/PeekStatementAST.py index 00edff4e7b..415f4ec465 100644 --- a/src/mem/slicc/ast/PeekStatementAST.py +++ b/src/mem/slicc/ast/PeekStatementAST.py @@ -93,7 +93,8 @@ class PeekStatementAST(StatementAST): if (m_is_blocking && (m_block_map.count(in_msg_ptr->m_$address_field) == 1) && (m_block_map[in_msg_ptr->m_$address_field] != &$qcode)) { - $qcode.delayHead(clockEdge(), cyclesToTicks(Cycles(1))); + $qcode.delayHead(clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); continue; } """ diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py index b523522501..6202d2d239 100644 --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -352,7 +352,6 @@ class $c_ident : public AbstractController public: typedef ${c_ident}Params Params; $c_ident(const Params &p); - static int getNumControllers(); void init(); MessageBuffer *getMandatoryQueue() const; @@ -449,9 +448,8 @@ int m_counters[${ident}_State_NUM][${ident}_Event_NUM]; int m_event_counters[${ident}_Event_NUM]; bool m_possible[${ident}_State_NUM][${ident}_Event_NUM]; -static std::vector eventVec; -static std::vector > transVec; -static int m_num_controllers; +std::vector eventVec; +std::vector > transVec; // Internal functions """ @@ -625,10 +623,6 @@ namespace gem5 namespace ruby { -int $c_ident::m_num_controllers = 0; -std::vector $c_ident::eventVec; -std::vector > $c_ident::transVec; - // for adding information to the protocol debug trace std::stringstream ${ident}_transitionComment; @@ -644,8 +638,9 @@ $c_ident::$c_ident(const Params &p) { m_machineID.type = MachineType_${ident}; m_machineID.num = m_version; - m_num_controllers++; + 
p.ruby_system->m_num_controllers[MachineType_${ident}]++; p.ruby_system->registerAbstractController(this); + m_ruby_system = p.ruby_system; m_in_ports = $num_in_ports; """ @@ -699,7 +694,7 @@ void $c_ident::initNetQueues() { MachineType machine_type = string_to_MachineType("${{self.ident}}"); - [[maybe_unused]] int base = MachineType_base_number(machine_type); + [[maybe_unused]] int base = m_ruby_system->MachineType_base_number(machine_type); """ ) @@ -776,6 +771,17 @@ $c_ident::init() comment = f"Type {vtype.ident} default" code('*$vid = ${{vtype["default"]}}; // $comment') + # For objects that require knowing the cache line size, + # set the value here. + if vtype.c_ident in ("TBETable"): + block_size_func = "m_ruby_system->getBlockSizeBytes()" + code(f"(*{vid}).setBlockSize({block_size_func});") + + for param in self.config_parameters: + if param.type_ast.type.ident == "CacheMemory": + assert param.pointer + code(f"m_{param.ident}_ptr->setRubySystem(m_ruby_system);") + # Set the prefetchers code() for prefetcher in self.prefetchers: @@ -942,7 +948,9 @@ $c_ident::regStats() "${c_ident}." + ${ident}_Event_to_string(event); statistics::Vector *t = new statistics::Vector(profilerStatsPtr, stat_name.c_str()); - t->init(m_num_controllers); + int num_controllers = + m_ruby_system->m_num_controllers[MachineType_${ident}]; + t->init(num_controllers); t->flags(statistics::pdf | statistics::total | statistics::oneline | statistics::nozero); @@ -961,7 +969,9 @@ $c_ident::regStats() "." 
+ ${ident}_Event_to_string(event); statistics::Vector *t = new statistics::Vector( profilerStatsPtr, stat_name.c_str()); - t->init(m_num_controllers); + int num_controllers = + m_ruby_system->m_num_controllers[MachineType_${ident}]; + t->init(num_controllers); t->flags(statistics::pdf | statistics::total | statistics::oneline | statistics::nozero); transVec[state].push_back(t); @@ -1062,9 +1072,12 @@ $c_ident::regStats() void $c_ident::collateStats() { + int num_controllers = + m_ruby_system->m_num_controllers[MachineType_${ident}]; + for (${ident}_Event event = ${ident}_Event_FIRST; event < ${ident}_Event_NUM; ++event) { - for (unsigned int i = 0; i < m_num_controllers; ++i) { + for (unsigned int i = 0; i < num_controllers; ++i) { RubySystem *rs = params().ruby_system; std::map::iterator it = rs->m_abstract_controls[MachineType_${ident}].find(i); @@ -1080,7 +1093,7 @@ $c_ident::collateStats() for (${ident}_Event event = ${ident}_Event_FIRST; event < ${ident}_Event_NUM; ++event) { - for (unsigned int i = 0; i < m_num_controllers; ++i) { + for (unsigned int i = 0; i < num_controllers; ++i) { RubySystem *rs = params().ruby_system; std::map::iterator it = rs->m_abstract_controls[MachineType_${ident}].find(i); @@ -1125,12 +1138,6 @@ $c_ident::getTransitionCount(${ident}_State state, return m_counters[state][event]; } -int -$c_ident::getNumControllers() -{ - return m_num_controllers; -} - MessageBuffer* $c_ident::getMandatoryQueue() const { @@ -1181,6 +1188,7 @@ void $c_ident::set_cache_entry(${{self.EntryType.c_ident}}*& m_cache_entry_ptr, AbstractCacheEntry* m_new_cache_entry) { m_cache_entry_ptr = (${{self.EntryType.c_ident}}*)m_new_cache_entry; + m_cache_entry_ptr->setRubySystem(m_ruby_system); } void @@ -1200,6 +1208,7 @@ void $c_ident::set_tbe(${{self.TBEType.c_ident}}*& m_tbe_ptr, ${{self.TBEType.c_ident}}* m_new_tbe) { m_tbe_ptr = m_new_tbe; + m_tbe_ptr->setRubySystem(m_ruby_system); } void diff --git a/src/mem/slicc/symbols/Type.py 
b/src/mem/slicc/symbols/Type.py index 535a4165b3..53c8ff877e 100644 --- a/src/mem/slicc/symbols/Type.py +++ b/src/mem/slicc/symbols/Type.py @@ -119,6 +119,10 @@ class Type(Symbol): def isMessage(self): return "message" in self + @property + def isTBE(self): + return "tbe" in self + @property def isBuffer(self): return "buffer" in self @@ -250,18 +254,54 @@ namespace gem5 namespace ruby { +class RubySystem; + $klass ${{self.c_ident}}$parent { public: - ${{self.c_ident}} """, klass="class", ) if self.isMessage: - code("(Tick curTime) : %s(curTime) {" % self["interface"]) + code( + "${{self.c_ident}}(Tick curTime, int blockSize, RubySystem* rs) : %s(curTime, blockSize, rs)" + % self["interface"] + ) + + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + code(f"\t\t, m_{dm.ident}(blockSize)") + + code("{") + elif self.isTBE: + code("${{self.c_ident}}(int block_size)") + + ctor_count = 0 + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + if ctor_count == 0: + code("\t:") + else: + code("\t, ") + code(f"\t\tm_{dm.ident}(block_size)") + ctor_count += 1 + + code("{") else: - code("()\n\t\t{") + code("${{self.c_ident}}()") + + ctor_count = 0 + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + if ctor_count == 0: + code("\t:") + else: + code("\t, ") + code(f"\t\tm_{dm.ident}(0)") + ctor_count += 1 + + code("{") code.indent() if not self.isGlobal: @@ -280,6 +320,12 @@ $klass ${{self.c_ident}}$parent code(" // default value of $tid") else: code("// m_$ident has no default") + + # These parts of Messages need RubySystem pointers. For things + # like Entry which only store NetDest, RubySystem is not needed. 
+ if self.isMessage and dm.real_c_type == "NetDest": + code("// m_$ident requires RubySystem") + code("m_$ident.setRubySystem(rs);") code.dedent() code("}") @@ -300,21 +346,45 @@ $klass ${{self.c_ident}}$parent params = ", ".join(params) if self.isMessage: - params = "const Tick curTime, " + params + params = ( + "const Tick curTime, const int blockSize, const RubySystem *rs, " + + params + ) code("${{self.c_ident}}($params)") # Call superclass constructor if "interface" in self: if self.isMessage: - code(' : ${{self["interface"]}}(curTime)') + code( + ' : ${{self["interface"]}}(curTime, blockSize, rs)' + ) + + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + code(f"\t\t, m_{dm.ident}(blockSize)") else: code(' : ${{self["interface"]}}()') + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + code(f"\t\t, m_{dm.ident}(local_{dm.ident})") + else: + ctor_count = 0 + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + if ctor_count == 0: + code("\t:") + else: + code("\t, ") + code(f"\t\tm_{dm.ident}(local_{dm.ident})") + ctor_count += 1 + code("{") code.indent() for dm in self.data_members.values(): - code("m_${{dm.ident}} = local_${{dm.ident}};") + if not dm.real_c_type in ("DataBlock", "WriteMask"): + code("m_${{dm.ident}} = local_${{dm.ident}};") code.dedent() code("}") @@ -342,6 +412,35 @@ clone() const ) if not self.isGlobal: + # Block size setter for fields that require block size + # Intentionally do not begin function name with "set" in case + # the user has a field named BlockSize which would conflict + # with the method generated below. 
+ code("\nvoid initBlockSize(int block_size)") + code("{") + code("\tblock_size_bits = floorLog2(block_size);") + + needs_block_size = ( + "DataBlock", + "WriteMask", + "PersistentTable", + "TimerTable", + "PerfectCacheMemory", + ) + + for dm in self.data_members.values(): + if dm.real_c_type in needs_block_size: + code(f"\tm_{dm.ident}.setBlockSize(block_size);") + code("}\n") + + code("\nvoid setRubySystem(RubySystem *ruby_system)") + code("{") + for dm in self.data_members.values(): + if dm.real_c_type in ("NetDest"): + code(f"// m_{dm.ident} requires RubySystem") + code(f"\tm_{dm.ident}.setRubySystem(ruby_system);") + code("}\n") + # const Get methods for each field code("// Const accessors methods for each field") for dm in self.data_members.values(): @@ -393,6 +492,9 @@ set${{dm.ident}}(const ${{dm.real_c_type}}& local_${{dm.ident}}) code(" //private:") code.indent() + # block_size_bits for print methods + code("int block_size_bits = 0;") + # Data members for each field for dm in self.data_members.values(): if "abstract" not in dm: @@ -473,7 +575,7 @@ ${{self.c_ident}}::print(std::ostream& out) const if dm.type.c_ident == "Addr": code( """ -out << "${{dm.ident}} = " << printAddress(m_${{dm.ident}}) << " ";""" +out << "${{dm.ident}} = " << printAddress(m_${{dm.ident}}, block_size_bits) << " ";""" ) else: code('out << "${{dm.ident}} = " << m_${{dm.ident}} << " ";' "") @@ -846,7 +948,7 @@ ${{self.c_ident}}_from_base_level(int type) * \\return the base number of components for each machine */ int -${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) +RubySystem::${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) { int base = 0; switch(obj) { @@ -860,7 +962,7 @@ ${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) # Check if there is a defined machine with this type if enum.primary: code( - " base += ${{enum.ident}}_Controller::getNumControllers();" + "\tbase += m_num_controllers[${{self.c_ident}}_${{enum.ident}}];" ) else: code(" base += 
0;") @@ -882,7 +984,7 @@ ${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) * \\return the total number of components for each machine */ int -${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj) +RubySystem::${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj) { switch(obj) { """ @@ -893,7 +995,7 @@ ${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj) code("case ${{self.c_ident}}_${{enum.ident}}:") if enum.primary: code( - "return ${{enum.ident}}_Controller::getNumControllers();" + "return m_num_controllers[${{self.c_ident}}_${{enum.ident}}];" ) else: code("return 0;") diff --git a/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py index 29df2a969c..a469fead61 100644 --- a/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py @@ -137,7 +137,9 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) def _create_core_cluster( @@ -167,12 +169,16 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): ) cluster.icache.sequencer = RubySequencer( - version=core_num, dcache=NULL, clk_domain=cluster.icache.clk_domain + version=core_num, + dcache=NULL, + clk_domain=cluster.icache.clk_domain, + ruby_system=self.ruby_system, ) cluster.dcache.sequencer = RubySequencer( version=core_num, dcache=cluster.dcache.cache, clk_domain=cluster.dcache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -223,7 +229,11 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): board.get_clock_domain(), ) version = len(board.get_processor().get_cores()) + i - ctrl.sequencer = RubySequencer(version=version, in_ports=port) + ctrl.sequencer = RubySequencer( + version=version, + in_ports=port, + ruby_system=self.ruby_system, + ) ctrl.sequencer.dcache = NULL ctrl.ruby_system = self.ruby_system diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py index 4840e3b264..d0c54840fc 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py @@ -37,7 +37,7 @@ class Directory(AbstractDirectory): def __init__(self, network, cache_line_size, mem_range, port): super().__init__(network, cache_line_size) self.addr_ranges = [mem_range] - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory(block_size=cache_line_size) # Connect this directory to the memory side. 
self.memory_out_port = port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py index 6d203f978a..ef90ac79f6 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py @@ -80,7 +80,7 @@ class L1Cache(L0Cache_Controller): replacement_policy=LRURP(), ) self.clk_domain = clk_domain - self.prefetcher = RubyPrefetcher() + self.prefetcher = RubyPrefetcher(block_size=cache_line_size) self.send_evictions = core.requires_send_evicts() self.transitions_per_cycle = 32 self.enable_prefetch = False diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py index ff2b8e3dd9..7c473f8be9 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py @@ -75,7 +75,7 @@ class L2Cache(L1Cache_Controller): self.l2_select_num_bits = int(math.log(num_l3Caches, 2)) self.cluster_id = cluster_id self.clk_domain = clk_domain - self.prefetcher = RubyPrefetcher() + self.prefetcher = RubyPrefetcher(block_size=cache_line_size) self.transitions_per_cycle = 32 # l1_request_latency, l1_response_latency, to_l2_latency are # ruby backend terminology. 
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py index 4840e3b264..d0c54840fc 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py @@ -37,7 +37,7 @@ class Directory(AbstractDirectory): def __init__(self, network, cache_line_size, mem_range, port): super().__init__(network, cache_line_size) self.addr_ranges = [mem_range] - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory(block_size=cache_line_size) # Connect this directory to the memory side. self.memory_out_port = port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py index 7787644c9b..13625beea7 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py @@ -73,7 +73,7 @@ class L1Cache(AbstractL1Cache): ) self.l2_select_num_bits = int(math.log(num_l2Caches, 2)) self.clk_domain = clk_domain - self.prefetcher = RubyPrefetcher() + self.prefetcher = RubyPrefetcher(block_size=cache_line_size) self.send_evictions = core.requires_send_evicts() self.transitions_per_cycle = 4 self.enable_prefetch = False diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py index 3d1ae54104..79e40e9e01 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py @@ -41,7 +41,7 @@ class Directory(AbstractDirectory): def __init__(self, network, cache_line_size, 
mem_range, port): super().__init__(network, cache_line_size) self.addr_ranges = [mem_range] - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory(block_size=cache_line_size) # Connect this directory to the memory side. self.memory_out_port = port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py index 9aa0dc4a36..212c06c4c3 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py @@ -143,6 +143,7 @@ class CoreComplex(SubSystem, RubyNetworkComponent): version=core_id, dcache=cluster.l1_cache.Dcache, clk_domain=cluster.l1_cache.clk_domain, + ruby_system=self._ruby_system, ) if self._board.has_io_bus(): diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py index f7d4d63de1..83137ce15a 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py @@ -151,7 +151,9 @@ class OctopiCache( # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) def _create_directory_controllers(self, board): @@ -228,7 +230,11 @@ class OctopiCache( if board.has_dma_ports(): self.ruby_system.dma_controllers = [ DMAController( - dma_sequencer=DMASequencer(version=i + 1, in_ports=port), + dma_sequencer=DMASequencer( + version=i + 1, + in_ports=port, + ruby_system=self.ruby_system, + ), ruby_system=self.ruby_system, ) for i, port in enumerate(board.get_dma_ports()) diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py index 66fea95636..92e8860a24 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py @@ -118,6 +118,7 @@ class MESIThreeLevelCacheHierarchy( version=core_idx, dcache=l1_cache.Dcache, clk_domain=l1_cache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -196,7 +197,12 @@ class MESIThreeLevelCacheHierarchy( dma_ports = board.get_dma_ports() for i, port in enumerate(dma_ports): ctrl = DMAController( - DMASequencer(version=i, in_ports=port), self.ruby_system + DMASequencer( + version=i, + in_ports=port, + ruby_system=self.ruby_system, + ), + self.ruby_system, ) self._dma_controllers.append(ctrl) @@ -223,5 +229,7 @@ class MESIThreeLevelCacheHierarchy( # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py index 004c2ff9d2..efe714c23c 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py @@ -109,7 +109,10 @@ class MESITwoLevelCacheHierarchy( ) cache.sequencer = RubySequencer( - version=i, dcache=cache.L1Dcache, clk_domain=cache.clk_domain + version=i, + dcache=cache.L1Dcache, + clk_domain=cache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -163,7 +166,11 @@ class MESITwoLevelCacheHierarchy( dma_ports = board.get_dma_ports() for i, port in enumerate(dma_ports): ctrl = DMAController(self.ruby_system.network, cache_line_size) - ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port) + ctrl.dma_sequencer = DMASequencer( + version=i, + in_ports=port, + ruby_system=self.ruby_system, + ) self._dma_controllers.append(ctrl) ctrl.ruby_system = self.ruby_system @@ -188,5 +195,7 @@ class MESITwoLevelCacheHierarchy( # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) diff --git a/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py index 478c793560..56e620ff0c 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py @@ -95,6 +95,7 @@ class MIExampleCacheHierarchy(AbstractRubyCacheHierarchy): version=i, dcache=cache.cacheMemory, clk_domain=cache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -140,7 +141,11 @@ class MIExampleCacheHierarchy(AbstractRubyCacheHierarchy): ctrl = DMAController( self.ruby_system.network, board.get_cache_line_size() ) - ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port) + ctrl.dma_sequencer = DMASequencer( + version=i, + in_ports=port, + ruby_system=self.ruby_system, + ) ctrl.ruby_system = self.ruby_system ctrl.dma_sequencer.ruby_system = self.ruby_system @@ -167,5 +172,7 @@ class MIExampleCacheHierarchy(AbstractRubyCacheHierarchy): # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. - self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports)