From 4f7b3ed82741a6adc198d1b0cf818f6fa2c93bde Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Tue, 8 Oct 2024 08:14:50 -0700 Subject: [PATCH] mem-ruby: Remove static methods from RubySystem (#1453) There are several parts to this PR to work towards #1349 . (1) Make RubySystem::getBlockSizeBytes non-static by providing ways to access the block size or passing the block size explicitly to classes. The main changes are: - DataBlocks must be explicitly allocated. A default ctor still exists to avoid needing to heavily modify SLICC. The size can be set using a realloc function, operator=, or copy ctor. This is handled completely transparently meaning no protocol or config changes are required. - WriteMask now requires block size to be set. This is also handled transparently by modifying the SLICC parser to identify WriteMask types and call setBlockSize(). - AbstractCacheEntry and TBE classes now require block size to be set. This is handled transparently by modifying the SLICC parser to identify these classes and call initBlockSize() which calls setBlockSize() for any DataBlock or WriteMask. - All AbstractControllers now have a pointer to RubySystem. This is assigned in SLICC generated code and requires no changes to protocol or configs. - The Ruby Message class now requires block size in all constructors. This is added to the argument list automatically by the SLICC parser. (2) Relax dependence on common functions in src/mem/ruby/common/Address.hh so that RubySystem::getBlockSizeBits is no longer static. Many classes already have a way to get block size from the previous commit, so they simply multiply by 8 to get the number of bits. For handling SLICC and reducing the number of changes, define makeCacheLine, getOffset, etc. in RubyPort and AbstractController. The only protocol changes required are to change any "RubySystem::foo()" calls with "m_ruby_system->foo()". 
For classes which do not have a way to get access to block size but still used makeLineAddress, getOffset, etc., the block size must be passed to that class. This requires some changes to the SimObject interface for two commonly used classes: DirectoryMemory and RubyPrefetcher, resulting in user-facing API changes. User-facing API changes: - DirectoryMemory and RubyPrefetcher now require the cache line size as a non-optional argument. - RubySequencer SimObjects now require RubySystem as a non-optional argument. - TesterThread in the GPU ruby tester now requires the cache line size as a non-optional argument. (3) Removes static member variables in RubySystem which control randomization, cooldown, and warmup. These are mostly used by the Ruby Network. The network classes are modified to take these former static variables as parameters which are passed to the corresponding method (e.g., enqueue, delayHead, etc.) rather than needing a RubySystem object at all. Change-Id: Ia63c2ad5cf0bf9d1cbdffba5d3a679bb4d3b1220 (4) There are two major SLICC generated static methods: getNumControllers() on each cache controller which returns the number of controllers created by the configs at run time and the functions which access this method, which are MachineType_base_count and MachineType_base_number. These need to be removed to create multiple RubySystem objects otherwise NetDest, version value, and other objects are incorrect. To remove the static requirement, MachineType_base_count and MachineType_base_number are moved to RubySystem. Any class which needs to call these methods must now have a pointer to a RubySystem. To enable that, several changes are made: - RubyRequest and Message now require a RubySystem pointer in the constructor. The pointer is passed to fields in the Message class which require a RubySystem pointer (e.g., NetDest). SLICC is modified to do this automatically. 
- SLICC structures may now optionally take an "implicit constructor" which can be used to call a non-default constructor for locally defined variables (e.g., temporary variables within SLICC actions). A statement such as "NetDest bcast_dest;" in SLICC will implicitly append a call to the NetDest constructor taking RubySystem, for example. - RubySystem gets passed to Ruby network objects (Network, Topology). --- configs/example/ruby_gpu_random_test.py | 2 + configs/learning_gem5/part3/msi_caches.py | 5 +- .../part3/ruby_caches_MI_example.py | 5 +- configs/learning_gem5/part3/test_caches.py | 1 + configs/ruby/AMD_Base_Constructor.py | 4 +- configs/ruby/GPU_VIPER.py | 18 +-- configs/ruby/MESI_Three_Level.py | 1 + configs/ruby/MESI_Three_Level_HTM.py | 1 + configs/ruby/MESI_Two_Level.py | 2 +- configs/ruby/MOESI_AMD_Base.py | 8 +- configs/ruby/Ruby.py | 8 +- src/cpu/testers/gpu_ruby_test/TesterThread.py | 1 + .../testers/gpu_ruby_test/address_manager.cc | 4 +- src/cpu/testers/gpu_ruby_test/dma_thread.cc | 6 +- .../testers/gpu_ruby_test/gpu_wavefront.cc | 8 +- .../testers/gpu_ruby_test/tester_thread.cc | 13 +- .../testers/gpu_ruby_test/tester_thread.hh | 2 + src/cpu/testers/rubytest/Check.cc | 18 ++- src/cpu/testers/rubytest/Check.hh | 1 + src/cpu/testers/rubytest/RubyTester.hh | 4 +- src/mem/ruby/common/Address.cc | 22 ++-- src/mem/ruby/common/Address.hh | 8 +- src/mem/ruby/common/DataBlock.cc | 91 ++++++++++--- src/mem/ruby/common/DataBlock.hh | 21 ++- src/mem/ruby/common/NetDest.cc | 57 +++++++- src/mem/ruby/common/NetDest.hh | 11 ++ src/mem/ruby/common/SubBlock.cc | 7 +- src/mem/ruby/common/SubBlock.hh | 3 +- src/mem/ruby/common/WriteMask.cc | 5 +- src/mem/ruby/common/WriteMask.hh | 24 ++++ src/mem/ruby/network/MessageBuffer.cc | 12 +- src/mem/ruby/network/MessageBuffer.hh | 9 +- src/mem/ruby/network/Network.cc | 26 +++- src/mem/ruby/network/Network.hh | 11 ++ src/mem/ruby/network/Topology.cc | 27 ++-- src/mem/ruby/network/Topology.hh | 7 +- 
.../ruby/network/garnet/NetworkInterface.cc | 15 ++- .../ruby/network/garnet/NetworkInterface.hh | 2 + src/mem/ruby/network/simple/PerfectSwitch.cc | 3 +- src/mem/ruby/network/simple/Switch.hh | 1 + src/mem/ruby/network/simple/Throttle.cc | 4 +- src/mem/ruby/profiler/AddressProfiler.cc | 4 +- src/mem/ruby/protocol/GPU_VIPER-SQC.sm | 2 +- src/mem/ruby/protocol/GPU_VIPER-TCP.sm | 2 +- .../ruby/protocol/MESI_Three_Level-L1cache.sm | 2 +- .../ruby/protocol/MESI_Two_Level-L1cache.sm | 2 +- .../protocol/MOESI_AMD_Base-RegionBuffer.sm | 6 +- .../ruby/protocol/MOESI_AMD_Base-RegionDir.sm | 6 +- src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm | 2 +- .../protocol/MOESI_AMD_Base-probeFilter.sm | 2 +- .../ruby/protocol/MOESI_CMP_directory-dir.sm | 2 +- .../ruby/protocol/MOESI_CMP_token-L1cache.sm | 2 +- src/mem/ruby/protocol/MOESI_CMP_token-dir.sm | 2 +- src/mem/ruby/protocol/RubySlicc_Exports.sm | 2 + src/mem/ruby/protocol/RubySlicc_MemControl.sm | 4 +- src/mem/ruby/protocol/RubySlicc_Types.sm | 2 +- src/mem/ruby/protocol/RubySlicc_Util.sm | 1 + src/mem/ruby/protocol/chi/CHI-cache.sm | 2 +- .../ruby/protocol/chi/CHI-dvm-misc-node.sm | 2 +- src/mem/ruby/protocol/chi/CHI-mem.sm | 2 +- .../slicc_interface/AbstractCacheEntry.hh | 11 +- .../slicc_interface/AbstractController.cc | 56 +++++++- .../slicc_interface/AbstractController.hh | 13 ++ src/mem/ruby/slicc_interface/Message.hh | 14 +- src/mem/ruby/slicc_interface/RubyRequest.hh | 41 ++++-- .../RubySlicc_ComponentMapping.hh | 17 --- .../ruby/slicc_interface/RubySlicc_Util.hh | 16 ++- src/mem/ruby/structures/ALUFreeListArray.cc | 10 +- src/mem/ruby/structures/ALUFreeListArray.hh | 27 +++- src/mem/ruby/structures/BankedArray.cc | 7 +- src/mem/ruby/structures/BankedArray.hh | 5 +- src/mem/ruby/structures/CacheMemory.cc | 31 +++-- src/mem/ruby/structures/CacheMemory.hh | 10 ++ src/mem/ruby/structures/DirectoryMemory.cc | 8 +- src/mem/ruby/structures/DirectoryMemory.hh | 3 + src/mem/ruby/structures/DirectoryMemory.py | 4 + 
src/mem/ruby/structures/PerfectCacheMemory.hh | 25 ++-- src/mem/ruby/structures/PersistentTable.hh | 14 ++ src/mem/ruby/structures/RubyCache.py | 1 - src/mem/ruby/structures/RubyPrefetcher.cc | 13 +- src/mem/ruby/structures/RubyPrefetcher.hh | 14 +- src/mem/ruby/structures/RubyPrefetcher.py | 3 + .../ruby/structures/RubyPrefetcherProxy.cc | 25 +++- .../ruby/structures/RubyPrefetcherProxy.hh | 3 + src/mem/ruby/structures/TBETable.hh | 10 +- src/mem/ruby/structures/TimerTable.cc | 4 +- src/mem/ruby/structures/TimerTable.hh | 8 ++ src/mem/ruby/structures/WireBuffer.cc | 4 +- src/mem/ruby/structures/WireBuffer.hh | 5 +- src/mem/ruby/structures/WireBuffer.py | 2 - src/mem/ruby/system/CacheRecorder.cc | 24 ++-- src/mem/ruby/system/CacheRecorder.hh | 10 +- src/mem/ruby/system/DMASequencer.cc | 24 ++-- src/mem/ruby/system/GPUCoalescer.cc | 20 +-- src/mem/ruby/system/GPUCoalescer.hh | 2 + src/mem/ruby/system/RubyPort.cc | 31 ++++- src/mem/ruby/system/RubyPort.hh | 5 + src/mem/ruby/system/RubySystem.cc | 25 +--- src/mem/ruby/system/RubySystem.hh | 30 +++-- src/mem/ruby/system/Sequencer.cc | 43 +++--- src/mem/ruby/system/Sequencer.hh | 2 + src/mem/ruby/system/Sequencer.py | 2 +- src/mem/ruby/system/VIPERCoalescer.cc | 38 +++--- src/mem/ruby/system/VIPERSequencer.cc | 4 +- src/mem/slicc/ast/CheckProbeStatementAST.py | 3 +- .../slicc/ast/DeferEnqueueingStatementAST.py | 3 +- src/mem/slicc/ast/EnqueueStatementAST.py | 13 +- src/mem/slicc/ast/LocalVariableAST.py | 2 + src/mem/slicc/ast/PeekStatementAST.py | 3 +- src/mem/slicc/symbols/StateMachine.py | 49 ++++--- src/mem/slicc/symbols/Type.py | 124 ++++++++++++++++-- .../chi/private_l1_cache_hierarchy.py | 16 ++- .../ruby/caches/mesi_three_level/directory.py | 2 +- .../ruby/caches/mesi_three_level/l1_cache.py | 2 +- .../ruby/caches/mesi_three_level/l2_cache.py | 2 +- .../ruby/caches/mesi_two_level/directory.py | 2 +- .../ruby/caches/mesi_two_level/l1_cache.py | 2 +- .../ruby/caches/mi_example/directory.py | 2 +- 
.../prebuilt/octopi_cache/core_complex.py | 1 + .../caches/prebuilt/octopi_cache/octopi.py | 10 +- .../ruby/mesi_three_level_cache_hierarchy.py | 12 +- .../ruby/mesi_two_level_cache_hierarchy.py | 15 ++- .../ruby/mi_example_cache_hierarchy.py | 11 +- 123 files changed, 1066 insertions(+), 399 deletions(-) diff --git a/configs/example/ruby_gpu_random_test.py b/configs/example/ruby_gpu_random_test.py index bfcd2c953d..eb7dd3acbd 100644 --- a/configs/example/ruby_gpu_random_test.py +++ b/configs/example/ruby_gpu_random_test.py @@ -371,6 +371,7 @@ for dma_idx in range(n_DMAs): num_lanes=1, clk_domain=thread_clock, deadlock_threshold=tester_deadlock_threshold, + cache_line_size=system.cache_line_size, ) ) g_thread_idx += 1 @@ -393,6 +394,7 @@ for cu_idx in range(n_CUs): num_lanes=args.wf_size, clk_domain=thread_clock, deadlock_threshold=tester_deadlock_threshold, + cache_line_size=system.cache_line_size, ) ) g_thread_idx += 1 diff --git a/configs/learning_gem5/part3/msi_caches.py b/configs/learning_gem5/part3/msi_caches.py index c198662c5e..b719c7ab60 100644 --- a/configs/learning_gem5/part3/msi_caches.py +++ b/configs/learning_gem5/part3/msi_caches.py @@ -84,6 +84,7 @@ class MyCacheSystem(RubySystem): # I/D cache is combined and grab from ctrl dcache=self.controllers[i].cacheMemory, clk_domain=self.controllers[i].clk_domain, + ruby_system=self, ) for i in range(len(cpus)) ] @@ -191,7 +192,9 @@ class DirController(Directory_Controller): self.version = self.versionCount() self.addr_ranges = ranges self.ruby_system = ruby_system - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) # Connect this directory to the memory side. 
self.memory = mem_ctrls[0].port self.connectQueues(ruby_system) diff --git a/configs/learning_gem5/part3/ruby_caches_MI_example.py b/configs/learning_gem5/part3/ruby_caches_MI_example.py index baee120bb9..583041a674 100644 --- a/configs/learning_gem5/part3/ruby_caches_MI_example.py +++ b/configs/learning_gem5/part3/ruby_caches_MI_example.py @@ -84,6 +84,7 @@ class MyCacheSystem(RubySystem): # I/D cache is combined and grab from ctrl dcache=self.controllers[i].cacheMemory, clk_domain=self.controllers[i].clk_domain, + ruby_system=self, ) for i in range(len(cpus)) ] @@ -180,7 +181,9 @@ class DirController(Directory_Controller): self.version = self.versionCount() self.addr_ranges = ranges self.ruby_system = ruby_system - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) # Connect this directory to the memory side. self.memory = mem_ctrls[0].port self.connectQueues(ruby_system) diff --git a/configs/learning_gem5/part3/test_caches.py b/configs/learning_gem5/part3/test_caches.py index 4e8e8febda..be2d46253e 100644 --- a/configs/learning_gem5/part3/test_caches.py +++ b/configs/learning_gem5/part3/test_caches.py @@ -79,6 +79,7 @@ class TestCacheSystem(RubySystem): # I/D cache is combined and grab from ctrl dcache=self.controllers[i].cacheMemory, clk_domain=self.clk_domain, + ruby_system=self, ) for i in range(num_testers) ] diff --git a/configs/ruby/AMD_Base_Constructor.py b/configs/ruby/AMD_Base_Constructor.py index ff4246a7e0..7d40862517 100644 --- a/configs/ruby/AMD_Base_Constructor.py +++ b/configs/ruby/AMD_Base_Constructor.py @@ -84,14 +84,14 @@ class CPCntrl(AMD_Base_Controller, CntrlBase): self.L2cache = L2Cache() self.L2cache.create(options.l2_size, options.l2_assoc, options) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1D0cache self.sequencer.ruby_system = ruby_system 
self.sequencer.coreid = 0 self.sequencer.is_cpu_sequencer = True - self.sequencer1 = RubySequencer() + self.sequencer1 = RubySequencer(ruby_system=ruby_system) self.sequencer1.version = self.seqCount() self.sequencer1.dcache = self.L1D1cache self.sequencer1.ruby_system = ruby_system diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py index 313d1d514a..15108bb674 100644 --- a/configs/ruby/GPU_VIPER.py +++ b/configs/ruby/GPU_VIPER.py @@ -114,14 +114,14 @@ class CPCntrl(CorePair_Controller, CntrlBase): self.L2cache = L2Cache() self.L2cache.create(options.l2_size, options.l2_assoc, options) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1D0cache self.sequencer.ruby_system = ruby_system self.sequencer.coreid = 0 self.sequencer.is_cpu_sequencer = True - self.sequencer1 = RubySequencer() + self.sequencer1 = RubySequencer(ruby_system=ruby_system) self.sequencer1.version = self.seqCount() self.sequencer1.dcache = self.L1D1cache self.sequencer1.ruby_system = ruby_system @@ -169,7 +169,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): # TCP_Controller inherits this from RubyController self.mandatory_queue_latency = options.mandatory_queue_latency - self.coalescer = VIPERCoalescer() + self.coalescer = VIPERCoalescer(ruby_system=ruby_system) self.coalescer.version = self.seqCount() self.coalescer.icache = self.L1cache self.coalescer.dcache = self.L1cache @@ -182,7 +182,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): options.max_coalesces_per_cycle ) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1cache self.sequencer.ruby_system = ruby_system @@ -211,7 +211,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): self.L1cache.create(options) self.issue_latency = 1 - self.coalescer = VIPERCoalescer() + self.coalescer = 
VIPERCoalescer(ruby_system=ruby_system) self.coalescer.version = self.seqCount() self.coalescer.icache = self.L1cache self.coalescer.dcache = self.L1cache @@ -219,7 +219,7 @@ class TCPCntrl(TCP_Controller, CntrlBase): self.coalescer.support_inst_reqs = False self.coalescer.is_cpu_sequencer = False - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1cache self.sequencer.ruby_system = ruby_system @@ -387,7 +387,9 @@ class DirCntrl(Directory_Controller, CntrlBase): self.response_latency = 30 self.addr_ranges = dir_ranges - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) self.L3CacheMemory = L3Cache() self.L3CacheMemory.create(options, ruby_system, system) @@ -686,7 +688,7 @@ def construct_gpudirs(options, system, ruby_system, network): dir_cntrl.addr_ranges = dram_intf.range # Append - exec("system.ruby.gpu_dir_cntrl%d = dir_cntrl" % i) + exec("ruby_system.gpu_dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) mem_ctrls.append(mem_ctrl) diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py index e0de4e0636..9054fefc01 100644 --- a/configs/ruby/MESI_Three_Level.py +++ b/configs/ruby/MESI_Three_Level.py @@ -148,6 +148,7 @@ def create_system( train_misses=5, num_startup_pfs=4, cross_page=True, + block_size=options.cacheline_size, ) l0_cntrl = L0Cache_Controller( diff --git a/configs/ruby/MESI_Three_Level_HTM.py b/configs/ruby/MESI_Three_Level_HTM.py index e6c4e81f91..d7ad3bdc04 100644 --- a/configs/ruby/MESI_Three_Level_HTM.py +++ b/configs/ruby/MESI_Three_Level_HTM.py @@ -148,6 +148,7 @@ def create_system( train_misses=5, num_startup_pfs=4, cross_page=True, + block_size=options.cacheline_size, ) l0_cntrl = L0Cache_Controller( diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py index 500afbc199..6e1e0b97f3 100644 --- 
a/configs/ruby/MESI_Two_Level.py +++ b/configs/ruby/MESI_Two_Level.py @@ -94,7 +94,7 @@ def create_system( is_icache=False, ) - prefetcher = RubyPrefetcher() + prefetcher = RubyPrefetcher(block_size=options.cacheline_size) clk_domain = cpus[i].clk_domain diff --git a/configs/ruby/MOESI_AMD_Base.py b/configs/ruby/MOESI_AMD_Base.py index aeab96a85f..1095defc57 100644 --- a/configs/ruby/MOESI_AMD_Base.py +++ b/configs/ruby/MOESI_AMD_Base.py @@ -112,14 +112,14 @@ class CPCntrl(CorePair_Controller, CntrlBase): self.L2cache = L2Cache() self.L2cache.create(options) - self.sequencer = RubySequencer() + self.sequencer = RubySequencer(ruby_system=ruby_system) self.sequencer.version = self.seqCount() self.sequencer.dcache = self.L1D0cache self.sequencer.ruby_system = ruby_system self.sequencer.coreid = 0 self.sequencer.is_cpu_sequencer = True - self.sequencer1 = RubySequencer() + self.sequencer1 = RubySequencer(ruby_system=ruby_system) self.sequencer1.version = self.seqCount() self.sequencer1.dcache = self.L1D1cache self.sequencer1.ruby_system = ruby_system @@ -194,7 +194,9 @@ class DirCntrl(Directory_Controller, CntrlBase): self.response_latency = 30 self.addr_ranges = dir_ranges - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) self.L3CacheMemory = L3Cache() self.L3CacheMemory.create(options, ruby_system, system) diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py index e427a39de8..0a6671aa4b 100644 --- a/configs/ruby/Ruby.py +++ b/configs/ruby/Ruby.py @@ -308,7 +308,9 @@ def create_directories(options, bootmem, ruby_system, system): for i in range(options.num_dirs): dir_cntrl = Directory_Controller() dir_cntrl.version = i - dir_cntrl.directory = RubyDirectoryMemory() + dir_cntrl.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) dir_cntrl.ruby_system = ruby_system exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) @@ -316,7 +318,9 @@ def create_directories(options, 
bootmem, ruby_system, system): if bootmem is not None: rom_dir_cntrl = Directory_Controller() - rom_dir_cntrl.directory = RubyDirectoryMemory() + rom_dir_cntrl.directory = RubyDirectoryMemory( + block_size=ruby_system.block_size_bytes + ) rom_dir_cntrl.ruby_system = ruby_system rom_dir_cntrl.version = i + 1 rom_dir_cntrl.memory = bootmem.port diff --git a/src/cpu/testers/gpu_ruby_test/TesterThread.py b/src/cpu/testers/gpu_ruby_test/TesterThread.py index 49388a76e1..6ddfc66ddc 100644 --- a/src/cpu/testers/gpu_ruby_test/TesterThread.py +++ b/src/cpu/testers/gpu_ruby_test/TesterThread.py @@ -41,3 +41,4 @@ class TesterThread(ClockedObject): thread_id = Param.Int("Unique TesterThread ID") num_lanes = Param.Int("Number of lanes this thread has") deadlock_threshold = Param.Cycles(1000000000, "Deadlock threshold") + cache_line_size = Param.UInt32("Size of cache line in cache") diff --git a/src/cpu/testers/gpu_ruby_test/address_manager.cc b/src/cpu/testers/gpu_ruby_test/address_manager.cc index a0c0670a8f..83d8a1a277 100644 --- a/src/cpu/testers/gpu_ruby_test/address_manager.cc +++ b/src/cpu/testers/gpu_ruby_test/address_manager.cc @@ -64,7 +64,9 @@ AddressManager::AddressManager(int n_atomic_locs, int n_normal_locs_per_atomic) std::shuffle( randAddressMap.begin(), randAddressMap.end(), - std::default_random_engine(random_mt.random(0,UINT_MAX)) + // TODO: This is a bug unrelated to this draft PR but the GPU tester is + // useful for testing this PR. 
+ std::default_random_engine(random_mt.random(0,UINT_MAX-1)) ); // initialize atomic locations diff --git a/src/cpu/testers/gpu_ruby_test/dma_thread.cc b/src/cpu/testers/gpu_ruby_test/dma_thread.cc index 1d6f46c44b..2c4c610c51 100644 --- a/src/cpu/testers/gpu_ruby_test/dma_thread.cc +++ b/src/cpu/testers/gpu_ruby_test/dma_thread.cc @@ -70,7 +70,7 @@ DmaThread::issueLoadOps() Addr address = addrManager->getAddress(location); DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - ruby::printAddress(address)); + printAddress(address)); int load_size = sizeof(Value); @@ -127,7 +127,7 @@ DmaThread::issueStoreOps() DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - " "Value %d\n", this->getName(), - curEpisode->getEpisodeId(), ruby::printAddress(address), + curEpisode->getEpisodeId(), printAddress(address), new_value); auto req = std::make_shared(address, sizeof(Value), @@ -211,7 +211,7 @@ DmaThread::hitCallback(PacketPtr pkt) DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s -" " Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - resp_cmd.toString(), ruby::printAddress(addr)); + resp_cmd.toString(), printAddress(addr)); if (resp_cmd == MemCmd::SwapResp) { // response to a pending atomic diff --git a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc index ae4078ee6c..516e77ddae 100644 --- a/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc +++ b/src/cpu/testers/gpu_ruby_test/gpu_wavefront.cc @@ -67,7 +67,7 @@ GpuWavefront::issueLoadOps() Addr address = addrManager->getAddress(location); DPRINTF(ProtocolTest, "%s Episode %d: Issuing Load - Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - ruby::printAddress(address)); + printAddress(address)); int load_size = sizeof(Value); @@ -124,7 +124,7 @@ GpuWavefront::issueStoreOps() DPRINTF(ProtocolTest, "%s Episode %d: Issuing Store - Addr %s - " "Value %d\n", this->getName(), - 
curEpisode->getEpisodeId(), ruby::printAddress(address), + curEpisode->getEpisodeId(), printAddress(address), new_value); auto req = std::make_shared(address, sizeof(Value), @@ -178,7 +178,7 @@ GpuWavefront::issueAtomicOps() DPRINTF(ProtocolTest, "%s Episode %d: Issuing Atomic_Inc - Addr %s\n", this->getName(), curEpisode->getEpisodeId(), - ruby::printAddress(address)); + printAddress(address)); // must be aligned with store size assert(address % sizeof(Value) == 0); @@ -268,7 +268,7 @@ GpuWavefront::hitCallback(PacketPtr pkt) DPRINTF(ProtocolTest, "%s Episode %d: hitCallback - Command %s - " "Addr %s\n", this->getName(), curEpisode->getEpisodeId(), resp_cmd.toString(), - ruby::printAddress(addr)); + printAddress(addr)); // whether the transaction is done after this hitCallback bool isTransactionDone = true; diff --git a/src/cpu/testers/gpu_ruby_test/tester_thread.cc b/src/cpu/testers/gpu_ruby_test/tester_thread.cc index ce3a1bccc6..dbcfba8c3c 100644 --- a/src/cpu/testers/gpu_ruby_test/tester_thread.cc +++ b/src/cpu/testers/gpu_ruby_test/tester_thread.cc @@ -43,6 +43,7 @@ TesterThread::TesterThread(const Params &p) : ClockedObject(p), threadEvent(this, "TesterThread tick"), deadlockCheckEvent(this), + cacheLineSize(p.cache_line_size), threadId(p.thread_id), numLanes(p.num_lanes), tester(nullptr), addrManager(nullptr), port(nullptr), @@ -383,7 +384,7 @@ TesterThread::validateAtomicResp(Location loc, int lane, Value ret_val) ss << threadName << ": Atomic Op returned unexpected value\n" << "\tEpisode " << curEpisode->getEpisodeId() << "\n" << "\tLane ID " << lane << "\n" - << "\tAddress " << ruby::printAddress(addr) << "\n" + << "\tAddress " << printAddress(addr) << "\n" << "\tAtomic Op's return value " << ret_val << "\n"; // print out basic info @@ -409,7 +410,7 @@ TesterThread::validateLoadResp(Location loc, int lane, Value ret_val) << "\tTesterThread " << threadId << "\n" << "\tEpisode " << curEpisode->getEpisodeId() << "\n" << "\tLane ID " << lane << "\n" - << 
"\tAddress " << ruby::printAddress(addr) << "\n" + << "\tAddress " << printAddress(addr) << "\n" << "\tLoaded value " << ret_val << "\n" << "\tLast writer " << addrManager->printLastWriter(loc) << "\n"; @@ -467,7 +468,7 @@ TesterThread::printOutstandingReqs(const OutstandingReqTable& table, for (const auto& m : table) { for (const auto& req : m.second) { - ss << "\t\t\tAddr " << ruby::printAddress(m.first) + ss << "\t\t\tAddr " << printAddress(m.first) << ": delta (curCycle - issueCycle) = " << (cur_cycle - req.issueCycle) << std::endl; } @@ -488,4 +489,10 @@ TesterThread::printAllOutstandingReqs(std::stringstream& ss) const << pendingFenceCount << std::endl; } +std::string +TesterThread::printAddress(Addr addr) const +{ + return ruby::printAddress(addr, cacheLineSize * 8); +} + } // namespace gem5 diff --git a/src/cpu/testers/gpu_ruby_test/tester_thread.hh b/src/cpu/testers/gpu_ruby_test/tester_thread.hh index 9877d63c24..f31a5a3dea 100644 --- a/src/cpu/testers/gpu_ruby_test/tester_thread.hh +++ b/src/cpu/testers/gpu_ruby_test/tester_thread.hh @@ -132,6 +132,7 @@ class TesterThread : public ClockedObject {} }; + int cacheLineSize; // the unique global id of this thread int threadId; // width of this thread (1 for cpu thread & wf size for gpu wavefront) @@ -204,6 +205,7 @@ class TesterThread : public ClockedObject void printOutstandingReqs(const OutstandingReqTable& table, std::stringstream& ss) const; + std::string printAddress(Addr addr) const; }; } // namespace gem5 diff --git a/src/cpu/testers/rubytest/Check.cc b/src/cpu/testers/rubytest/Check.cc index 5a83d9ca27..b9c777526a 100644 --- a/src/cpu/testers/rubytest/Check.cc +++ b/src/cpu/testers/rubytest/Check.cc @@ -124,7 +124,8 @@ Check::initiatePrefetch() // push the subblock onto the sender state. 
The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(m_address, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "successfully initiated prefetch.\n"); @@ -161,7 +162,8 @@ Check::initiateFlush() // push the subblock onto the sender state. The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(m_address, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "initiating Flush - successful\n"); @@ -207,7 +209,8 @@ Check::initiateAction() // push the subblock onto the sender state. The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(writeAddr, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "initiating action - successful\n"); @@ -261,7 +264,8 @@ Check::initiateCheck() // push the subblock onto the sender state. 
The sequencer will // update the subblock on the return - pkt->senderState = new SenderState(m_address, req->getSize()); + pkt->senderState = new SenderState(m_address, req->getSize(), + CACHE_LINE_BITS); if (port->sendTimingReq(pkt)) { DPRINTF(RubyTest, "initiating check - successful\n"); @@ -291,7 +295,9 @@ Check::performCallback(ruby::NodeID proc, ruby::SubBlock* data, Cycles curTime) // This isn't exactly right since we now have multi-byte checks // assert(getAddress() == address); - assert(ruby::makeLineAddress(m_address) == ruby::makeLineAddress(address)); + int block_size_bits = CACHE_LINE_BITS; + assert(ruby::makeLineAddress(m_address, block_size_bits) == + ruby::makeLineAddress(address, block_size_bits)); assert(data != NULL); DPRINTF(RubyTest, "RubyTester Callback\n"); @@ -342,7 +348,7 @@ Check::performCallback(ruby::NodeID proc, ruby::SubBlock* data, Cycles curTime) } DPRINTF(RubyTest, "proc: %d, Address: 0x%x\n", proc, - ruby::makeLineAddress(m_address)); + ruby::makeLineAddress(m_address, block_size_bits)); DPRINTF(RubyTest, "Callback done\n"); debugPrint(); } diff --git a/src/cpu/testers/rubytest/Check.hh b/src/cpu/testers/rubytest/Check.hh index 78e2bda77e..0270b800d7 100644 --- a/src/cpu/testers/rubytest/Check.hh +++ b/src/cpu/testers/rubytest/Check.hh @@ -47,6 +47,7 @@ class SubBlock; const int CHECK_SIZE_BITS = 2; const int CHECK_SIZE = (1 << CHECK_SIZE_BITS); +const int CACHE_LINE_BITS = 6; class Check { diff --git a/src/cpu/testers/rubytest/RubyTester.hh b/src/cpu/testers/rubytest/RubyTester.hh index 9397126180..d306c405ef 100644 --- a/src/cpu/testers/rubytest/RubyTester.hh +++ b/src/cpu/testers/rubytest/RubyTester.hh @@ -90,7 +90,9 @@ class RubyTester : public ClockedObject { ruby::SubBlock subBlock; - SenderState(Addr addr, int size) : subBlock(addr, size) {} + SenderState(Addr addr, int size, int cl_size) + : subBlock(addr, size, cl_size) + {} }; diff --git a/src/mem/ruby/common/Address.cc b/src/mem/ruby/common/Address.cc index 
fcf291af51..8b120324c7 100644 --- a/src/mem/ruby/common/Address.cc +++ b/src/mem/ruby/common/Address.cc @@ -51,37 +51,33 @@ maskLowOrderBits(Addr addr, unsigned int number) } Addr -getOffset(Addr addr) +getOffset(Addr addr, int cacheLineBits) { - return bitSelect(addr, 0, RubySystem::getBlockSizeBits() - 1); -} - -Addr -makeLineAddress(Addr addr) -{ - return mbits(addr, 63, RubySystem::getBlockSizeBits()); + assert(cacheLineBits < 64); + return bitSelect(addr, 0, cacheLineBits - 1); } Addr makeLineAddress(Addr addr, int cacheLineBits) { + assert(cacheLineBits < 64); return maskLowOrderBits(addr, cacheLineBits); } // returns the next stride address based on line address Addr -makeNextStrideAddress(Addr addr, int stride) +makeNextStrideAddress(Addr addr, int stride, int cacheLineBytes) { - return makeLineAddress(addr) + - static_cast(RubySystem::getBlockSizeBytes()) * stride; + return makeLineAddress(addr, floorLog2(cacheLineBytes)) + + cacheLineBytes * stride; } std::string -printAddress(Addr addr) +printAddress(Addr addr, int cacheLineBits) { std::stringstream out; out << "[" << std::hex << "0x" << addr << "," << " line 0x" - << makeLineAddress(addr) << std::dec << "]"; + << makeLineAddress(addr, cacheLineBits) << std::dec << "]"; return out.str(); } diff --git a/src/mem/ruby/common/Address.hh b/src/mem/ruby/common/Address.hh index 565c3c1fb7..51e0b5417a 100644 --- a/src/mem/ruby/common/Address.hh +++ b/src/mem/ruby/common/Address.hh @@ -33,6 +33,7 @@ #include #include +#include "base/intmath.hh" #include "base/types.hh" namespace gem5 @@ -44,11 +45,10 @@ namespace ruby // selects bits inclusive Addr bitSelect(Addr addr, unsigned int small, unsigned int big); Addr maskLowOrderBits(Addr addr, unsigned int number); -Addr getOffset(Addr addr); -Addr makeLineAddress(Addr addr); +Addr getOffset(Addr addr, int cacheLineBits); Addr makeLineAddress(Addr addr, int cacheLineBits); -Addr makeNextStrideAddress(Addr addr, int stride); -std::string printAddress(Addr addr); +Addr 
makeNextStrideAddress(Addr addr, int stride, int cacheLineBytes); +std::string printAddress(Addr addr, int cacheLineBits); } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/common/DataBlock.cc b/src/mem/ruby/common/DataBlock.cc index 8f47d0026b..bbc0fd21c8 100644 --- a/src/mem/ruby/common/DataBlock.cc +++ b/src/mem/ruby/common/DataBlock.cc @@ -40,8 +40,8 @@ #include "mem/ruby/common/DataBlock.hh" +#include "mem/ruby/common/Address.hh" #include "mem/ruby/common/WriteMask.hh" -#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -51,17 +51,22 @@ namespace ruby DataBlock::DataBlock(const DataBlock &cp) { + assert(cp.isAlloc()); + assert(cp.getBlockSize() > 0); + assert(!m_alloc); + uint8_t *block_update; - size_t block_bytes = RubySystem::getBlockSizeBytes(); - m_data = new uint8_t[block_bytes]; - memcpy(m_data, cp.m_data, block_bytes); + m_block_size = cp.getBlockSize(); + m_data = new uint8_t[m_block_size]; + memcpy(m_data, cp.m_data, m_block_size); m_alloc = true; + m_block_size = m_block_size; // If this data block is involved in an atomic operation, the effect // of applying the atomic operations on the data block are recorded in // m_atomicLog. 
If so, we must copy over every entry in the change log for (size_t i = 0; i < cp.m_atomicLog.size(); i++) { - block_update = new uint8_t[block_bytes]; - memcpy(block_update, cp.m_atomicLog[i], block_bytes); + block_update = new uint8_t[m_block_size]; + memcpy(block_update, cp.m_atomicLog[i], m_block_size); m_atomicLog.push_back(block_update); } } @@ -69,21 +74,44 @@ DataBlock::DataBlock(const DataBlock &cp) void DataBlock::alloc() { - m_data = new uint8_t[RubySystem::getBlockSizeBytes()]; + assert(!m_alloc); + + if (!m_block_size) { + return; + } + + m_data = new uint8_t[m_block_size]; m_alloc = true; clear(); } +void +DataBlock::realloc(int blk_size) +{ + m_block_size = blk_size; + assert(m_block_size > 0); + + if (m_alloc) { + delete [] m_data; + m_alloc = false; + } + alloc(); +} + void DataBlock::clear() { - memset(m_data, 0, RubySystem::getBlockSizeBytes()); + assert(m_alloc); + assert(m_block_size > 0); + memset(m_data, 0, m_block_size); } bool DataBlock::equal(const DataBlock& obj) const { - size_t block_bytes = RubySystem::getBlockSizeBytes(); + assert(m_alloc); + assert(m_block_size > 0); + size_t block_bytes = m_block_size; // Check that the block contents match if (memcmp(m_data, obj.m_data, block_bytes)) { return false; @@ -102,7 +130,9 @@ DataBlock::equal(const DataBlock& obj) const void DataBlock::copyPartial(const DataBlock &dblk, const WriteMask &mask) { - for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) { + assert(m_alloc); + assert(m_block_size > 0); + for (int i = 0; i < m_block_size; i++) { if (mask.getMask(i, 1)) { m_data[i] = dblk.m_data[i]; } @@ -113,7 +143,9 @@ void DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask, bool isAtomicNoReturn) { - for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) { + assert(m_alloc); + assert(m_block_size > 0); + for (int i = 0; i < m_block_size; i++) { m_data[i] = dblk.m_data[i]; } mask.performAtomic(m_data, m_atomicLog, isAtomicNoReturn); @@ -122,7 +154,9 @@ 
DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask, void DataBlock::print(std::ostream& out) const { - int size = RubySystem::getBlockSizeBytes(); + assert(m_alloc); + assert(m_block_size > 0); + int size = m_block_size; out << "[ "; for (int i = 0; i < size; i++) { out << std::setw(2) << std::setfill('0') << std::hex @@ -147,6 +181,7 @@ DataBlock::popAtomicLogEntryFront() void DataBlock::clearAtomicLogEntries() { + assert(m_alloc); for (auto log : m_atomicLog) { delete [] log; } @@ -156,35 +191,59 @@ DataBlock::clearAtomicLogEntries() const uint8_t* DataBlock::getData(int offset, int len) const { - assert(offset + len <= RubySystem::getBlockSizeBytes()); + assert(m_alloc); + assert(m_block_size > 0); + assert(offset + len <= m_block_size); return &m_data[offset]; } uint8_t* DataBlock::getDataMod(int offset) { + assert(m_alloc); return &m_data[offset]; } void DataBlock::setData(const uint8_t *data, int offset, int len) { + assert(m_alloc); memcpy(&m_data[offset], data, len); } void DataBlock::setData(PacketPtr pkt) { - int offset = getOffset(pkt->getAddr()); - assert(offset + pkt->getSize() <= RubySystem::getBlockSizeBytes()); + assert(m_alloc); + assert(m_block_size > 0); + int offset = getOffset(pkt->getAddr(), floorLog2(m_block_size)); + assert(offset + pkt->getSize() <= m_block_size); pkt->writeData(&m_data[offset]); } DataBlock & DataBlock::operator=(const DataBlock & obj) { + // Reallocate if needed + if (m_alloc && m_block_size != obj.getBlockSize()) { + delete [] m_data; + m_block_size = obj.getBlockSize(); + alloc(); + } else if (!m_alloc) { + m_block_size = obj.getBlockSize(); + alloc(); + + // Assume this will be realloc'd later if zero. 
+ if (m_block_size == 0) { + return *this; + } + } else { + assert(m_alloc && m_block_size == obj.getBlockSize()); + } + assert(m_block_size > 0); + uint8_t *block_update; - size_t block_bytes = RubySystem::getBlockSizeBytes(); + size_t block_bytes = m_block_size; // Copy entire block contents from obj to current block memcpy(m_data, obj.m_data, block_bytes); // If this data block is involved in an atomic operation, the effect diff --git a/src/mem/ruby/common/DataBlock.hh b/src/mem/ruby/common/DataBlock.hh index 7456a25f3f..ebfa7d1383 100644 --- a/src/mem/ruby/common/DataBlock.hh +++ b/src/mem/ruby/common/DataBlock.hh @@ -61,8 +61,14 @@ class WriteMask; class DataBlock { public: - DataBlock() + // Ideally this should not be called. We allow default so that protocols + // do not need to be changed. + DataBlock() = default; + + DataBlock(int blk_size) { + assert(!m_alloc); + m_block_size = blk_size; alloc(); } @@ -101,10 +107,16 @@ class DataBlock bool equal(const DataBlock& obj) const; void print(std::ostream& out) const; + int getBlockSize() const { return m_block_size; } + void setBlockSize(int block_size) { realloc(block_size); } + bool isAlloc() const { return m_alloc; } + void realloc(int blk_size); + private: void alloc(); - uint8_t *m_data; - bool m_alloc; + uint8_t *m_data = nullptr; + bool m_alloc = false; + int m_block_size = 0; // Tracks block changes when atomic ops are applied std::deque m_atomicLog; @@ -124,18 +136,21 @@ DataBlock::assign(uint8_t *data) inline uint8_t DataBlock::getByte(int whichByte) const { + assert(m_alloc); return m_data[whichByte]; } inline void DataBlock::setByte(int whichByte, uint8_t data) { + assert(m_alloc); m_data[whichByte] = data; } inline void DataBlock::copyPartial(const DataBlock & dblk, int offset, int len) { + assert(m_alloc); setData(&dblk.m_data[offset], offset, len); } diff --git a/src/mem/ruby/common/NetDest.cc b/src/mem/ruby/common/NetDest.cc index ba64f2febd..944315b97f 100644 --- 
a/src/mem/ruby/common/NetDest.cc +++ b/src/mem/ruby/common/NetDest.cc @@ -30,6 +30,8 @@ #include +#include "mem/ruby/system/RubySystem.hh" + namespace gem5 { @@ -38,12 +40,18 @@ namespace ruby NetDest::NetDest() { - resize(); +} + +NetDest::NetDest(RubySystem *ruby_system) + : m_ruby_system(ruby_system) +{ + resize(); } void NetDest::add(MachineID newElement) { + assert(m_bits.size() > 0); assert(bitIndex(newElement.num) < m_bits[vecIndex(newElement)].getSize()); m_bits[vecIndex(newElement)].add(bitIndex(newElement.num)); } @@ -51,6 +59,7 @@ NetDest::add(MachineID newElement) void NetDest::addNetDest(const NetDest& netDest) { + assert(m_bits.size() > 0); assert(m_bits.size() == netDest.getSize()); for (int i = 0; i < m_bits.size(); i++) { m_bits[i].addSet(netDest.m_bits[i]); @@ -60,6 +69,8 @@ NetDest::addNetDest(const NetDest& netDest) void NetDest::setNetDest(MachineType machine, const Set& set) { + assert(m_ruby_system != nullptr); + // assure that there is only one set of destinations for this machine assert(MachineType_base_level((MachineType)(machine + 1)) - MachineType_base_level(machine) == 1); @@ -69,12 +80,14 @@ NetDest::setNetDest(MachineType machine, const Set& set) void NetDest::remove(MachineID oldElement) { + assert(m_bits.size() > 0); m_bits[vecIndex(oldElement)].remove(bitIndex(oldElement.num)); } void NetDest::removeNetDest(const NetDest& netDest) { + assert(m_bits.size() > 0); assert(m_bits.size() == netDest.getSize()); for (int i = 0; i < m_bits.size(); i++) { m_bits[i].removeSet(netDest.m_bits[i]); @@ -84,6 +97,7 @@ NetDest::removeNetDest(const NetDest& netDest) void NetDest::clear() { + assert(m_bits.size() > 0); for (int i = 0; i < m_bits.size(); i++) { m_bits[i].clear(); } @@ -101,6 +115,8 @@ NetDest::broadcast() void NetDest::broadcast(MachineType machineType) { + assert(m_ruby_system != nullptr); + for (NodeID i = 0; i < MachineType_base_count(machineType); i++) { MachineID mach = {machineType, i}; add(mach); @@ -111,6 +127,9 @@ 
NetDest::broadcast(MachineType machineType) std::vector NetDest::getAllDest() { + assert(m_ruby_system != nullptr); + assert(m_bits.size() > 0); + std::vector dest; dest.clear(); for (int i = 0; i < m_bits.size(); i++) { @@ -127,6 +146,8 @@ NetDest::getAllDest() int NetDest::count() const { + assert(m_bits.size() > 0); + int counter = 0; for (int i = 0; i < m_bits.size(); i++) { counter += m_bits[i].count(); @@ -137,12 +158,14 @@ NetDest::count() const NodeID NetDest::elementAt(MachineID index) { + assert(m_bits.size() > 0); return m_bits[vecIndex(index)].elementAt(bitIndex(index.num)); } MachineID NetDest::smallestElement() const { + assert(m_bits.size() > 0); assert(count() > 0); for (int i = 0; i < m_bits.size(); i++) { for (NodeID j = 0; j < m_bits[i].getSize(); j++) { @@ -158,6 +181,9 @@ NetDest::smallestElement() const MachineID NetDest::smallestElement(MachineType machine) const { + assert(m_bits.size() > 0); + assert(m_ruby_system != nullptr); + int size = m_bits[MachineType_base_level(machine)].getSize(); for (NodeID j = 0; j < size; j++) { if (m_bits[MachineType_base_level(machine)].isElement(j)) { @@ -173,6 +199,7 @@ NetDest::smallestElement(MachineType machine) const bool NetDest::isBroadcast() const { + assert(m_bits.size() > 0); for (int i = 0; i < m_bits.size(); i++) { if (!m_bits[i].isBroadcast()) { return false; @@ -185,6 +212,7 @@ NetDest::isBroadcast() const bool NetDest::isEmpty() const { + assert(m_bits.size() > 0); for (int i = 0; i < m_bits.size(); i++) { if (!m_bits[i].isEmpty()) { return false; @@ -197,8 +225,9 @@ NetDest::isEmpty() const NetDest NetDest::OR(const NetDest& orNetDest) const { + assert(m_bits.size() > 0); assert(m_bits.size() == orNetDest.getSize()); - NetDest result; + NetDest result(m_ruby_system); for (int i = 0; i < m_bits.size(); i++) { result.m_bits[i] = m_bits[i].OR(orNetDest.m_bits[i]); } @@ -209,8 +238,9 @@ NetDest::OR(const NetDest& orNetDest) const NetDest NetDest::AND(const NetDest& andNetDest) const { + 
assert(m_bits.size() > 0); assert(m_bits.size() == andNetDest.getSize()); - NetDest result; + NetDest result(m_ruby_system); for (int i = 0; i < m_bits.size(); i++) { result.m_bits[i] = m_bits[i].AND(andNetDest.m_bits[i]); } @@ -221,6 +251,7 @@ NetDest::AND(const NetDest& andNetDest) const bool NetDest::intersectionIsNotEmpty(const NetDest& other_netDest) const { + assert(m_bits.size() > 0); assert(m_bits.size() == other_netDest.getSize()); for (int i = 0; i < m_bits.size(); i++) { if (!m_bits[i].intersectionIsEmpty(other_netDest.m_bits[i])) { @@ -233,6 +264,7 @@ NetDest::intersectionIsNotEmpty(const NetDest& other_netDest) const bool NetDest::isSuperset(const NetDest& test) const { + assert(m_bits.size() > 0); assert(m_bits.size() == test.getSize()); for (int i = 0; i < m_bits.size(); i++) { @@ -246,12 +278,15 @@ NetDest::isSuperset(const NetDest& test) const bool NetDest::isElement(MachineID element) const { + assert(m_bits.size() > 0); return ((m_bits[vecIndex(element)])).isElement(bitIndex(element.num)); } void NetDest::resize() { + assert(m_ruby_system != nullptr); + m_bits.resize(MachineType_base_level(MachineType_NUM)); assert(m_bits.size() == MachineType_NUM); @@ -263,6 +298,7 @@ NetDest::resize() void NetDest::print(std::ostream& out) const { + assert(m_bits.size() > 0); out << "[NetDest (" << m_bits.size() << ") "; for (int i = 0; i < m_bits.size(); i++) { @@ -277,6 +313,7 @@ NetDest::print(std::ostream& out) const bool NetDest::isEqual(const NetDest& n) const { + assert(m_bits.size() > 0); assert(m_bits.size() == n.m_bits.size()); for (unsigned int i = 0; i < m_bits.size(); ++i) { if (!m_bits[i].isEqual(n.m_bits[i])) @@ -285,5 +322,19 @@ NetDest::isEqual(const NetDest& n) const return true; } +int +NetDest::MachineType_base_count(const MachineType& obj) +{ + assert(m_ruby_system != nullptr); + return m_ruby_system->MachineType_base_count(obj); +} + +int +NetDest::MachineType_base_number(const MachineType& obj) +{ + assert(m_ruby_system != nullptr); + 
return m_ruby_system->MachineType_base_number(obj); +} + } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/common/NetDest.hh b/src/mem/ruby/common/NetDest.hh index e71b876754..83f340a478 100644 --- a/src/mem/ruby/common/NetDest.hh +++ b/src/mem/ruby/common/NetDest.hh @@ -41,6 +41,8 @@ namespace gem5 namespace ruby { +class RubySystem; + // NetDest specifies the network destination of a Message class NetDest { @@ -48,6 +50,7 @@ class NetDest // Constructors // creates and empty set NetDest(); + NetDest(RubySystem *ruby_system); explicit NetDest(int bit_size); NetDest& operator=(const Set& obj); @@ -98,6 +101,8 @@ class NetDest void print(std::ostream& out) const; + void setRubySystem(RubySystem *rs) { m_ruby_system = rs; resize(); } + private: // returns a value >= MachineType_base_level("this machine") // and < MachineType_base_level("next highest machine") @@ -112,6 +117,12 @@ class NetDest NodeID bitIndex(NodeID index) const { return index; } std::vector m_bits; // a vector of bit vectors - i.e. 
Sets + + // Needed to call MachineType_base_count/number + RubySystem *m_ruby_system = nullptr; + + int MachineType_base_count(const MachineType& obj); + int MachineType_base_number(const MachineType& obj); }; inline std::ostream& diff --git a/src/mem/ruby/common/SubBlock.cc b/src/mem/ruby/common/SubBlock.cc index 92cfd8b633..be0adc1233 100644 --- a/src/mem/ruby/common/SubBlock.cc +++ b/src/mem/ruby/common/SubBlock.cc @@ -38,13 +38,14 @@ namespace ruby using stl_helpers::operator<<; -SubBlock::SubBlock(Addr addr, int size) +SubBlock::SubBlock(Addr addr, int size, int cl_bits) { m_address = addr; resize(size); for (int i = 0; i < size; i++) { setByte(i, 0); } + m_cache_line_bits = cl_bits; } void @@ -52,7 +53,7 @@ SubBlock::internalMergeFrom(const DataBlock& data) { int size = getSize(); assert(size > 0); - int offset = getOffset(m_address); + int offset = getOffset(m_address, m_cache_line_bits); for (int i = 0; i < size; i++) { this->setByte(i, data.getByte(offset + i)); } @@ -63,7 +64,7 @@ SubBlock::internalMergeTo(DataBlock& data) const { int size = getSize(); assert(size > 0); - int offset = getOffset(m_address); + int offset = getOffset(m_address, m_cache_line_bits); for (int i = 0; i < size; i++) { // This will detect crossing a cache line boundary data.setByte(offset + i, this->getByte(i)); diff --git a/src/mem/ruby/common/SubBlock.hh b/src/mem/ruby/common/SubBlock.hh index e1a83600c2..3790bbac58 100644 --- a/src/mem/ruby/common/SubBlock.hh +++ b/src/mem/ruby/common/SubBlock.hh @@ -45,7 +45,7 @@ class SubBlock { public: SubBlock() { } - SubBlock(Addr addr, int size); + SubBlock(Addr addr, int size, int cl_bits); ~SubBlock() { } Addr getAddress() const { return m_address; } @@ -74,6 +74,7 @@ class SubBlock // Data Members (m_ prefix) Addr m_address; std::vector m_data; + int m_cache_line_bits; }; inline std::ostream& diff --git a/src/mem/ruby/common/WriteMask.cc b/src/mem/ruby/common/WriteMask.cc index 1fa03c951e..f176aec9fc 100644 --- 
a/src/mem/ruby/common/WriteMask.cc +++ b/src/mem/ruby/common/WriteMask.cc @@ -39,13 +39,13 @@ namespace ruby { WriteMask::WriteMask() - : mSize(RubySystem::getBlockSizeBytes()), mMask(mSize, false), - mAtomic(false) + : mSize(0), mMask(mSize, false), mAtomic(false) {} void WriteMask::print(std::ostream& out) const { + assert(mSize > 0); std::string str(mSize,'0'); for (int i = 0; i < mSize; i++) { str[i] = mMask[i] ? ('1') : ('0'); @@ -59,6 +59,7 @@ void WriteMask::performAtomic(uint8_t * p, std::deque& log, bool isAtomicNoReturn) const { + assert(mSize > 0); int offset; uint8_t *block_update; // Here, operations occur in FIFO order from the mAtomicOp diff --git a/src/mem/ruby/common/WriteMask.hh b/src/mem/ruby/common/WriteMask.hh index 8c6b8ce976..e620997cd8 100644 --- a/src/mem/ruby/common/WriteMask.hh +++ b/src/mem/ruby/common/WriteMask.hh @@ -78,6 +78,17 @@ class WriteMask ~WriteMask() {} + int getBlockSize() const { return mSize; } + void + setBlockSize(int size) + { + // This should only be used once if the default ctor was used. Probably + // by src/mem/ruby/protocol/RubySlicc_MemControl.sm. 
+ assert(mSize == 0); + assert(size > 0); + mSize = size; + } + void clear() { @@ -87,6 +98,7 @@ class WriteMask bool test(int offset) const { + assert(mSize > 0); assert(offset < mSize); return mMask[offset]; } @@ -94,6 +106,7 @@ class WriteMask void setMask(int offset, int len, bool val = true) { + assert(mSize > 0); assert(mSize >= (offset + len)); for (int i = 0; i < len; i++) { mMask[offset + i] = val; @@ -102,6 +115,7 @@ class WriteMask void fillMask() { + assert(mSize > 0); for (int i = 0; i < mSize; i++) { mMask[i] = true; } @@ -111,6 +125,7 @@ class WriteMask getMask(int offset, int len) const { bool tmp = true; + assert(mSize > 0); assert(mSize >= (offset + len)); for (int i = 0; i < len; i++) { tmp = tmp & mMask.at(offset + i); @@ -122,6 +137,7 @@ class WriteMask isOverlap(const WriteMask &readMask) const { bool tmp = false; + assert(mSize > 0); assert(mSize == readMask.mSize); for (int i = 0; i < mSize; i++) { if (readMask.mMask.at(i)) { @@ -135,6 +151,7 @@ class WriteMask containsMask(const WriteMask &readMask) const { bool tmp = true; + assert(mSize > 0); assert(mSize == readMask.mSize); for (int i = 0; i < mSize; i++) { if (readMask.mMask.at(i)) { @@ -146,6 +163,7 @@ class WriteMask bool isEmpty() const { + assert(mSize > 0); for (int i = 0; i < mSize; i++) { if (mMask.at(i)) { return false; @@ -157,6 +175,7 @@ class WriteMask bool isFull() const { + assert(mSize > 0); for (int i = 0; i < mSize; i++) { if (!mMask.at(i)) { return false; @@ -168,6 +187,7 @@ class WriteMask void andMask(const WriteMask & writeMask) { + assert(mSize > 0); assert(mSize == writeMask.mSize); for (int i = 0; i < mSize; i++) { mMask[i] = (mMask.at(i)) && (writeMask.mMask.at(i)); @@ -182,6 +202,7 @@ class WriteMask void orMask(const WriteMask & writeMask) { + assert(mSize > 0); assert(mSize == writeMask.mSize); for (int i = 0; i < mSize; i++) { mMask[i] = (mMask.at(i)) || (writeMask.mMask.at(i)); @@ -196,6 +217,7 @@ class WriteMask void setInvertedMask(const WriteMask & 
writeMask) { + assert(mSize > 0); assert(mSize == writeMask.mSize); for (int i = 0; i < mSize; i++) { mMask[i] = !writeMask.mMask.at(i); @@ -205,6 +227,7 @@ class WriteMask int firstBitSet(bool val, int offset = 0) const { + assert(mSize > 0); for (int i = offset; i < mSize; ++i) if (mMask[i] == val) return i; @@ -214,6 +237,7 @@ class WriteMask int count(int offset = 0) const { + assert(mSize > 0); int count = 0; for (int i = offset; i < mSize; ++i) count += mMask[i]; diff --git a/src/mem/ruby/network/MessageBuffer.cc b/src/mem/ruby/network/MessageBuffer.cc index 9a4439a538..8b3a724469 100644 --- a/src/mem/ruby/network/MessageBuffer.cc +++ b/src/mem/ruby/network/MessageBuffer.cc @@ -47,7 +47,6 @@ #include "base/random.hh" #include "base/stl_helpers.hh" #include "debug/RubyQueue.hh" -#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -216,6 +215,7 @@ random_time() void MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, + bool ruby_is_random, bool ruby_warmup, bool bypassStrictFIFO) { // record current time incase we have a pop that also adjusts my size @@ -237,7 +237,7 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, // is turned on and this buffer allows it if ((m_randomization == MessageRandomization::disabled) || ((m_randomization == MessageRandomization::ruby_system) && - !RubySystem::getRandomization())) { + !ruby_is_random)) { // No randomization arrival_time = current_time + delta; } else { @@ -265,7 +265,7 @@ MessageBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, } // If running a cache trace, don't worry about the last arrival checks - if (!RubySystem::getWarmupEnabled()) { + if (!ruby_warmup) { m_last_arrival_time = arrival_time; } @@ -447,7 +447,6 @@ MessageBuffer::stallMessage(Addr addr, Tick current_time) { DPRINTF(RubyQueue, "Stalling due to %#x\n", addr); assert(isReady(current_time)); - assert(getOffset(addr) == 0); MsgPtr message = m_prio_heap.front(); // Since the message will 
just be moved to stall map, indicate that the @@ -479,7 +478,8 @@ MessageBuffer::deferEnqueueingMessage(Addr addr, MsgPtr message) } void -MessageBuffer::enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay) +MessageBuffer::enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay, + bool ruby_is_random, bool ruby_warmup) { assert(!isDeferredMsgMapEmpty(addr)); std::vector& msg_vec = m_deferred_msg_map[addr]; @@ -487,7 +487,7 @@ MessageBuffer::enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay) // enqueue all deferred messages associated with this address for (MsgPtr m : msg_vec) { - enqueue(m, curTime, delay); + enqueue(m, curTime, delay, ruby_is_random, ruby_warmup); } msg_vec.clear(); diff --git a/src/mem/ruby/network/MessageBuffer.hh b/src/mem/ruby/network/MessageBuffer.hh index 03a0454433..b45e531d11 100644 --- a/src/mem/ruby/network/MessageBuffer.hh +++ b/src/mem/ruby/network/MessageBuffer.hh @@ -90,13 +90,14 @@ class MessageBuffer : public SimObject Tick readyTime() const; void - delayHead(Tick current_time, Tick delta) + delayHead(Tick current_time, Tick delta, bool ruby_is_random, + bool ruby_warmup) { MsgPtr m = m_prio_heap.front(); std::pop_heap(m_prio_heap.begin(), m_prio_heap.end(), std::greater()); m_prio_heap.pop_back(); - enqueue(m, current_time, delta); + enqueue(m, current_time, delta, ruby_is_random, ruby_warmup); } bool areNSlotsAvailable(unsigned int n, Tick curTime); @@ -124,6 +125,7 @@ class MessageBuffer : public SimObject const MsgPtr &peekMsgPtr() const { return m_prio_heap.front(); } void enqueue(MsgPtr message, Tick curTime, Tick delta, + bool ruby_is_random, bool ruby_warmup, bool bypassStrictFIFO = false); // Defer enqueueing a message to a later cycle by putting it aside and not @@ -135,7 +137,8 @@ class MessageBuffer : public SimObject // enqueue all previously deferred messages that are associated with the // input address - void enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay); + void 
enqueueDeferredMessages(Addr addr, Tick curTime, Tick delay, + bool ruby_is_random, bool ruby_warmup); bool isDeferredMsgMapEmpty(Addr addr) const; //! Updates the delay cycles of the message at the head of the queue, diff --git a/src/mem/ruby/network/Network.cc b/src/mem/ruby/network/Network.cc index 757ed9498e..480b5bcef0 100644 --- a/src/mem/ruby/network/Network.cc +++ b/src/mem/ruby/network/Network.cc @@ -65,7 +65,8 @@ Network::Network(const Params &p) "%s: data message size > cache line size", name()); m_data_msg_size = p.data_msg_size + m_control_msg_size; - params().ruby_system->registerNetwork(this); + m_ruby_system = p.ruby_system; + m_ruby_system->registerNetwork(this); // Populate localNodeVersions with the version of each MachineType in // this network. This will be used to compute a global to local ID. @@ -102,7 +103,8 @@ Network::Network(const Params &p) m_topology_ptr = new Topology(m_nodes, p.routers.size(), m_virtual_networks, - p.ext_links, p.int_links); + p.ext_links, p.int_links, + m_ruby_system); // Allocate to and from queues // Queues that are getting messages from protocol @@ -246,7 +248,7 @@ Network::addressToNodeID(Addr addr, MachineType mtype) } } } - return MachineType_base_count(mtype); + return m_ruby_system->MachineType_base_count(mtype); } NodeID @@ -256,5 +258,23 @@ Network::getLocalNodeID(NodeID global_id) const return globalToLocalMap.at(global_id); } +bool +Network::getRandomization() const +{ + return m_ruby_system->getRandomization(); +} + +bool +Network::getWarmupEnabled() const +{ + return m_ruby_system->getWarmupEnabled(); +} + +int +Network::MachineType_base_number(const MachineType& obj) +{ + return m_ruby_system->MachineType_base_number(obj); +} + } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/network/Network.hh b/src/mem/ruby/network/Network.hh index 8ca68a0279..c0d21af240 100644 --- a/src/mem/ruby/network/Network.hh +++ b/src/mem/ruby/network/Network.hh @@ -78,6 +78,7 @@ namespace ruby class NetDest; 
class MessageBuffer; +class RubySystem; class Network : public ClockedObject { @@ -147,6 +148,10 @@ class Network : public ClockedObject NodeID getLocalNodeID(NodeID global_id) const; + bool getRandomization() const; + bool getWarmupEnabled() const; + RubySystem *getRubySystem() const { return m_ruby_system; } + protected: // Private copy constructor and assignment operator Network(const Network& obj); @@ -176,6 +181,12 @@ class Network : public ClockedObject // Global NodeID to local node map. If there are not multiple networks in // the same RubySystem, this is a one-to-one mapping of global to local. std::unordered_map globalToLocalMap; + + // For accessing if randomization/warmup are turned on. We cannot store + those values in the constructor in case we are constructed first. + RubySystem *m_ruby_system = nullptr; + + int MachineType_base_number(const MachineType& obj); }; inline std::ostream& diff --git a/src/mem/ruby/network/Topology.cc b/src/mem/ruby/network/Topology.cc index 39444c9023..b2cd7897f8 100644 --- a/src/mem/ruby/network/Topology.cc +++ b/src/mem/ruby/network/Topology.cc @@ -37,6 +37,7 @@ #include "mem/ruby/network/BasicLink.hh" #include "mem/ruby/network/Network.hh" #include "mem/ruby/slicc_interface/AbstractController.hh" +#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -56,10 +57,12 @@ const int INFINITE_LATENCY = 10000; // Yes, this is a big hack Topology::Topology(uint32_t num_nodes, uint32_t num_routers, uint32_t num_vnets, const std::vector &ext_links, - const std::vector &int_links) - : m_nodes(MachineType_base_number(MachineType_NUM)), + const std::vector &int_links, + RubySystem *ruby_system) + : m_nodes(ruby_system->MachineType_base_number(MachineType_NUM)), m_number_of_switches(num_routers), m_vnets(num_vnets), - m_ext_link_vector(ext_links), m_int_link_vector(int_links) + m_ext_link_vector(ext_links), m_int_link_vector(int_links), + m_ruby_system(ruby_system) { // Total nodes/controllers in network assert(m_nodes > 
1); @@ -78,7 +81,8 @@ Topology::Topology(uint32_t num_nodes, uint32_t num_routers, AbstractController *abs_cntrl = ext_link->params().ext_node; BasicRouter *router = ext_link->params().int_node; - int machine_base_idx = MachineType_base_number(abs_cntrl->getType()); + int machine_base_idx = + ruby_system->MachineType_base_number(abs_cntrl->getType()); int ext_idx1 = machine_base_idx + abs_cntrl->getVersion(); int ext_idx2 = ext_idx1 + m_nodes; int int_idx = router->params().router_id + 2*m_nodes; @@ -189,7 +193,7 @@ Topology::createLinks(Network *net) for (int i = 0; i < topology_weights[0].size(); i++) { for (int j = 0; j < topology_weights[0][i].size(); j++) { std::vector routingMap; - routingMap.resize(m_vnets); + routingMap.resize(m_vnets, m_ruby_system); // Not all sources and destinations are connected // by direct links. We only construct the links @@ -264,7 +268,7 @@ Topology::makeLink(Network *net, SwitchID src, SwitchID dest, for (int l = 0; l < links.size(); l++) { link_entry = links[l]; std::vector linkRoute; - linkRoute.resize(m_vnets); + linkRoute.resize(m_vnets, m_ruby_system); BasicLink *link = link_entry.link; if (link->mVnets.size() == 0) { net->makeExtInLink(src, dest - (2 * m_nodes), link, @@ -287,7 +291,7 @@ Topology::makeLink(Network *net, SwitchID src, SwitchID dest, for (int l = 0; l < links.size(); l++) { link_entry = links[l]; std::vector linkRoute; - linkRoute.resize(m_vnets); + linkRoute.resize(m_vnets, m_ruby_system); BasicLink *link = link_entry.link; if (link->mVnets.size() == 0) { net->makeExtOutLink(src - (2 * m_nodes), node, link, @@ -309,7 +313,7 @@ Topology::makeLink(Network *net, SwitchID src, SwitchID dest, for (int l = 0; l < links.size(); l++) { link_entry = links[l]; std::vector linkRoute; - linkRoute.resize(m_vnets); + linkRoute.resize(m_vnets, m_ruby_system); BasicLink *link = link_entry.link; if (link->mVnets.size() == 0) { net->makeInternalLink(src - (2 * m_nodes), @@ -413,16 +417,17 @@ 
Topology::shortest_path_to_node(SwitchID src, SwitchID next, const Matrix &weights, const Matrix &dist, int vnet) { - NetDest result; + NetDest result(m_ruby_system); int d = 0; int machines; int max_machines; machines = MachineType_NUM; - max_machines = MachineType_base_number(MachineType_NUM); + max_machines = m_ruby_system->MachineType_base_number(MachineType_NUM); for (int m = 0; m < machines; m++) { - for (NodeID i = 0; i < MachineType_base_count((MachineType)m); i++) { + for (NodeID i = 0; + i < m_ruby_system->MachineType_base_count((MachineType)m); i++) { // we use "d+max_machines" below since the "destination" // switches for the machines are numbered // [MachineType_base_number(MachineType_NUM)... diff --git a/src/mem/ruby/network/Topology.hh b/src/mem/ruby/network/Topology.hh index 301811e6ab..7ab395762a 100644 --- a/src/mem/ruby/network/Topology.hh +++ b/src/mem/ruby/network/Topology.hh @@ -80,7 +80,8 @@ class Topology public: Topology(uint32_t num_nodes, uint32_t num_routers, uint32_t num_vnets, const std::vector &ext_links, - const std::vector &int_links); + const std::vector &int_links, + RubySystem *ruby_system); uint32_t numSwitches() const { return m_number_of_switches; } void createLinks(Network *net); @@ -108,7 +109,7 @@ class Topology const Matrix &weights, const Matrix &dist, int vnet); - const uint32_t m_nodes; + uint32_t m_nodes; const uint32_t m_number_of_switches; int m_vnets; @@ -116,6 +117,8 @@ class Topology std::vector m_int_link_vector; LinkMap m_link_map; + + RubySystem *m_ruby_system = nullptr; }; inline std::ostream& diff --git a/src/mem/ruby/network/garnet/NetworkInterface.cc b/src/mem/ruby/network/garnet/NetworkInterface.cc index 31d625c4d5..8564baca6d 100644 --- a/src/mem/ruby/network/garnet/NetworkInterface.cc +++ b/src/mem/ruby/network/garnet/NetworkInterface.cc @@ -41,6 +41,7 @@ #include "mem/ruby/network/garnet/Credit.hh" #include "mem/ruby/network/garnet/flitBuffer.hh" #include "mem/ruby/slicc_interface/Message.hh" +#include 
"mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -244,7 +245,9 @@ NetworkInterface::wakeup() outNode_ptr[vnet]->areNSlotsAvailable(1, curTime)) { // Space is available. Enqueue to protocol buffer. outNode_ptr[vnet]->enqueue(t_flit->get_msg_ptr(), curTime, - cyclesToTicks(Cycles(1))); + cyclesToTicks(Cycles(1)), + m_net_ptr->getRandomization(), + m_net_ptr->getWarmupEnabled()); // Simply send a credit back since we are not buffering // this flit in the NI @@ -332,7 +335,9 @@ NetworkInterface::checkStallQueue() if (outNode_ptr[vnet]->areNSlotsAvailable(1, curTime)) { outNode_ptr[vnet]->enqueue(stallFlit->get_msg_ptr(), - curTime, cyclesToTicks(Cycles(1))); + curTime, cyclesToTicks(Cycles(1)), + m_net_ptr->getRandomization(), + m_net_ptr->getWarmupEnabled()); // Send back a credit with free signal now that the // VC is no longer stalled. @@ -699,6 +704,12 @@ NetworkInterface::functionalWrite(Packet *pkt) return num_functional_writes; } +int +NetworkInterface::MachineType_base_number(const MachineType& obj) +{ + return m_net_ptr->getRubySystem()->MachineType_base_number(obj); +} + } // namespace garnet } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/network/garnet/NetworkInterface.hh b/src/mem/ruby/network/garnet/NetworkInterface.hh index d42db5ee2a..cd7bb3b171 100644 --- a/src/mem/ruby/network/garnet/NetworkInterface.hh +++ b/src/mem/ruby/network/garnet/NetworkInterface.hh @@ -306,6 +306,8 @@ class NetworkInterface : public ClockedObject, public Consumer InputPort *getInportForVnet(int vnet); OutputPort *getOutportForVnet(int vnet); + + int MachineType_base_number(const MachineType& obj); }; } // namespace garnet diff --git a/src/mem/ruby/network/simple/PerfectSwitch.cc b/src/mem/ruby/network/simple/PerfectSwitch.cc index 74d78e3aae..20d57f04be 100644 --- a/src/mem/ruby/network/simple/PerfectSwitch.cc +++ b/src/mem/ruby/network/simple/PerfectSwitch.cc @@ -268,7 +268,8 @@ PerfectSwitch::operateMessageBuffer(MessageBuffer *buffer, int vnet) 
buffer->getIncomingLink(), vnet, outgoing, vnet); out_port.buffers[vnet]->enqueue(msg_ptr, current_time, - out_port.latency); + out_port.latency, m_switch->getNetPtr()->getRandomization(), + m_switch->getNetPtr()->getWarmupEnabled()); } } } diff --git a/src/mem/ruby/network/simple/Switch.hh b/src/mem/ruby/network/simple/Switch.hh index 86abfda871..e6e22022bc 100644 --- a/src/mem/ruby/network/simple/Switch.hh +++ b/src/mem/ruby/network/simple/Switch.hh @@ -104,6 +104,7 @@ class Switch : public BasicRouter void print(std::ostream& out) const; void init_net_ptr(SimpleNetwork* net_ptr) { m_network_ptr = net_ptr; } + SimpleNetwork* getNetPtr() const { return m_network_ptr; } bool functionalRead(Packet *); bool functionalRead(Packet *, WriteMask&); diff --git a/src/mem/ruby/network/simple/Throttle.cc b/src/mem/ruby/network/simple/Throttle.cc index 20cebccabb..fc5649330f 100644 --- a/src/mem/ruby/network/simple/Throttle.cc +++ b/src/mem/ruby/network/simple/Throttle.cc @@ -199,7 +199,9 @@ Throttle::operateVnet(int vnet, int channel, int &total_bw_remaining, // Move the message in->dequeue(current_time); out->enqueue(msg_ptr, current_time, - m_switch->cyclesToTicks(m_link_latency)); + m_switch->cyclesToTicks(m_link_latency), + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); // Count the message (*(throttleStats. 
diff --git a/src/mem/ruby/profiler/AddressProfiler.cc b/src/mem/ruby/profiler/AddressProfiler.cc index 05fc486c63..ce40c35a9f 100644 --- a/src/mem/ruby/profiler/AddressProfiler.cc +++ b/src/mem/ruby/profiler/AddressProfiler.cc @@ -34,6 +34,7 @@ #include "base/stl_helpers.hh" #include "mem/ruby/profiler/Profiler.hh" #include "mem/ruby/protocol/RubyRequest.hh" +#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -307,7 +308,8 @@ AddressProfiler::addTraceSample(Addr data_addr, Addr pc_addr, } // record data address trace info - data_addr = makeLineAddress(data_addr); + int block_size_bits = m_profiler->m_ruby_system->getBlockSizeBits(); + data_addr = makeLineAddress(data_addr, block_size_bits); lookupTraceForAddress(data_addr, m_dataAccessTrace). update(type, access_mode, id, sharing_miss); diff --git a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm index ca606a5921..43fb96c375 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm @@ -95,7 +95,7 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)") } TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; void set_cache_entry(AbstractCacheEntry b); void unset_cache_entry(); diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm index 5d98a73041..d1e1ffb7b0 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm @@ -121,7 +121,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") } TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; int WTcnt, default="0"; int Fcnt, default="0"; bool inFlush, default="false"; diff --git a/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm 
b/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm index bcf99ff362..ed5e40cfa1 100644 --- a/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm +++ b/src/mem/ruby/protocol/MESI_Three_Level-L1cache.sm @@ -167,7 +167,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Cycles ticksToCycles(Tick t); diff --git a/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm b/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm index 2b5935dee5..29f6d8e87d 100644 --- a/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm +++ b/src/mem/ruby/protocol/MESI_Two_Level-L1cache.sm @@ -167,7 +167,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Cycles ticksToCycles(Tick t); diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm index 5d85ad2fc6..bac7fd1b12 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionBuffer.sm @@ -181,7 +181,7 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol") // Stores only region addresses TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -195,8 +195,8 @@ machine(MachineType:RegionBuffer, "Region Buffer for AMD_Base-like protocol") Cycles curCycle(); MachineID mapAddressToMachine(Addr addr, MachineType mtype); - int blockBits, default="RubySystem::getBlockSizeBits()"; - int 
blockBytes, default="RubySystem::getBlockSizeBytes()"; + int blockBits, default="m_ruby_system->getBlockSizeBits()"; + int blockBytes, default="m_ruby_system->getBlockSizeBytes()"; int regionBits, default="log2(m_blocksPerRegion)"; // Functions diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm index 2464e038ff..3f1ba2540f 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-RegionDir.sm @@ -155,7 +155,7 @@ machine(MachineType:RegionDir, "Region Directory for AMD_Base-like protocol") // Stores only region addresses TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -169,8 +169,8 @@ machine(MachineType:RegionDir, "Region Directory for AMD_Base-like protocol") Cycles curCycle(); MachineID mapAddressToMachine(Addr addr, MachineType mtype); - int blockBits, default="RubySystem::getBlockSizeBits()"; - int blockBytes, default="RubySystem::getBlockSizeBytes()"; + int blockBits, default="m_ruby_system->getBlockSizeBits()"; + int blockBytes, default="m_ruby_system->getBlockSizeBytes()"; int regionBits, default="log2(m_blocksPerRegion)"; // Functions diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm index 17a92f5f90..5b5ab3148a 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm @@ -183,7 +183,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm 
b/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm index 4e9e9597aa..b53ebe8ee2 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-probeFilter.sm @@ -192,7 +192,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") TBETable TBEs, template="", constructor="m_number_of_TBEs"; - int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int TCC_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); diff --git a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm index 4a513d6d3f..b6410d12e7 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_directory-dir.sm @@ -143,7 +143,7 @@ machine(MachineType:Directory, "Directory protocol") bool isPresent(Addr); } - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // ** OBJECTS ** TBETable TBEs, template="", constructor="m_number_of_TBEs"; diff --git a/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm index 865fce4e3c..24f8146a02 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_token-L1cache.sm @@ -198,7 +198,7 @@ machine(MachineType:L1Cache, "Token protocol") TBETable L1_TBEs, template="", constructor="m_number_of_TBEs"; bool starving, default="false"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; PersistentTable persistentTable; TimerTable useTimerTable; diff --git a/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm b/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm index 7f2bdf94e0..8d035a61bb 100644 --- a/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm +++ b/src/mem/ruby/protocol/MOESI_CMP_token-dir.sm @@ -171,7 +171,7 @@ machine(MachineType:Directory, 
"Token protocol") TBETable TBEs, template="", constructor="m_number_of_TBEs"; bool starving, default="false"; - int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; + int l2_select_low_bit, default="m_ruby_system->getBlockSizeBits()"; Tick clockEdge(); Tick clockEdge(Cycles c); diff --git a/src/mem/ruby/protocol/RubySlicc_Exports.sm b/src/mem/ruby/protocol/RubySlicc_Exports.sm index 8f0341f328..97770e3516 100644 --- a/src/mem/ruby/protocol/RubySlicc_Exports.sm +++ b/src/mem/ruby/protocol/RubySlicc_Exports.sm @@ -72,6 +72,8 @@ structure(WriteMask, external="yes", desc="...") { int count(); int count(int); bool test(int); + int getBlockSize(); + void setBlockSize(int); } structure(DataBlock, external = "yes", desc="..."){ diff --git a/src/mem/ruby/protocol/RubySlicc_MemControl.sm b/src/mem/ruby/protocol/RubySlicc_MemControl.sm index 012b169dea..848ada4d12 100644 --- a/src/mem/ruby/protocol/RubySlicc_MemControl.sm +++ b/src/mem/ruby/protocol/RubySlicc_MemControl.sm @@ -89,7 +89,9 @@ structure(MemoryMsg, desc="...", interface="Message") { if ((MessageSize == MessageSizeType:Response_Data) || (MessageSize == MessageSizeType:Writeback_Data)) { WriteMask read_mask; - read_mask.setMask(addressOffset(addr, makeLineAddress(addr)), Len, true); + read_mask.setBlockSize(mask.getBlockSize()); + read_mask.setMask(addressOffset(addr, + makeLineAddress(addr, mask.getBlockSize())), Len, true); if (MessageSize != MessageSizeType:Writeback_Data) { read_mask.setInvertedMask(mask); } diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm index 4e0e4f4511..848d16491d 100644 --- a/src/mem/ruby/protocol/RubySlicc_Types.sm +++ b/src/mem/ruby/protocol/RubySlicc_Types.sm @@ -94,7 +94,7 @@ structure (Set, external = "yes", non_obj="yes") { NodeID smallestElement(); } -structure (NetDest, external = "yes", non_obj="yes") { +structure (NetDest, external = "yes", non_obj="yes", implicit_ctor="m_ruby_system") { void setSize(int); void 
setSize(int, int); void add(NodeID); diff --git a/src/mem/ruby/protocol/RubySlicc_Util.sm b/src/mem/ruby/protocol/RubySlicc_Util.sm index 104c7c034c..93976bc4e1 100644 --- a/src/mem/ruby/protocol/RubySlicc_Util.sm +++ b/src/mem/ruby/protocol/RubySlicc_Util.sm @@ -52,6 +52,7 @@ Addr intToAddress(int addr); int addressOffset(Addr addr, Addr base); int max_tokens(); Addr makeLineAddress(Addr addr); +Addr makeLineAddress(Addr addr, int cacheLineBits); int getOffset(Addr addr); int mod(int val, int mod); Addr bitSelect(Addr addr, int small, int big); diff --git a/src/mem/ruby/protocol/chi/CHI-cache.sm b/src/mem/ruby/protocol/chi/CHI-cache.sm index dcd142ea47..a644bbe506 100644 --- a/src/mem/ruby/protocol/chi/CHI-cache.sm +++ b/src/mem/ruby/protocol/chi/CHI-cache.sm @@ -574,7 +574,7 @@ machine(MachineType:Cache, "Cache coherency protocol") : //////////////////////////////////////////////////////////////////////////// // Cache block size - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // CacheEntry structure(CacheEntry, interface="AbstractCacheEntry") { diff --git a/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm b/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm index aa27c40964..f7616e9ec4 100644 --- a/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm +++ b/src/mem/ruby/protocol/chi/CHI-dvm-misc-node.sm @@ -192,7 +192,7 @@ machine(MachineType:MiscNode, "CHI Misc Node for handling and distrbuting DVM op //////////////////////////////////////////////////////////////////////////// // Cache block size - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // Helper class for tracking expected response and data messages structure(ExpectedMap, external ="yes") { diff --git a/src/mem/ruby/protocol/chi/CHI-mem.sm b/src/mem/ruby/protocol/chi/CHI-mem.sm index 46f57456a5..58f22d2007 100644 --- a/src/mem/ruby/protocol/chi/CHI-mem.sm +++ 
b/src/mem/ruby/protocol/chi/CHI-mem.sm @@ -157,7 +157,7 @@ machine(MachineType:Memory, "Memory controller interface") : //////////////////////////////////////////////////////////////////////////// // Cache block size - int blockSize, default="RubySystem::getBlockSizeBytes()"; + int blockSize, default="m_ruby_system->getBlockSizeBytes()"; // TBE fields structure(TBE, desc="...") { diff --git a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh index 0e00a60c28..1305deddce 100644 --- a/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh +++ b/src/mem/ruby/slicc_interface/AbstractCacheEntry.hh @@ -59,6 +59,8 @@ namespace gem5 namespace ruby { +class RubySystem; + class AbstractCacheEntry : public ReplaceableEntry { private: @@ -78,16 +80,15 @@ class AbstractCacheEntry : public ReplaceableEntry // The methods below are those called by ruby runtime, add when it // is absolutely necessary and should all be virtual function. - virtual DataBlock& + [[noreturn]] virtual DataBlock& getDataBlk() { panic("getDataBlk() not implemented!"); - - // Dummy return to appease the compiler - static DataBlock b; - return b; } + virtual void initBlockSize(int block_size) { }; + virtual void setRubySystem(RubySystem *rs) { }; + int validBlocks; virtual int& getNumValidBlocks() { diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index 36092387ac..0bcc662629 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -89,6 +89,9 @@ AbstractController::init() getMemReqQueue()->setConsumer(this); } + downstreamDestinations.setRubySystem(m_ruby_system); + upstreamDestinations.setRubySystem(m_ruby_system); + // Initialize the addr->downstream machine mappings. Multiple machines // in downstream_destinations can have the same address range if they have // different types. 
If this is the case, mapAddressToDownstreamMachine @@ -268,7 +271,7 @@ AbstractController::serviceMemoryQueue() } const MemoryMsg *mem_msg = (const MemoryMsg*)mem_queue->peek(); - unsigned int req_size = RubySystem::getBlockSizeBytes(); + unsigned int req_size = m_ruby_system->getBlockSizeBytes(); if (mem_msg->m_Len > 0) { req_size = mem_msg->m_Len; } @@ -294,7 +297,7 @@ AbstractController::serviceMemoryQueue() SenderState *s = new SenderState(mem_msg->m_Sender); pkt->pushSenderState(s); - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { // Use functional rather than timing accesses during warmup mem_queue->dequeue(clockEdge()); memoryPort.sendFunctional(pkt); @@ -382,7 +385,10 @@ AbstractController::recvTimingResp(PacketPtr pkt) return false; } - std::shared_ptr msg = std::make_shared(clockEdge()); + int blk_size = m_ruby_system->getBlockSizeBytes(); + + std::shared_ptr msg = + std::make_shared(clockEdge(), blk_size, m_ruby_system); (*msg).m_addr = pkt->getAddr(); (*msg).m_Sender = m_machineID; @@ -396,7 +402,7 @@ AbstractController::recvTimingResp(PacketPtr pkt) // Copy data from the packet (*msg).m_DataBlk.setData(pkt->getPtr(), 0, - RubySystem::getBlockSizeBytes()); + m_ruby_system->getBlockSizeBytes()); } else if (pkt->isWrite()) { (*msg).m_Type = MemoryRequestType_MEMORY_WB; (*msg).m_MessageSize = MessageSizeType_Writeback_Control; @@ -404,7 +410,8 @@ AbstractController::recvTimingResp(PacketPtr pkt) panic("Incorrect packet type received from memory controller!"); } - memRspQueue->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); + memRspQueue->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); delete pkt; return true; } @@ -471,6 +478,45 @@ AbstractController::sendRetryRespToMem() { } } +Addr +AbstractController::getOffset(Addr addr) const +{ + return ruby::getOffset(addr, m_ruby_system->getBlockSizeBits()); +} + +Addr 
+AbstractController::makeLineAddress(Addr addr) const +{ + return ruby::makeLineAddress(addr, m_ruby_system->getBlockSizeBits()); +} + +std::string +AbstractController::printAddress(Addr addr) const +{ + return ruby::printAddress(addr, m_ruby_system->getBlockSizeBits()); +} + +NetDest +AbstractController::broadcast(MachineType type) +{ + assert(m_ruby_system != nullptr); + NodeID type_count = m_ruby_system->MachineType_base_count(type); + + NetDest dest; + for (NodeID i = 0; i < type_count; i++) { + MachineID mach = {type, i}; + dest.add(mach); + } + return dest; +} + +int +AbstractController::machineCount(MachineType machType) +{ + assert(m_ruby_system != nullptr); + return m_ruby_system->MachineType_base_count(machType); +} + bool AbstractController::MemoryPort::recvTimingResp(PacketPtr pkt) { diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index ce6a6972af..79f67073a6 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -72,6 +72,7 @@ namespace ruby class Network; class GPUCoalescer; class DMASequencer; +class RubySystem; // used to communicate that an in_port peeked the wrong message type class RejectException: public std::exception @@ -229,6 +230,11 @@ class AbstractController : public ClockedObject, public Consumer /** List of upstream destinations (towards the CPU) */ const NetDest& allUpstreamDest() const { return upstreamDestinations; } + // Helper methods for commonly used functions called in common/Address.hh + Addr getOffset(Addr addr) const; + Addr makeLineAddress(Addr addr) const; + std::string printAddress(Addr addr) const; + protected: //! Profiles original cache requests including PUTs void profileRequest(const std::string &request); @@ -452,6 +458,13 @@ class AbstractController : public ClockedObject, public Consumer {} }; + RubySystem *m_ruby_system = nullptr; + + // Formerly in RubySlicc_ComponentMapping.hh.
Moved here to access + // RubySystem pointer. + NetDest broadcast(MachineType type); + int machineCount(MachineType machType); + private: /** The address range to which the controller responds on the CPU side. */ const AddrRangeList addrRanges; diff --git a/src/mem/ruby/slicc_interface/Message.hh b/src/mem/ruby/slicc_interface/Message.hh index 5c824c4a38..31fb5e8e92 100644 --- a/src/mem/ruby/slicc_interface/Message.hh +++ b/src/mem/ruby/slicc_interface/Message.hh @@ -62,10 +62,12 @@ typedef std::shared_ptr MsgPtr; class Message { public: - Message(Tick curTime) - : m_time(curTime), + Message(Tick curTime, int block_size, const RubySystem *rs) + : m_block_size(block_size), + m_time(curTime), m_LastEnqueueTime(curTime), - m_DelayedTicks(0), m_msg_counter(0) + m_DelayedTicks(0), m_msg_counter(0), + p_ruby_system(rs) { } Message(const Message &other) = default; @@ -121,6 +123,9 @@ class Message int getVnet() const { return vnet; } void setVnet(int net) { vnet = net; } + protected: + int m_block_size = 0; + private: Tick m_time; Tick m_LastEnqueueTime; // my last enqueue time @@ -130,6 +135,9 @@ class Message // Variables for required network traversal int incoming_link; int vnet; + + // Needed to call MachineType_base_count/level + const RubySystem *p_ruby_system = nullptr; + }; inline bool diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh index a258a18f9a..58eae229be 100644 --- a/src/mem/ruby/slicc_interface/RubyRequest.hh +++ b/src/mem/ruby/slicc_interface/RubyRequest.hh @@ -86,11 +86,12 @@ class RubyRequest : public Message bool m_isSLCSet; bool m_isSecure; - RubyRequest(Tick curTime, uint64_t _paddr, int _len, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, + uint64_t _paddr, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No, ContextID _proc_id = 100, ContextID _core_id = 99) - : Message(curTime), + : Message(curTime, block_size,
rs), m_PhysicalAddress(_paddr), m_Type(_type), m_ProgramCounter(_pc), @@ -99,13 +100,16 @@ class RubyRequest : public Message m_Prefetch(_pb), m_pkt(_pkt), m_contextId(_core_id), + m_writeMask(block_size), + m_WTData(block_size), m_htmFromTransaction(false), m_htmTransactionUid(0), m_isTlbi(false), m_tlbiTransactionUid(0), m_isSecure(m_pkt ? m_pkt->req->isSecure() : false) { - m_LineAddress = makeLineAddress(m_PhysicalAddress); + int block_size_bits = floorLog2(block_size); + m_LineAddress = makeLineAddress(m_PhysicalAddress, block_size_bits); if (_pkt) { m_isGLCSet = m_pkt->req->isGLCSet(); m_isSLCSet = m_pkt->req->isSLCSet(); @@ -116,10 +120,10 @@ class RubyRequest : public Message } /** RubyRequest for memory management commands */ - RubyRequest(Tick curTime, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, ContextID _proc_id, ContextID _core_id) - : Message(curTime), + : Message(curTime, block_size, rs), m_PhysicalAddress(0), m_Type(_type), m_ProgramCounter(_pc), @@ -128,6 +132,8 @@ class RubyRequest : public Message m_Prefetch(PrefetchBit_No), m_pkt(_pkt), m_contextId(_core_id), + m_writeMask(block_size), + m_WTData(block_size), m_htmFromTransaction(false), m_htmTransactionUid(0), m_isTlbi(false), @@ -144,14 +150,14 @@ class RubyRequest : public Message } } - RubyRequest(Tick curTime, uint64_t _paddr, int _len, - uint64_t _pc, RubyRequestType _type, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, + uint64_t _paddr, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb, unsigned _proc_id, unsigned _core_id, int _wm_size, std::vector & _wm_mask, DataBlock & _Data, uint64_t _instSeqNum = 0) - : Message(curTime), + : Message(curTime, block_size, rs), m_PhysicalAddress(_paddr), m_Type(_type), m_ProgramCounter(_pc), @@ -170,7 +176,8 @@ class RubyRequest : public Message m_tlbiTransactionUid(0), 
m_isSecure(m_pkt->req->isSecure()) { - m_LineAddress = makeLineAddress(m_PhysicalAddress); + int block_size_bits = floorLog2(block_size); + m_LineAddress = makeLineAddress(m_PhysicalAddress, block_size_bits); if (_pkt) { m_isGLCSet = m_pkt->req->isGLCSet(); m_isSLCSet = m_pkt->req->isSLCSet(); @@ -180,15 +187,15 @@ class RubyRequest : public Message } } - RubyRequest(Tick curTime, uint64_t _paddr, int _len, - uint64_t _pc, RubyRequestType _type, + RubyRequest(Tick curTime, int block_size, RubySystem *rs, + uint64_t _paddr, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb, unsigned _proc_id, unsigned _core_id, int _wm_size, std::vector & _wm_mask, DataBlock & _Data, std::vector< std::pair > _atomicOps, uint64_t _instSeqNum = 0) - : Message(curTime), + : Message(curTime, block_size, rs), m_PhysicalAddress(_paddr), m_Type(_type), m_ProgramCounter(_pc), @@ -207,7 +214,8 @@ class RubyRequest : public Message m_tlbiTransactionUid(0), m_isSecure(m_pkt->req->isSecure()) { - m_LineAddress = makeLineAddress(m_PhysicalAddress); + int block_size_bits = floorLog2(block_size); + m_LineAddress = makeLineAddress(m_PhysicalAddress, block_size_bits); if (_pkt) { m_isGLCSet = m_pkt->req->isGLCSet(); m_isSLCSet = m_pkt->req->isSLCSet(); @@ -218,7 +226,12 @@ class RubyRequest : public Message } } - RubyRequest(Tick curTime) : Message(curTime) {} + RubyRequest(Tick curTime, int block_size, RubySystem *rs) + : Message(curTime, block_size, rs), + m_writeMask(block_size), + m_WTData(block_size) + { + } MsgPtr clone() const { return std::shared_ptr(new RubyRequest(*this)); } diff --git a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh index 9a433d1cee..1195089fc3 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh @@ -41,17 +41,6 @@ namespace gem5 namespace ruby { -inline 
NetDest -broadcast(MachineType type) -{ - NetDest dest; - for (NodeID i = 0; i < MachineType_base_count(type); i++) { - MachineID mach = {type, i}; - dest.add(mach); - } - return dest; -} - inline MachineID mapAddressToRange(Addr addr, MachineType type, int low_bit, int num_bits, int cluster_id = 0) @@ -77,12 +66,6 @@ machineIDToMachineType(MachineID machID) return machID.type; } -inline int -machineCount(MachineType machType) -{ - return MachineType_base_count(machType); -} - inline MachineID createMachineID(MachineType type, NodeID id) { diff --git a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh index 8df56c7013..f4a49463a8 100644 --- a/src/mem/ruby/slicc_interface/RubySlicc_Util.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_Util.hh @@ -233,8 +233,9 @@ addressOffset(Addr addr, Addr base) inline bool testAndRead(Addr addr, DataBlock& blk, Packet *pkt) { - Addr pktLineAddr = makeLineAddress(pkt->getAddr()); - Addr lineAddr = makeLineAddress(addr); + int block_size_bits = floorLog2(blk.getBlockSize()); + Addr pktLineAddr = makeLineAddress(pkt->getAddr(), block_size_bits); + Addr lineAddr = makeLineAddress(addr, block_size_bits); if (pktLineAddr == lineAddr) { uint8_t *data = pkt->getPtr(); @@ -259,8 +260,10 @@ testAndRead(Addr addr, DataBlock& blk, Packet *pkt) inline bool testAndReadMask(Addr addr, DataBlock& blk, WriteMask& mask, Packet *pkt) { - Addr pktLineAddr = makeLineAddress(pkt->getAddr()); - Addr lineAddr = makeLineAddress(addr); + assert(blk.getBlockSize() == mask.getBlockSize()); + int block_size_bits = floorLog2(blk.getBlockSize()); + Addr pktLineAddr = makeLineAddress(pkt->getAddr(), block_size_bits); + Addr lineAddr = makeLineAddress(addr, block_size_bits); if (pktLineAddr == lineAddr) { uint8_t *data = pkt->getPtr(); @@ -288,8 +291,9 @@ testAndReadMask(Addr addr, DataBlock& blk, WriteMask& mask, Packet *pkt) inline bool testAndWrite(Addr addr, DataBlock& blk, Packet *pkt) { - Addr pktLineAddr = 
makeLineAddress(pkt->getAddr()); - Addr lineAddr = makeLineAddress(addr); + int block_size_bits = floorLog2(blk.getBlockSize()); + Addr pktLineAddr = makeLineAddress(pkt->getAddr(), block_size_bits); + Addr lineAddr = makeLineAddress(addr, block_size_bits); if (pktLineAddr == lineAddr) { const uint8_t *data = pkt->getConstPtr(); diff --git a/src/mem/ruby/structures/ALUFreeListArray.cc b/src/mem/ruby/structures/ALUFreeListArray.cc index 87b5cbfbd2..3e25e5b599 100644 --- a/src/mem/ruby/structures/ALUFreeListArray.cc +++ b/src/mem/ruby/structures/ALUFreeListArray.cc @@ -57,10 +57,10 @@ namespace ruby * - The same line has been accessed in the past accessLatency ticks */ -ALUFreeListArray::ALUFreeListArray(unsigned int num_ALUs, Tick access_latency) +ALUFreeListArray::ALUFreeListArray(unsigned int num_ALUs, Cycles access_clocks) { this->numALUs = num_ALUs; - this->accessLatency = access_latency; + this->accessClocks = access_clocks; } bool ALUFreeListArray::tryAccess(Addr addr) @@ -85,7 +85,7 @@ bool ALUFreeListArray::tryAccess(Addr addr) } // Block access if the line is already being used - if (record.lineAddr == makeLineAddress(addr)) { + if (record.lineAddr == makeLineAddress(addr, m_block_size_bits)) { return false; } } @@ -99,7 +99,9 @@ void ALUFreeListArray::reserve(Addr addr) // the access is valid // Add record to queue - accessQueue.push_front(AccessRecord(makeLineAddress(addr), curTick())); + accessQueue.push_front( + AccessRecord(makeLineAddress(addr, m_block_size_bits), curTick()) + ); } } // namespace ruby diff --git a/src/mem/ruby/structures/ALUFreeListArray.hh b/src/mem/ruby/structures/ALUFreeListArray.hh index bed1b00b5c..5c4fdd95f9 100644 --- a/src/mem/ruby/structures/ALUFreeListArray.hh +++ b/src/mem/ruby/structures/ALUFreeListArray.hh @@ -32,6 +32,7 @@ #include +#include "base/intmath.hh" #include "mem/ruby/common/TypeDefines.hh" #include "sim/cur_tick.hh" @@ -45,7 +46,8 @@ class ALUFreeListArray { private: unsigned int numALUs; - Tick accessLatency; 
+ Cycles accessClocks; + Tick accessLatency = 0; class AccessRecord { @@ -62,14 +64,33 @@ class ALUFreeListArray // Queue of accesses from past accessLatency cycles std::deque accessQueue; + int m_block_size_bits = 0; + public: - ALUFreeListArray(unsigned int num_ALUs, Tick access_latency); + ALUFreeListArray(unsigned int num_ALUs, Cycles access_clocks); bool tryAccess(Addr addr); void reserve(Addr addr); - Tick getLatency() const { return accessLatency; } + Tick + getLatency() const + { + assert(accessLatency > 0); + return accessLatency; + } + + void + setClockPeriod(Tick clockPeriod) + { + accessLatency = accessClocks * clockPeriod; + } + + void + setBlockSize(int block_size) + { + m_block_size_bits = floorLog2(block_size); + } }; } // namespace ruby diff --git a/src/mem/ruby/structures/BankedArray.cc b/src/mem/ruby/structures/BankedArray.cc index 0f01d5c396..2c2202dec5 100644 --- a/src/mem/ruby/structures/BankedArray.cc +++ b/src/mem/ruby/structures/BankedArray.cc @@ -42,8 +42,7 @@ namespace ruby { BankedArray::BankedArray(unsigned int banks, Cycles accessLatency, - unsigned int startIndexBit, RubySystem *rs) - : m_ruby_system(rs) + unsigned int startIndexBit) { this->banks = banks; this->accessLatency = accessLatency; @@ -78,6 +77,8 @@ BankedArray::reserve(int64_t idx) if (accessLatency == 0) return; + assert(clockPeriod > 0); + unsigned int bank = mapIndexToBank(idx); assert(bank < banks); @@ -95,7 +96,7 @@ BankedArray::reserve(int64_t idx) busyBanks[bank].idx = idx; busyBanks[bank].startAccess = curTick(); busyBanks[bank].endAccess = curTick() + - (accessLatency-1) * m_ruby_system->clockPeriod(); + (accessLatency-1) * clockPeriod; } unsigned int diff --git a/src/mem/ruby/structures/BankedArray.hh b/src/mem/ruby/structures/BankedArray.hh index c757759296..ecc984a617 100644 --- a/src/mem/ruby/structures/BankedArray.hh +++ b/src/mem/ruby/structures/BankedArray.hh @@ -48,6 +48,7 @@ class BankedArray private: unsigned int banks; Cycles accessLatency; + Tick 
clockPeriod = 0; unsigned int bankBits; unsigned int startIndexBit; RubySystem *m_ruby_system; @@ -69,7 +70,7 @@ class BankedArray public: BankedArray(unsigned int banks, Cycles accessLatency, - unsigned int startIndexBit, RubySystem *rs); + unsigned int startIndexBit); // Note: We try the access based on the cache index, not the address // This is so we don't get aliasing on blocks being replaced @@ -78,6 +79,8 @@ class BankedArray void reserve(int64_t idx); Cycles getLatency() const { return accessLatency; } + + void setClockPeriod(Tick _clockPeriod) { clockPeriod = _clockPeriod; } }; } // namespace ruby diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc index 90d67fb29b..6bc35bac7d 100644 --- a/src/mem/ruby/structures/CacheMemory.cc +++ b/src/mem/ruby/structures/CacheMemory.cc @@ -69,12 +69,9 @@ operator<<(std::ostream& out, const CacheMemory& obj) CacheMemory::CacheMemory(const Params &p) : SimObject(p), - dataArray(p.dataArrayBanks, p.dataAccessLatency, - p.start_index_bit, p.ruby_system), - tagArray(p.tagArrayBanks, p.tagAccessLatency, - p.start_index_bit, p.ruby_system), - atomicALUArray(p.atomicALUs, p.atomicLatency * - p.ruby_system->clockPeriod()), + dataArray(p.dataArrayBanks, p.dataAccessLatency, p.start_index_bit), + tagArray(p.tagArrayBanks, p.tagAccessLatency, p.start_index_bit), + atomicALUArray(p.atomicALUs, p.atomicLatency), cacheMemoryStats(this) { m_cache_size = p.size; @@ -88,12 +85,25 @@ CacheMemory::CacheMemory(const Params &p) m_replacementPolicy_ptr) ? 
true : false; } +void +CacheMemory::setRubySystem(RubySystem* rs) +{ + dataArray.setClockPeriod(rs->clockPeriod()); + tagArray.setClockPeriod(rs->clockPeriod()); + atomicALUArray.setClockPeriod(rs->clockPeriod()); + atomicALUArray.setBlockSize(rs->getBlockSizeBytes()); + + if (m_block_size == 0) { + m_block_size = rs->getBlockSizeBytes(); + } + + m_ruby_system = rs; +} + void CacheMemory::init() { - if (m_block_size == 0) { - m_block_size = RubySystem::getBlockSizeBytes(); - } + assert(m_block_size != 0); m_cache_num_sets = (m_cache_size / m_cache_assoc) / m_block_size; assert(m_cache_num_sets > 1); m_cache_num_set_bits = floorLog2(m_cache_num_sets); @@ -286,6 +296,9 @@ CacheMemory::allocate(Addr address, AbstractCacheEntry *entry) assert(cacheAvail(address)); DPRINTF(RubyCache, "allocating address: %#x\n", address); + entry->initBlockSize(m_block_size); + entry->setRubySystem(m_ruby_system); + // Find the first open slot int64_t cacheSet = addressToCacheSet(address); std::vector &set = m_cache[cacheSet]; diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh index de7c327f63..912ae22d1f 100644 --- a/src/mem/ruby/structures/CacheMemory.hh +++ b/src/mem/ruby/structures/CacheMemory.hh @@ -154,6 +154,8 @@ class CacheMemory : public SimObject void htmAbortTransaction(); void htmCommitTransaction(); + void setRubySystem(RubySystem* rs); + public: int getCacheSize() const { return m_cache_size; } int getCacheAssoc() const { return m_cache_assoc; } @@ -213,6 +215,14 @@ class CacheMemory : public SimObject */ bool m_use_occupancy; + RubySystem *m_ruby_system = nullptr; + + Addr + makeLineAddress(Addr addr) const + { + return ruby::makeLineAddress(addr, floorLog2(m_block_size)); + } + private: struct CacheMemoryStats : public statistics::Group { diff --git a/src/mem/ruby/structures/DirectoryMemory.cc b/src/mem/ruby/structures/DirectoryMemory.cc index 620254b82c..7469f72451 100644 --- a/src/mem/ruby/structures/DirectoryMemory.cc +++ 
b/src/mem/ruby/structures/DirectoryMemory.cc @@ -64,12 +64,14 @@ DirectoryMemory::DirectoryMemory(const Params &p) } m_size_bits = floorLog2(m_size_bytes); m_num_entries = 0; + m_block_size = p.block_size; + m_ruby_system = p.ruby_system; } void DirectoryMemory::init() { - m_num_entries = m_size_bytes / RubySystem::getBlockSizeBytes(); + m_num_entries = m_size_bytes / m_block_size; m_entries = new AbstractCacheEntry*[m_num_entries]; for (int i = 0; i < m_num_entries; i++) m_entries[i] = NULL; @@ -108,7 +110,7 @@ DirectoryMemory::mapAddressToLocalIdx(Addr address) } ret += r.size(); } - return ret >> RubySystem::getBlockSizeBits(); + return ret >> (floorLog2(m_block_size)); } AbstractCacheEntry* @@ -133,6 +135,8 @@ DirectoryMemory::allocate(Addr address, AbstractCacheEntry *entry) assert(idx < m_num_entries); assert(m_entries[idx] == NULL); entry->changePermission(AccessPermission_Read_Only); + entry->initBlockSize(m_block_size); + entry->setRubySystem(m_ruby_system); m_entries[idx] = entry; return entry; diff --git a/src/mem/ruby/structures/DirectoryMemory.hh b/src/mem/ruby/structures/DirectoryMemory.hh index 8a4532864d..6e77e2a4ca 100644 --- a/src/mem/ruby/structures/DirectoryMemory.hh +++ b/src/mem/ruby/structures/DirectoryMemory.hh @@ -104,6 +104,9 @@ class DirectoryMemory : public SimObject uint64_t m_size_bytes; uint64_t m_size_bits; uint64_t m_num_entries; + uint32_t m_block_size; + + RubySystem *m_ruby_system = nullptr; /** * The address range for which the directory responds. Normally diff --git a/src/mem/ruby/structures/DirectoryMemory.py b/src/mem/ruby/structures/DirectoryMemory.py index 85f05367cf..202617bceb 100644 --- a/src/mem/ruby/structures/DirectoryMemory.py +++ b/src/mem/ruby/structures/DirectoryMemory.py @@ -49,3 +49,7 @@ class RubyDirectoryMemory(SimObject): addr_ranges = VectorParam.AddrRange( Parent.addr_ranges, "Address range this directory responds to" ) + block_size = Param.UInt32( + "Size of a block in bytes. 
Usually same as cache line size." + ) + ruby_system = Param.RubySystem(Parent.any, "") diff --git a/src/mem/ruby/structures/PerfectCacheMemory.hh b/src/mem/ruby/structures/PerfectCacheMemory.hh index 664d10f202..0966ca80d2 100644 --- a/src/mem/ruby/structures/PerfectCacheMemory.hh +++ b/src/mem/ruby/structures/PerfectCacheMemory.hh @@ -74,6 +74,8 @@ class PerfectCacheMemory public: PerfectCacheMemory(); + void setBlockSize(const int block_size) { m_block_size = block_size; } + // tests to see if an address is present in the cache bool isTagPresent(Addr address) const; @@ -108,6 +110,8 @@ class PerfectCacheMemory // Data Members (m_prefix) std::unordered_map > m_map; + + int m_block_size = 0; }; template @@ -130,7 +134,7 @@ template inline bool PerfectCacheMemory::isTagPresent(Addr address) const { - return m_map.count(makeLineAddress(address)) > 0; + return m_map.count(makeLineAddress(address, floorLog2(m_block_size))) > 0; } template @@ -149,7 +153,8 @@ PerfectCacheMemory::allocate(Addr address) PerfectCacheLineState line_state; line_state.m_permission = AccessPermission_Invalid; line_state.m_entry = ENTRY(); - m_map[makeLineAddress(address)] = line_state; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + m_map.emplace(line_addr, line_state); } // deallocate entry @@ -157,7 +162,8 @@ template inline void PerfectCacheMemory::deallocate(Addr address) { - [[maybe_unused]] auto num_erased = m_map.erase(makeLineAddress(address)); + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + [[maybe_unused]] auto num_erased = m_map.erase(line_addr); assert(num_erased == 1); } @@ -175,7 +181,8 @@ template inline ENTRY* PerfectCacheMemory::lookup(Addr address) { - return &m_map[makeLineAddress(address)].m_entry; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + return &m_map[line_addr].m_entry; } // looks an address up in the cache @@ -183,14 +190,16 @@ template inline const ENTRY* PerfectCacheMemory::lookup(Addr 
address) const { - return &m_map[makeLineAddress(address)].m_entry; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + return &m_map[line_addr].m_entry; } template inline AccessPermission PerfectCacheMemory::getPermission(Addr address) const { - return m_map[makeLineAddress(address)].m_permission; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + return m_map[line_addr].m_permission; } template @@ -198,8 +207,8 @@ inline void PerfectCacheMemory::changePermission(Addr address, AccessPermission new_perm) { - Addr line_address = makeLineAddress(address); - PerfectCacheLineState& line_state = m_map[line_address]; + Addr line_addr = makeLineAddress(address, floorLog2(m_block_size)); + PerfectCacheLineState& line_state = m_map[line_addr]; line_state.m_permission = new_perm; } diff --git a/src/mem/ruby/structures/PersistentTable.hh b/src/mem/ruby/structures/PersistentTable.hh index 5382269273..1162e1dda1 100644 --- a/src/mem/ruby/structures/PersistentTable.hh +++ b/src/mem/ruby/structures/PersistentTable.hh @@ -63,6 +63,12 @@ class PersistentTable // Destructor ~PersistentTable(); + void + setBlockSize(int block_size) + { + m_block_size_bits = floorLog2(block_size); + } + // Public Methods void persistentRequestLock(Addr address, MachineID locker, AccessType type); @@ -82,9 +88,17 @@ class PersistentTable PersistentTable(const PersistentTable& obj); PersistentTable& operator=(const PersistentTable& obj); + int m_block_size_bits = 0; + // Data Members (m_prefix) typedef std::unordered_map AddressMap; AddressMap m_map; + + Addr + makeLineAddress(Addr addr) const + { + return ruby::makeLineAddress(addr, m_block_size_bits); + } }; inline std::ostream& diff --git a/src/mem/ruby/structures/RubyCache.py b/src/mem/ruby/structures/RubyCache.py index 2f457f5c4a..4b1023fc61 100644 --- a/src/mem/ruby/structures/RubyCache.py +++ b/src/mem/ruby/structures/RubyCache.py @@ -54,4 +54,3 @@ class RubyCache(SimObject): dataAccessLatency = 
Param.Cycles(1, "cycles for a data array access") tagAccessLatency = Param.Cycles(1, "cycles for a tag array access") resourceStalls = Param.Bool(False, "stall if there is a resource failure") - ruby_system = Param.RubySystem(Parent.any, "") diff --git a/src/mem/ruby/structures/RubyPrefetcher.cc b/src/mem/ruby/structures/RubyPrefetcher.cc index e45eff2c2f..bffcfe2327 100644 --- a/src/mem/ruby/structures/RubyPrefetcher.cc +++ b/src/mem/ruby/structures/RubyPrefetcher.cc @@ -56,13 +56,15 @@ namespace ruby RubyPrefetcher::RubyPrefetcher(const Params &p) : SimObject(p), m_num_streams(p.num_streams), - m_array(p.num_streams), m_train_misses(p.train_misses), + m_array(p.num_streams, p.block_size), m_train_misses(p.train_misses), m_num_startup_pfs(p.num_startup_pfs), unitFilter(p.unit_filter), negativeFilter(p.unit_filter), nonUnitFilter(p.nonunit_filter), m_prefetch_cross_pages(p.cross_page), pageShift(p.page_shift), + m_block_size_bits(floorLog2(p.block_size)), + m_block_size_bytes(p.block_size), rubyPrefetcherStats(this) { assert(m_num_streams > 0); @@ -90,7 +92,7 @@ void RubyPrefetcher::observeMiss(Addr address, const RubyRequestType& type) { DPRINTF(RubyPrefetcher, "Observed miss for %#x\n", address); - Addr line_addr = makeLineAddress(address); + Addr line_addr = makeLineAddress(address, m_block_size_bits); rubyPrefetcherStats.numMissObserved++; // check to see if we have already issued a prefetch for this block @@ -214,7 +216,7 @@ RubyPrefetcher::initializeStream(Addr address, int stride, // initialize the stream prefetcher PrefetchEntry *mystream = &(m_array[index]); - mystream->m_address = makeLineAddress(address); + mystream->m_address = makeLineAddress(address, m_block_size_bits); mystream->m_stride = stride; mystream->m_use_time = m_controller->curCycle(); mystream->m_is_valid = true; @@ -222,7 +224,7 @@ RubyPrefetcher::initializeStream(Addr address, int stride, // create a number of initial prefetches for this stream Addr page_addr = 
pageAddress(mystream->m_address); - Addr line_addr = makeLineAddress(mystream->m_address); + Addr line_addr = makeLineAddress(mystream->m_address, m_block_size_bits); // insert a number of prefetches into the prefetch table for (int k = 0; k < m_num_startup_pfs; k++) { @@ -312,8 +314,7 @@ RubyPrefetcher::accessNonunitFilter(Addr line_addr, // This stride HAS to be the multiplicative constant of // dataBlockBytes (bc makeNextStrideAddress is // calculated based on this multiplicative constant!) - const int stride = entry.stride / - RubySystem::getBlockSizeBytes(); + const int stride = entry.stride / m_block_size_bytes; // clear this filter entry entry.clear(); diff --git a/src/mem/ruby/structures/RubyPrefetcher.hh b/src/mem/ruby/structures/RubyPrefetcher.hh index 51e1b3c480..5627410713 100644 --- a/src/mem/ruby/structures/RubyPrefetcher.hh +++ b/src/mem/ruby/structures/RubyPrefetcher.hh @@ -68,10 +68,10 @@ class PrefetchEntry { public: /// constructor - PrefetchEntry() + PrefetchEntry(int block_size) { // default: 1 cache-line stride - m_stride = (1 << RubySystem::getBlockSizeBits()); + m_stride = (1 << floorLog2(block_size)); m_use_time = Cycles(0); m_is_valid = false; } @@ -239,6 +239,16 @@ class RubyPrefetcher : public SimObject const unsigned pageShift; + int m_block_size_bits = 0; + int m_block_size_bytes = 0; + + Addr + makeNextStrideAddress(Addr addr, int stride) const + { + return ruby::makeNextStrideAddress(addr, stride, + m_block_size_bytes); + } + struct RubyPrefetcherStats : public statistics::Group { RubyPrefetcherStats(statistics::Group *parent); diff --git a/src/mem/ruby/structures/RubyPrefetcher.py b/src/mem/ruby/structures/RubyPrefetcher.py index d4189ae7d5..155b7c314d 100644 --- a/src/mem/ruby/structures/RubyPrefetcher.py +++ b/src/mem/ruby/structures/RubyPrefetcher.py @@ -62,6 +62,9 @@ class RubyPrefetcher(SimObject): page_shift = Param.UInt32( 12, "Number of bits to mask to get a page number" ) + block_size = Param.UInt32( + "Size of block to 
prefetch, usually cache line size" + ) class Prefetcher(RubyPrefetcher): diff --git a/src/mem/ruby/structures/RubyPrefetcherProxy.cc b/src/mem/ruby/structures/RubyPrefetcherProxy.cc index 2a29fbc88e..a6fed8258c 100644 --- a/src/mem/ruby/structures/RubyPrefetcherProxy.cc +++ b/src/mem/ruby/structures/RubyPrefetcherProxy.cc @@ -66,7 +66,7 @@ RubyPrefetcherProxy::RubyPrefetcherProxy(AbstractController* _parent, prefetcher->setParentInfo( cacheCntrl->params().system, cacheCntrl->getProbeManager(), - RubySystem::getBlockSizeBytes()); + cacheCntrl->m_ruby_system->getBlockSizeBytes()); } } @@ -112,7 +112,7 @@ RubyPrefetcherProxy::issuePrefetch() if (pkt) { DPRINTF(HWPrefetch, "Next prefetch ready %s\n", pkt->print()); - unsigned blk_size = RubySystem::getBlockSizeBytes(); + unsigned blk_size = cacheCntrl->m_ruby_system->getBlockSizeBytes(); Addr line_addr = pkt->getBlockAddr(blk_size); if (issuedPfPkts.count(line_addr) == 0) { @@ -126,6 +126,8 @@ RubyPrefetcherProxy::issuePrefetch() std::shared_ptr msg = std::make_shared(cacheCntrl->clockEdge(), + blk_size, + cacheCntrl->m_ruby_system, pkt->getAddr(), blk_size, 0, // pc @@ -136,7 +138,10 @@ RubyPrefetcherProxy::issuePrefetch() // enqueue request into prefetch queue to the cache pfQueue->enqueue(msg, cacheCntrl->clockEdge(), - cacheCntrl->cyclesToTicks(Cycles(1))); + cacheCntrl->cyclesToTicks(Cycles(1)), + cacheCntrl->m_ruby_system->getRandomization(), + cacheCntrl->m_ruby_system->getWarmupEnabled() + ); // track all pending PF requests issuedPfPkts[line_addr] = pkt; @@ -230,5 +235,19 @@ RubyPrefetcherProxy::regProbePoints() cacheCntrl->getProbeManager(), "Data Update"); } +Addr +RubyPrefetcherProxy::makeLineAddress(Addr addr) const +{ + return ruby::makeLineAddress(addr, + cacheCntrl->m_ruby_system->getBlockSizeBits()); +} + +Addr +RubyPrefetcherProxy::getOffset(Addr addr) const +{ + return ruby::getOffset(addr, + cacheCntrl->m_ruby_system->getBlockSizeBits()); +} + } // namespace ruby } // namespace gem5 diff --git 
a/src/mem/ruby/structures/RubyPrefetcherProxy.hh b/src/mem/ruby/structures/RubyPrefetcherProxy.hh index 34c40154b6..e7c044edf8 100644 --- a/src/mem/ruby/structures/RubyPrefetcherProxy.hh +++ b/src/mem/ruby/structures/RubyPrefetcherProxy.hh @@ -142,6 +142,9 @@ class RubyPrefetcherProxy : public CacheAccessor, public Named */ ProbePointArg *ppDataUpdate; + Addr makeLineAddress(Addr addr) const; + Addr getOffset(Addr addr) const; + public: /** Accessor functions */ diff --git a/src/mem/ruby/structures/TBETable.hh b/src/mem/ruby/structures/TBETable.hh index 9030d52d9f..72770ce42f 100644 --- a/src/mem/ruby/structures/TBETable.hh +++ b/src/mem/ruby/structures/TBETable.hh @@ -70,6 +70,8 @@ class TBETable return (m_number_of_TBEs - m_map.size()) >= n; } + void setBlockSize(const int block_size) { m_block_size = block_size; } + ENTRY *getNullEntry(); ENTRY *lookup(Addr address); @@ -85,7 +87,8 @@ class TBETable std::unordered_map m_map; private: - int m_number_of_TBEs; + int m_number_of_TBEs = 0; + int m_block_size = 0; }; template @@ -101,7 +104,7 @@ template inline bool TBETable::isPresent(Addr address) const { - assert(address == makeLineAddress(address)); + assert(address == makeLineAddress(address, floorLog2(m_block_size))); assert(m_map.size() <= m_number_of_TBEs); return !!m_map.count(address); } @@ -112,7 +115,8 @@ TBETable::allocate(Addr address) { assert(!isPresent(address)); assert(m_map.size() < m_number_of_TBEs); - m_map[address] = ENTRY(); + assert(m_block_size > 0); + m_map.emplace(address, ENTRY(m_block_size)); } template diff --git a/src/mem/ruby/structures/TimerTable.cc b/src/mem/ruby/structures/TimerTable.cc index f8f24dbfc0..a9ce92252e 100644 --- a/src/mem/ruby/structures/TimerTable.cc +++ b/src/mem/ruby/structures/TimerTable.cc @@ -70,7 +70,7 @@ TimerTable::nextAddress() const void TimerTable::set(Addr address, Tick ready_time) { - assert(address == makeLineAddress(address)); + assert(address == makeLineAddress(address, m_block_size_bits)); 
assert(!m_map.count(address)); m_map[address] = ready_time; @@ -87,7 +87,7 @@ TimerTable::set(Addr address, Tick ready_time) void TimerTable::unset(Addr address) { - assert(address == makeLineAddress(address)); + assert(address == makeLineAddress(address, m_block_size_bits)); assert(m_map.count(address)); m_map.erase(address); diff --git a/src/mem/ruby/structures/TimerTable.hh b/src/mem/ruby/structures/TimerTable.hh index e676359fd4..92c485ab57 100644 --- a/src/mem/ruby/structures/TimerTable.hh +++ b/src/mem/ruby/structures/TimerTable.hh @@ -48,6 +48,12 @@ class TimerTable public: TimerTable(); + void + setBlockSize(int block_size) + { + m_block_size_bits = floorLog2(block_size); + } + void setConsumer(Consumer* consumer_ptr) { @@ -88,6 +94,8 @@ class TimerTable //! Consumer to signal a wakeup() Consumer* m_consumer_ptr; + int m_block_size_bits = 0; + std::string m_name; }; diff --git a/src/mem/ruby/structures/WireBuffer.cc b/src/mem/ruby/structures/WireBuffer.cc index a839fe7cc7..3ebbe2a305 100644 --- a/src/mem/ruby/structures/WireBuffer.cc +++ b/src/mem/ruby/structures/WireBuffer.cc @@ -36,7 +36,6 @@ #include "base/cprintf.hh" #include "base/stl_helpers.hh" -#include "mem/ruby/system/RubySystem.hh" namespace gem5 { @@ -74,7 +73,8 @@ WireBuffer::~WireBuffer() } void -WireBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta) +WireBuffer::enqueue(MsgPtr message, Tick current_time, Tick delta, + bool /*ruby_is_random*/, bool /*ruby_warmup*/) { m_msg_counter++; Tick arrival_time = current_time + delta; diff --git a/src/mem/ruby/structures/WireBuffer.hh b/src/mem/ruby/structures/WireBuffer.hh index b26043b09a..75dfc154c8 100644 --- a/src/mem/ruby/structures/WireBuffer.hh +++ b/src/mem/ruby/structures/WireBuffer.hh @@ -78,7 +78,10 @@ class WireBuffer : public SimObject void setDescription(const std::string& name) { m_description = name; }; std::string getDescription() { return m_description; }; - void enqueue(MsgPtr message, Tick current_time, Tick delta); + // 
ruby_is_random and ruby_warmup are not used, but this method signature + // must match that of MessageBuffer. + void enqueue(MsgPtr message, Tick current_time, Tick delta, + bool ruby_is_random = false, bool ruby_warmup = false); void dequeue(Tick current_time); const Message* peek(); void recycle(Tick current_time, Tick recycle_latency); diff --git a/src/mem/ruby/structures/WireBuffer.py b/src/mem/ruby/structures/WireBuffer.py index ca67e7cb31..8cb2cfe4d6 100644 --- a/src/mem/ruby/structures/WireBuffer.py +++ b/src/mem/ruby/structures/WireBuffer.py @@ -35,5 +35,3 @@ class RubyWireBuffer(SimObject): type = "RubyWireBuffer" cxx_class = "gem5::ruby::WireBuffer" cxx_header = "mem/ruby/structures/WireBuffer.hh" - - ruby_system = Param.RubySystem(Parent.any, "") diff --git a/src/mem/ruby/system/CacheRecorder.cc b/src/mem/ruby/system/CacheRecorder.cc index 3326856849..426c604cb0 100644 --- a/src/mem/ruby/system/CacheRecorder.cc +++ b/src/mem/ruby/system/CacheRecorder.cc @@ -49,31 +49,25 @@ TraceRecord::print(std::ostream& out) const << m_type << ", Time: " << m_time << "]"; } -CacheRecorder::CacheRecorder() - : m_uncompressed_trace(NULL), - m_uncompressed_trace_size(0), - m_block_size_bytes(RubySystem::getBlockSizeBytes()) -{ -} - CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace, uint64_t uncompressed_trace_size, std::vector& ruby_port_map, - uint64_t block_size_bytes) + uint64_t trace_block_size_bytes, + uint64_t system_block_size_bytes) : m_uncompressed_trace(uncompressed_trace), m_uncompressed_trace_size(uncompressed_trace_size), m_ruby_port_map(ruby_port_map), m_bytes_read(0), m_records_read(0), m_records_flushed(0), - m_block_size_bytes(block_size_bytes) + m_block_size_bytes(trace_block_size_bytes) { if (m_uncompressed_trace != NULL) { - if (m_block_size_bytes < RubySystem::getBlockSizeBytes()) { + if (m_block_size_bytes < system_block_size_bytes) { // Block sizes larger than when the trace was recorded are not // supported, as we cannot reliably turn 
accesses to smaller blocks // into larger ones. panic("Recorded cache block size (%d) < current block size (%d) !!", - m_block_size_bytes, RubySystem::getBlockSizeBytes()); + m_block_size_bytes, system_block_size_bytes); } } } @@ -125,7 +119,7 @@ CacheRecorder::enqueueNextFetchRequest() DPRINTF(RubyCacheTrace, "Issuing %s\n", *traceRecord); for (int rec_bytes_read = 0; rec_bytes_read < m_block_size_bytes; - rec_bytes_read += RubySystem::getBlockSizeBytes()) { + rec_bytes_read += m_block_size_bytes) { RequestPtr req; MemCmd::Command requestType; @@ -133,19 +127,19 @@ CacheRecorder::enqueueNextFetchRequest() requestType = MemCmd::ReadReq; req = std::make_shared( traceRecord->m_data_address + rec_bytes_read, - RubySystem::getBlockSizeBytes(), 0, + m_block_size_bytes, 0, Request::funcRequestorId); } else if (traceRecord->m_type == RubyRequestType_IFETCH) { requestType = MemCmd::ReadReq; req = std::make_shared( traceRecord->m_data_address + rec_bytes_read, - RubySystem::getBlockSizeBytes(), + m_block_size_bytes, Request::INST_FETCH, Request::funcRequestorId); } else { requestType = MemCmd::WriteReq; req = std::make_shared( traceRecord->m_data_address + rec_bytes_read, - RubySystem::getBlockSizeBytes(), 0, + m_block_size_bytes, 0, Request::funcRequestorId); } diff --git a/src/mem/ruby/system/CacheRecorder.hh b/src/mem/ruby/system/CacheRecorder.hh index 021da6a4da..982e8b0592 100644 --- a/src/mem/ruby/system/CacheRecorder.hh +++ b/src/mem/ruby/system/CacheRecorder.hh @@ -73,13 +73,15 @@ class TraceRecord class CacheRecorder { public: - CacheRecorder(); - ~CacheRecorder(); - + // Construction requires block size. 
+ CacheRecorder() = delete; CacheRecorder(uint8_t* uncompressed_trace, uint64_t uncompressed_trace_size, std::vector& ruby_port_map, - uint64_t block_size_bytes); + uint64_t trace_block_size_bytes, + uint64_t system_block_size_bytes); + ~CacheRecorder(); + void addRecord(int cntrl, Addr data_addr, Addr pc_addr, RubyRequestType type, Tick time, DataBlock& data); diff --git a/src/mem/ruby/system/DMASequencer.cc b/src/mem/ruby/system/DMASequencer.cc index aa3fc66814..cd9d62d12a 100644 --- a/src/mem/ruby/system/DMASequencer.cc +++ b/src/mem/ruby/system/DMASequencer.cc @@ -73,7 +73,7 @@ void DMASequencer::init() { RubyPort::init(); - m_data_block_mask = mask(RubySystem::getBlockSizeBits()); + m_data_block_mask = mask(m_ruby_system->getBlockSizeBits()); } RequestStatus @@ -110,8 +110,10 @@ DMASequencer::makeRequest(PacketPtr pkt) DPRINTF(RubyDma, "DMA req created: addr %p, len %d\n", line_addr, len); + int blk_size = m_ruby_system->getBlockSizeBytes(); + std::shared_ptr msg = - std::make_shared(clockEdge()); + std::make_shared(clockEdge(), blk_size, m_ruby_system); msg->getPhysicalAddress() = paddr; msg->getLineAddress() = line_addr; @@ -145,8 +147,8 @@ DMASequencer::makeRequest(PacketPtr pkt) int offset = paddr & m_data_block_mask; - msg->getLen() = (offset + len) <= RubySystem::getBlockSizeBytes() ? - len : RubySystem::getBlockSizeBytes() - offset; + msg->getLen() = (offset + len) <= m_ruby_system->getBlockSizeBytes() ? 
+ len : m_ruby_system->getBlockSizeBytes() - offset; if (write && (data != NULL)) { if (active_request.data != NULL) { @@ -157,7 +159,8 @@ DMASequencer::makeRequest(PacketPtr pkt) m_outstanding_count++; assert(m_mandatory_q_ptr != NULL); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); active_request.bytes_issued += msg->getLen(); return RequestStatus_Issued; @@ -183,8 +186,10 @@ DMASequencer::issueNext(const Addr& address) return; } + int blk_size = m_ruby_system->getBlockSizeBytes(); + std::shared_ptr msg = - std::make_shared(clockEdge()); + std::make_shared(clockEdge(), blk_size, m_ruby_system); msg->getPhysicalAddress() = active_request.start_paddr + active_request.bytes_completed; @@ -196,9 +201,9 @@ DMASequencer::issueNext(const Addr& address) msg->getLen() = (active_request.len - - active_request.bytes_completed < RubySystem::getBlockSizeBytes() ? + active_request.bytes_completed < m_ruby_system->getBlockSizeBytes() ? active_request.len - active_request.bytes_completed : - RubySystem::getBlockSizeBytes()); + m_ruby_system->getBlockSizeBytes()); if (active_request.write) { msg->getDataBlk(). 
@@ -207,7 +212,8 @@ DMASequencer::issueNext(const Addr& address) } assert(m_mandatory_q_ptr != NULL); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1))); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); active_request.bytes_issued += msg->getLen(); DPRINTF(RubyDma, "DMA request bytes issued %d, bytes completed %d, total len %d\n", diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc index 072c63efd7..4d66dc6c1b 100644 --- a/src/mem/ruby/system/GPUCoalescer.cc +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -142,8 +142,8 @@ UncoalescedTable::updateResources() // are accessed directly using the makeRequest() command // instead of accessing through the port. This makes // sending tokens through the port unnecessary - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!coalescer->getRubySystem()->getWarmupEnabled() && + !coalescer->getRubySystem()->getCooldownEnabled()) { if (reqTypeMap[seq_num] != RubyRequestType_FLUSH) { DPRINTF(GPUCoalescer, "Returning token seqNum %d\n", seq_num); @@ -177,7 +177,7 @@ UncoalescedTable::printRequestTable(std::stringstream& ss) ss << "Listing pending packets from " << instMap.size() << " instructions"; for (auto& inst : instMap) { - ss << "\tAddr: " << printAddress(inst.first) << " with " + ss << "\tAddr: " << coalescer->printAddress(inst.first) << " with " << inst.second.size() << " pending packets" << std::endl; } } @@ -590,7 +590,7 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest, // When the Ruby system is cooldown phase, the requests come from // the cache recorder. These requests do not get coalesced and // do not return valid data. 
- if (RubySystem::getCooldownEnabled()) + if (m_ruby_system->getCooldownEnabled()) continue; if (pkt->getPtr()) { @@ -700,8 +700,8 @@ GPUCoalescer::makeRequest(PacketPtr pkt) // When Ruby is in warmup or cooldown phase, the requests come from // the cache recorder. There is no dynamic instruction associated // with these requests either - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!m_ruby_system->getWarmupEnabled() + && !m_ruby_system->getCooldownEnabled()) { if (!m_usingRubyTester) { num_packets = 0; for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) { @@ -985,8 +985,8 @@ GPUCoalescer::completeHitCallback(std::vector & mylist) // When Ruby is in warmup or cooldown phase, the requests come // from the cache recorder. They do not track which port to use // and do not need to send the response back - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!m_ruby_system->getWarmupEnabled() + && !m_ruby_system->getCooldownEnabled()) { RubyPort::SenderState *ss = safe_cast(pkt->senderState); MemResponsePort *port = ss->port; @@ -1015,9 +1015,9 @@ GPUCoalescer::completeHitCallback(std::vector & mylist) } RubySystem *rs = m_ruby_system; - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { rs->m_cache_recorder->enqueueNextFetchRequest(); - } else if (RubySystem::getCooldownEnabled()) { + } else if (m_ruby_system->getCooldownEnabled()) { rs->m_cache_recorder->enqueueNextFlushRequest(); } else { testDrainComplete(); diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh index 42efe41cb7..08412baad1 100644 --- a/src/mem/ruby/system/GPUCoalescer.hh +++ b/src/mem/ruby/system/GPUCoalescer.hh @@ -341,6 +341,8 @@ class GPUCoalescer : public RubyPort void insertKernel(int wavefront_id, PacketPtr pkt); + RubySystem *getRubySystem() { return m_ruby_system; } + GMTokenPort& getGMTokenPort() { return gmTokenPort; } statistics::Histogram& 
getOutstandReqHist() { return m_outstandReqHist; } diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc index 2630a6a27c..127f3c7802 100644 --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -326,6 +326,8 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) panic("Ruby supports atomic accesses only in noncaching mode\n"); } + RubySystem *rs = owner.m_ruby_system; + // Check for pio requests and directly send them to the dedicated // pio port. if (pkt->cmd != MemCmd::MemSyncReq) { @@ -343,12 +345,11 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt) return owner.ticksToCycles(req_ticks); } - assert(getOffset(pkt->getAddr()) + pkt->getSize() <= - RubySystem::getBlockSizeBytes()); + assert(owner.getOffset(pkt->getAddr()) + pkt->getSize() <= + rs->getBlockSizeBytes()); } // Find the machine type of memory controller interface - RubySystem *rs = owner.m_ruby_system; static int mem_interface_type = -1; if (mem_interface_type == -1) { if (rs->m_abstract_controls[MachineType_Directory].size() != 0) { @@ -404,7 +405,7 @@ RubyPort::MemResponsePort::recvFunctional(PacketPtr pkt) } assert(pkt->getAddr() + pkt->getSize() <= - makeLineAddress(pkt->getAddr()) + RubySystem::getBlockSizeBytes()); + owner.makeLineAddress(pkt->getAddr()) + rs->getBlockSizeBytes()); if (access_backing_store) { // The attached physmem contains the official version of data. @@ -501,7 +502,7 @@ RubyPort::ruby_stale_translation_callback(Addr txnId) // assumed they will not be modified or deleted by receivers. // TODO: should this really be using funcRequestorId? 
auto request = std::make_shared( - 0, RubySystem::getBlockSizeBytes(), Request::TLBI_EXT_SYNC, + 0, m_ruby_system->getBlockSizeBytes(), Request::TLBI_EXT_SYNC, Request::funcRequestorId); // Store the txnId in extraData instead of the address request->setExtraData(txnId); @@ -701,7 +702,7 @@ RubyPort::ruby_eviction_callback(Addr address) // assumed they will not be modified or deleted by receivers. // TODO: should this really be using funcRequestorId? auto request = std::make_shared( - address, RubySystem::getBlockSizeBytes(), 0, + address, m_ruby_system->getBlockSizeBytes(), 0, Request::funcRequestorId); // Use a single packet to signal all snooping ports of the invalidation. @@ -739,5 +740,23 @@ RubyPort::functionalWrite(Packet *func_pkt) return num_written; } +Addr +RubyPort::getOffset(Addr addr) const +{ + return ruby::getOffset(addr, m_ruby_system->getBlockSizeBits()); +} + +Addr +RubyPort::makeLineAddress(Addr addr) const +{ + return ruby::makeLineAddress(addr, m_ruby_system->getBlockSizeBits()); +} + +std::string +RubyPort::printAddress(Addr addr) const +{ + return ruby::printAddress(addr, m_ruby_system->getBlockSizeBits()); +} + } // namespace ruby } // namespace gem5 diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh index 66fe0a7686..39535930b3 100644 --- a/src/mem/ruby/system/RubyPort.hh +++ b/src/mem/ruby/system/RubyPort.hh @@ -181,6 +181,11 @@ class RubyPort : public ClockedObject virtual int functionalWrite(Packet *func_pkt); + // Helper methods for commonly used functions called in common/address.hh + Addr getOffset(Addr addr) const; + Addr makeLineAddress(Addr addr) const; + std::string printAddress(Addr addr) const; + protected: void trySendRetries(); void ruby_hit_callback(PacketPtr pkt); diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc index 21062eac14..fd7b262cb1 100644 --- a/src/mem/ruby/system/RubySystem.cc +++ b/src/mem/ruby/system/RubySystem.cc @@ -66,15 +66,8 @@ namespace gem5 
namespace ruby { -bool RubySystem::m_randomization; -uint32_t RubySystem::m_block_size_bytes; -uint32_t RubySystem::m_block_size_bits; -uint32_t RubySystem::m_memory_size_bits; -bool RubySystem::m_warmup_enabled = false; // To look forward to allowing multiple RubySystem instances, track the number // of RubySystems that need to be warmed up on checkpoint restore. -unsigned RubySystem::m_systems_to_warmup = 0; -bool RubySystem::m_cooldown_enabled = false; RubySystem::RubySystem(const Params &p) : ClockedObject(p), m_access_backing_store(p.access_backing_store), @@ -212,8 +205,8 @@ RubySystem::makeCacheRecorder(uint8_t *uncompressed_trace, // Create the CacheRecorder and record the cache trace m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size, - ruby_port_map, - block_size_bytes); + ruby_port_map, block_size_bytes, + m_block_size_bytes); } void @@ -331,7 +324,7 @@ RubySystem::serialize(CheckpointOut &cp) const // Store the cache-block size, so we are able to restore on systems // with a different cache-block size. CacheRecorder depends on the // correct cache-block size upon unserializing. - uint64_t block_size_bytes = getBlockSizeBytes(); + uint64_t block_size_bytes = m_block_size_bytes; SERIALIZE_SCALAR(block_size_bytes); // Check that there's a valid trace to use. If not, then memory won't @@ -416,7 +409,6 @@ RubySystem::unserialize(CheckpointIn &cp) readCompressedTrace(cache_trace_file, uncompressed_trace, cache_trace_size); m_warmup_enabled = true; - m_systems_to_warmup++; // Create the cache recorder that will hang around until startup. 
makeCacheRecorder(uncompressed_trace, cache_trace_size, block_size_bytes); @@ -467,10 +459,7 @@ RubySystem::startup() delete m_cache_recorder; m_cache_recorder = NULL; - m_systems_to_warmup--; - if (m_systems_to_warmup == 0) { - m_warmup_enabled = false; - } + m_warmup_enabled = false; // Restore eventq head eventq->replaceHead(eventq_head); @@ -509,7 +498,7 @@ bool RubySystem::functionalRead(PacketPtr pkt) { Addr address(pkt->getAddr()); - Addr line_address = makeLineAddress(address); + Addr line_address = makeLineAddress(address, m_block_size_bits); AccessPermission access_perm = AccessPermission_NotPresent; @@ -625,7 +614,7 @@ bool RubySystem::functionalRead(PacketPtr pkt) { Addr address(pkt->getAddr()); - Addr line_address = makeLineAddress(address); + Addr line_address = makeLineAddress(address, m_block_size_bits); DPRINTF(RubySystem, "Functional Read request for %#x\n", address); @@ -726,7 +715,7 @@ bool RubySystem::functionalWrite(PacketPtr pkt) { Addr addr(pkt->getAddr()); - Addr line_addr = makeLineAddress(addr); + Addr line_addr = makeLineAddress(addr, m_block_size_bits); AccessPermission access_perm = AccessPermission_NotPresent; DPRINTF(RubySystem, "Functional Write request for %#x\n", addr); diff --git a/src/mem/ruby/system/RubySystem.hh b/src/mem/ruby/system/RubySystem.hh index e16d699204..7e18770230 100644 --- a/src/mem/ruby/system/RubySystem.hh +++ b/src/mem/ruby/system/RubySystem.hh @@ -68,12 +68,12 @@ class RubySystem : public ClockedObject ~RubySystem(); // config accessors - static int getRandomization() { return m_randomization; } - static uint32_t getBlockSizeBytes() { return m_block_size_bytes; } - static uint32_t getBlockSizeBits() { return m_block_size_bits; } - static uint32_t getMemorySizeBits() { return m_memory_size_bits; } - static bool getWarmupEnabled() { return m_warmup_enabled; } - static bool getCooldownEnabled() { return m_cooldown_enabled; } + int getRandomization() { return m_randomization; } + uint32_t getBlockSizeBytes() { 
return m_block_size_bytes; } + uint32_t getBlockSizeBits() { return m_block_size_bits; } + uint32_t getMemorySizeBits() { return m_memory_size_bits; } + bool getWarmupEnabled() { return m_warmup_enabled; } + bool getCooldownEnabled() { return m_cooldown_enabled; } memory::SimpleMemory *getPhysMem() { return m_phys_mem; } Cycles getStartCycle() { return m_start_cycle; } @@ -134,14 +134,13 @@ class RubySystem : public ClockedObject void processRubyEvent(); private: // configuration parameters - static bool m_randomization; - static uint32_t m_block_size_bytes; - static uint32_t m_block_size_bits; - static uint32_t m_memory_size_bits; + bool m_randomization; + uint32_t m_block_size_bytes; + uint32_t m_block_size_bits; + uint32_t m_memory_size_bits; - static bool m_warmup_enabled; - static unsigned m_systems_to_warmup; - static bool m_cooldown_enabled; + bool m_warmup_enabled = false; + bool m_cooldown_enabled = false; memory::SimpleMemory *m_phys_mem; const bool m_access_backing_store; @@ -158,6 +157,11 @@ class RubySystem : public ClockedObject Profiler* m_profiler; CacheRecorder* m_cache_recorder; std::vector > m_abstract_controls; + std::map m_num_controllers; + + // These are auto-generated by SLICC based on the built protocol. 
+ int MachineType_base_count(const MachineType& obj); + int MachineType_base_number(const MachineType& obj); }; } // namespace ruby diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 4b0c6a239c..e2f49f5dff 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -73,6 +73,8 @@ Sequencer::Sequencer(const Params &p) { m_outstanding_count = 0; + m_ruby_system = p.ruby_system; + m_dataCache_ptr = p.dcache; m_max_outstanding_requests = p.max_outstanding_requests; m_deadlock_threshold = p.deadlock_threshold; @@ -726,7 +728,7 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, printAddress(request_address)); // update the data unless it is a non-data-carrying flush - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { data.setData(pkt); } else if (!pkt->isFlush()) { if ((type == RubyRequestType_LD) || @@ -782,11 +784,11 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, } RubySystem *rs = m_ruby_system; - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { assert(pkt->req); delete pkt; rs->m_cache_recorder->enqueueNextFetchRequest(); - } else if (RubySystem::getCooldownEnabled()) { + } else if (m_ruby_system->getCooldownEnabled()) { delete pkt; rs->m_cache_recorder->enqueueNextFlushRequest(); } else { @@ -852,8 +854,8 @@ Sequencer::completeHitCallback(std::vector & mylist) // When Ruby is in warmup or cooldown phase, the requests come // from the cache recorder. 
They do not track which port to use // and do not need to send the response back - if (!RubySystem::getWarmupEnabled() - && !RubySystem::getCooldownEnabled()) { + if (!m_ruby_system->getWarmupEnabled() + && !m_ruby_system->getCooldownEnabled()) { RubyPort::SenderState *ss = safe_cast(pkt->senderState); MemResponsePort *port = ss->port; @@ -873,9 +875,9 @@ Sequencer::completeHitCallback(std::vector & mylist) } RubySystem *rs = m_ruby_system; - if (RubySystem::getWarmupEnabled()) { + if (m_ruby_system->getWarmupEnabled()) { rs->m_cache_recorder->enqueueNextFetchRequest(); - } else if (RubySystem::getCooldownEnabled()) { + } else if (m_ruby_system->getCooldownEnabled()) { rs->m_cache_recorder->enqueueNextFlushRequest(); } else { testDrainComplete(); @@ -910,14 +912,16 @@ Sequencer::invL1() // Evict Read-only data RubyRequestType request_type = RubyRequestType_REPLACEMENT; std::shared_ptr msg = std::make_shared( - clockEdge(), addr, 0, 0, - request_type, RubyAccessMode_Supervisor, + clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system, + addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr); DPRINTF(RubySequencer, "Evicting addr 0x%x\n", addr); assert(m_mandatory_q_ptr != NULL); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(request_type)); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); m_num_pending_invs++; } DPRINTF(RubySequencer, @@ -1080,11 +1084,14 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) pc = pkt->req->getPC(); } + int blk_size = m_ruby_system->getBlockSizeBytes(); + // check if the packet has data as for example prefetch and flush // requests do not std::shared_ptr msg; if (pkt->req->isMemMgmt()) { - msg = std::make_shared(clockEdge(), + msg = std::make_shared(clockEdge(), blk_size, + m_ruby_system, pc, secondary_type, RubyAccessMode_Supervisor, pkt, 
proc_id, core_id); @@ -1111,8 +1118,10 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) msg->m_tlbiTransactionUid); } } else { - msg = std::make_shared(clockEdge(), pkt->getAddr(), - pkt->getSize(), pc, secondary_type, + msg = std::make_shared(clockEdge(), blk_size, + m_ruby_system, + pkt->getAddr(), pkt->getSize(), + pc, secondary_type, RubyAccessMode_Supervisor, pkt, PrefetchBit_No, proc_id, core_id); @@ -1147,7 +1156,9 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) assert(latency > 0); assert(m_mandatory_q_ptr != NULL); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); } template @@ -1194,7 +1205,7 @@ Sequencer::incrementUnaddressedTransactionCnt() // Limit m_unaddressedTransactionCnt to 32 bits, // top 32 bits should always be zeroed out uint64_t aligned_txid = \ - m_unaddressedTransactionCnt << RubySystem::getBlockSizeBits(); + m_unaddressedTransactionCnt << m_ruby_system->getBlockSizeBits(); if (aligned_txid > 0xFFFFFFFFull) { m_unaddressedTransactionCnt = 0; @@ -1206,7 +1217,7 @@ Sequencer::getCurrentUnaddressedTransactionID() const { return ( uint64_t(m_version & 0xFFFFFFFF) << 32) | - (m_unaddressedTransactionCnt << RubySystem::getBlockSizeBits() + (m_unaddressedTransactionCnt << m_ruby_system->getBlockSizeBits() ); } diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index 1f60d2638f..ee16d2fe2e 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -254,6 +254,8 @@ class Sequencer : public RubyPort RubyRequestType primary_type, RubyRequestType secondary_type); + RubySystem *m_ruby_system; + private: int m_max_outstanding_requests; diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py index 3f570fb952..0994bb4afe 100644 --- a/src/mem/ruby/system/Sequencer.py +++ 
b/src/mem/ruby/system/Sequencer.py @@ -83,7 +83,7 @@ class RubyPort(ClockedObject): using_ruby_tester = Param.Bool(False, "") no_retry_on_stall = Param.Bool(False, "") - ruby_system = Param.RubySystem(Parent.any, "") + ruby_system = Param.RubySystem("Parent RubySystem object") system = Param.System(Parent.any, "system object") support_data_reqs = Param.Bool(True, "data cache requests supported") support_inst_reqs = Param.Bool(True, "inst cache requests supported") diff --git a/src/mem/ruby/system/VIPERCoalescer.cc b/src/mem/ruby/system/VIPERCoalescer.cc index 47ceced3a7..67dd88fb2e 100644 --- a/src/mem/ruby/system/VIPERCoalescer.cc +++ b/src/mem/ruby/system/VIPERCoalescer.cc @@ -135,9 +135,9 @@ VIPERCoalescer::issueRequest(CoalescedRequest* crequest) // Creating WriteMask that records written bytes // and atomic operations. This enables partial writes // and partial reads of those writes - DataBlock dataBlock; + uint32_t blockSize = m_ruby_system->getBlockSizeBytes(); + DataBlock dataBlock(blockSize); dataBlock.clear(); - uint32_t blockSize = RubySystem::getBlockSizeBytes(); std::vector accessMask(blockSize,false); std::vector< std::pair > atomicOps; uint32_t tableSize = crequest->getPackets().size(); @@ -159,15 +159,17 @@ VIPERCoalescer::issueRequest(CoalescedRequest* crequest) } std::shared_ptr msg; if (pkt->isAtomicOp()) { - msg = std::make_shared(clockEdge(), pkt->getAddr(), - pkt->getSize(), pc, crequest->getRubyType(), + msg = std::make_shared(clockEdge(), blockSize, + m_ruby_system, pkt->getAddr(), pkt->getSize(), + pc, crequest->getRubyType(), RubyAccessMode_Supervisor, pkt, PrefetchBit_No, proc_id, 100, blockSize, accessMask, dataBlock, atomicOps, crequest->getSeqNum()); } else { - msg = std::make_shared(clockEdge(), pkt->getAddr(), - pkt->getSize(), pc, crequest->getRubyType(), + msg = std::make_shared(clockEdge(), blockSize, + m_ruby_system, pkt->getAddr(), pkt->getSize(), + pc, crequest->getRubyType(), RubyAccessMode_Supervisor, pkt, PrefetchBit_No, 
proc_id, 100, blockSize, accessMask, @@ -195,7 +197,9 @@ VIPERCoalescer::issueRequest(CoalescedRequest* crequest) assert(m_mandatory_q_ptr); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(crequest->getRubyType())); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); } void @@ -241,7 +245,7 @@ VIPERCoalescer::writeCompleteCallback(Addr addr, uint64_t instSeqNum) std::remove_if( m_writeCompletePktMap[key].begin(), m_writeCompletePktMap[key].end(), - [addr](PacketPtr writeCompletePkt) -> bool { + [this,addr](PacketPtr writeCompletePkt) -> bool { if (makeLineAddress(writeCompletePkt->getAddr()) == addr) { RubyPort::SenderState *ss = safe_cast @@ -296,14 +300,15 @@ VIPERCoalescer::invTCP() // Evict Read-only data RubyRequestType request_type = RubyRequestType_REPLACEMENT; std::shared_ptr msg = std::make_shared( - clockEdge(), addr, 0, 0, - request_type, RubyAccessMode_Supervisor, - nullptr); + clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system, + addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr); DPRINTF(GPUCoalescer, "Evicting addr 0x%x\n", addr); assert(m_mandatory_q_ptr != NULL); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(request_type)); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); m_num_pending_invs++; } DPRINTF(GPUCoalescer, @@ -343,16 +348,17 @@ VIPERCoalescer::invTCC(PacketPtr pkt) RubyRequestType request_type = RubyRequestType_InvL2; std::shared_ptr msg = std::make_shared( - clockEdge(), addr, 0, 0, - request_type, RubyAccessMode_Supervisor, - nullptr); + clockEdge(), m_ruby_system->getBlockSizeBytes(), m_ruby_system, + addr, 0, 0, request_type, RubyAccessMode_Supervisor, nullptr); DPRINTF(GPUCoalescer, "Sending L2 
invalidate to 0x%x\n", addr); assert(m_mandatory_q_ptr); Tick latency = cyclesToTicks( m_controller->mandatoryQueueLatency(request_type)); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency, + m_ruby_system->getRandomization(), + m_ruby_system->getWarmupEnabled()); m_pending_invl2s[addr].push_back(pkt); } diff --git a/src/mem/ruby/system/VIPERSequencer.cc b/src/mem/ruby/system/VIPERSequencer.cc index ac840777d4..b8b806aa9c 100644 --- a/src/mem/ruby/system/VIPERSequencer.cc +++ b/src/mem/ruby/system/VIPERSequencer.cc @@ -81,8 +81,8 @@ VIPERSequencer::hitCallback(SequencerRequest* srequest, DataBlock& data, // subBlock with the recieved data. The tester will later access // this state. assert(!m_usingRubyTester); - assert(!RubySystem::getWarmupEnabled()); - assert(!RubySystem::getCooldownEnabled()); + assert(!m_ruby_system->getWarmupEnabled()); + assert(!m_ruby_system->getCooldownEnabled()); ruby_hit_callback(pkt); testDrainComplete(); } diff --git a/src/mem/slicc/ast/CheckProbeStatementAST.py b/src/mem/slicc/ast/CheckProbeStatementAST.py index 10945cfc30..14f6f7e4fa 100644 --- a/src/mem/slicc/ast/CheckProbeStatementAST.py +++ b/src/mem/slicc/ast/CheckProbeStatementAST.py @@ -49,7 +49,8 @@ class CheckProbeStatementAST(StatementAST): if (m_is_blocking && (m_block_map.count($address_code) == 1) && (m_block_map[$address_code] == &$in_port_code)) { - $in_port_code.delayHead(clockEdge(), cyclesToTicks(Cycles(1))); + $in_port_code.delayHead(clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); continue; } """ diff --git a/src/mem/slicc/ast/DeferEnqueueingStatementAST.py b/src/mem/slicc/ast/DeferEnqueueingStatementAST.py index 14b2e48cd3..4bb446aee2 100644 --- a/src/mem/slicc/ast/DeferEnqueueingStatementAST.py +++ b/src/mem/slicc/ast/DeferEnqueueingStatementAST.py @@ -68,7 +68,8 @@ class DeferEnqueueingStatementAST(StatementAST): # Declare message code( 
"std::shared_ptr<${{msg_type.c_ident}}> out_msg = " - "std::make_shared<${{msg_type.c_ident}}>(clockEdge());" + "std::make_shared<${{msg_type.c_ident}}>(clockEdge()," + " m_ruby_system->getBlockSizeBytes(), m_ruby_system);" ) # The other statements diff --git a/src/mem/slicc/ast/EnqueueStatementAST.py b/src/mem/slicc/ast/EnqueueStatementAST.py index c2d47af9ce..b026f6e7a9 100644 --- a/src/mem/slicc/ast/EnqueueStatementAST.py +++ b/src/mem/slicc/ast/EnqueueStatementAST.py @@ -76,7 +76,8 @@ class EnqueueStatementAST(StatementAST): # Declare message code( "std::shared_ptr<${{msg_type.c_ident}}> out_msg = " - "std::make_shared<${{msg_type.c_ident}}>(clockEdge());" + "std::make_shared<${{msg_type.c_ident}}>(clockEdge(), " + " m_ruby_system->getBlockSizeBytes(), m_ruby_system);" ) # The other statements @@ -89,17 +90,21 @@ class EnqueueStatementAST(StatementAST): bypass_strict_fifo_code = self.bypass_strict_fifo.inline(False) code( "(${{self.queue_name.var.code}}).enqueue(" - "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), $bypass_strict_fifo_code);" + "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), " + "m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled(), " + "$bypass_strict_fifo_code);" ) else: code( "(${{self.queue_name.var.code}}).enqueue(" - "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)));" + "out_msg, clockEdge(), cyclesToTicks(Cycles($rcode)), " + "m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled());" ) else: code( "(${{self.queue_name.var.code}}).enqueue(out_msg, " - "clockEdge(), cyclesToTicks(Cycles(1)));" + "clockEdge(), cyclesToTicks(Cycles(1))," + "m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled());" ) # End scope diff --git a/src/mem/slicc/ast/LocalVariableAST.py b/src/mem/slicc/ast/LocalVariableAST.py index b4ac8f446b..43ab110a67 100644 --- a/src/mem/slicc/ast/LocalVariableAST.py +++ b/src/mem/slicc/ast/LocalVariableAST.py @@ -73,6 +73,8 @@ class LocalVariableAST(StatementAST): ) ): 
code += f"{type.c_ident}* {ident}" + elif "implicit_ctor" in type: + code += f"{type.c_ident} {ident}({type['implicit_ctor']})" else: code += f"{type.c_ident} {ident}" return type diff --git a/src/mem/slicc/ast/PeekStatementAST.py b/src/mem/slicc/ast/PeekStatementAST.py index 00edff4e7b..415f4ec465 100644 --- a/src/mem/slicc/ast/PeekStatementAST.py +++ b/src/mem/slicc/ast/PeekStatementAST.py @@ -93,7 +93,8 @@ class PeekStatementAST(StatementAST): if (m_is_blocking && (m_block_map.count(in_msg_ptr->m_$address_field) == 1) && (m_block_map[in_msg_ptr->m_$address_field] != &$qcode)) { - $qcode.delayHead(clockEdge(), cyclesToTicks(Cycles(1))); + $qcode.delayHead(clockEdge(), cyclesToTicks(Cycles(1)), + m_ruby_system->getRandomization(), m_ruby_system->getWarmupEnabled()); continue; } """ diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py index b523522501..6202d2d239 100644 --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -352,7 +352,6 @@ class $c_ident : public AbstractController public: typedef ${c_ident}Params Params; $c_ident(const Params &p); - static int getNumControllers(); void init(); MessageBuffer *getMandatoryQueue() const; @@ -449,9 +448,8 @@ int m_counters[${ident}_State_NUM][${ident}_Event_NUM]; int m_event_counters[${ident}_Event_NUM]; bool m_possible[${ident}_State_NUM][${ident}_Event_NUM]; -static std::vector eventVec; -static std::vector > transVec; -static int m_num_controllers; +std::vector eventVec; +std::vector > transVec; // Internal functions """ @@ -625,10 +623,6 @@ namespace gem5 namespace ruby { -int $c_ident::m_num_controllers = 0; -std::vector $c_ident::eventVec; -std::vector > $c_ident::transVec; - // for adding information to the protocol debug trace std::stringstream ${ident}_transitionComment; @@ -644,8 +638,9 @@ $c_ident::$c_ident(const Params &p) { m_machineID.type = MachineType_${ident}; m_machineID.num = m_version; - m_num_controllers++; + 
p.ruby_system->m_num_controllers[MachineType_${ident}]++; p.ruby_system->registerAbstractController(this); + m_ruby_system = p.ruby_system; m_in_ports = $num_in_ports; """ ) @@ -699,7 +694,7 @@ void $c_ident::initNetQueues() { MachineType machine_type = string_to_MachineType("${{self.ident}}"); - [[maybe_unused]] int base = MachineType_base_number(machine_type); + [[maybe_unused]] int base = m_ruby_system->MachineType_base_number(machine_type); """ ) @@ -776,6 +771,17 @@ $c_ident::init() comment = f"Type {vtype.ident} default" code('*$vid = ${{vtype["default"]}}; // $comment') + # For objects that require knowing the cache line size, + # set the value here. + if vtype.c_ident in ("TBETable",): + block_size_func = "m_ruby_system->getBlockSizeBytes()" + code(f"(*{vid}).setBlockSize({block_size_func});") + + for param in self.config_parameters: + if param.type_ast.type.ident == "CacheMemory": + assert param.pointer + code(f"m_{param.ident}_ptr->setRubySystem(m_ruby_system);") + # Set the prefetchers code() for prefetcher in self.prefetchers: @@ -942,7 +948,9 @@ $c_ident::regStats() "${c_ident}." + ${ident}_Event_to_string(event); statistics::Vector *t = new statistics::Vector(profilerStatsPtr, stat_name.c_str()); - t->init(m_num_controllers); + int num_controllers = + m_ruby_system->m_num_controllers[MachineType_${ident}]; + t->init(num_controllers); t->flags(statistics::pdf | statistics::total | statistics::oneline | statistics::nozero); @@ -961,7 +969,9 @@ $c_ident::regStats() "."
+ ${ident}_Event_to_string(event); statistics::Vector *t = new statistics::Vector( profilerStatsPtr, stat_name.c_str()); - t->init(m_num_controllers); + int num_controllers = + m_ruby_system->m_num_controllers[MachineType_${ident}]; + t->init(num_controllers); t->flags(statistics::pdf | statistics::total | statistics::oneline | statistics::nozero); transVec[state].push_back(t); @@ -1062,9 +1072,12 @@ $c_ident::regStats() void $c_ident::collateStats() { + int num_controllers = + m_ruby_system->m_num_controllers[MachineType_${ident}]; + for (${ident}_Event event = ${ident}_Event_FIRST; event < ${ident}_Event_NUM; ++event) { - for (unsigned int i = 0; i < m_num_controllers; ++i) { + for (unsigned int i = 0; i < num_controllers; ++i) { RubySystem *rs = params().ruby_system; std::map::iterator it = rs->m_abstract_controls[MachineType_${ident}].find(i); @@ -1080,7 +1093,7 @@ $c_ident::collateStats() for (${ident}_Event event = ${ident}_Event_FIRST; event < ${ident}_Event_NUM; ++event) { - for (unsigned int i = 0; i < m_num_controllers; ++i) { + for (unsigned int i = 0; i < num_controllers; ++i) { RubySystem *rs = params().ruby_system; std::map::iterator it = rs->m_abstract_controls[MachineType_${ident}].find(i); @@ -1125,12 +1138,6 @@ $c_ident::getTransitionCount(${ident}_State state, return m_counters[state][event]; } -int -$c_ident::getNumControllers() -{ - return m_num_controllers; -} - MessageBuffer* $c_ident::getMandatoryQueue() const { @@ -1181,6 +1188,7 @@ void $c_ident::set_cache_entry(${{self.EntryType.c_ident}}*& m_cache_entry_ptr, AbstractCacheEntry* m_new_cache_entry) { m_cache_entry_ptr = (${{self.EntryType.c_ident}}*)m_new_cache_entry; + m_cache_entry_ptr->setRubySystem(m_ruby_system); } void @@ -1200,6 +1208,7 @@ void $c_ident::set_tbe(${{self.TBEType.c_ident}}*& m_tbe_ptr, ${{self.TBEType.c_ident}}* m_new_tbe) { m_tbe_ptr = m_new_tbe; + m_tbe_ptr->setRubySystem(m_ruby_system); } void diff --git a/src/mem/slicc/symbols/Type.py 
b/src/mem/slicc/symbols/Type.py index 535a4165b3..53c8ff877e 100644 --- a/src/mem/slicc/symbols/Type.py +++ b/src/mem/slicc/symbols/Type.py @@ -119,6 +119,10 @@ class Type(Symbol): def isMessage(self): return "message" in self + @property + def isTBE(self): + return "tbe" in self + @property def isBuffer(self): return "buffer" in self @@ -250,18 +254,54 @@ namespace gem5 namespace ruby { +class RubySystem; + $klass ${{self.c_ident}}$parent { public: - ${{self.c_ident}} """, klass="class", ) if self.isMessage: - code("(Tick curTime) : %s(curTime) {" % self["interface"]) + code( + "${{self.c_ident}}(Tick curTime, int blockSize, RubySystem* rs) : %s(curTime, blockSize, rs)" + % self["interface"] + ) + + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + code(f"\t\t, m_{dm.ident}(blockSize)") + + code("{") + elif self.isTBE: + code("${{self.c_ident}}(int block_size)") + + ctor_count = 0 + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + if ctor_count == 0: + code("\t:") + else: + code("\t, ") + code(f"\t\tm_{dm.ident}(block_size)") + ctor_count += 1 + + code("{") else: - code("()\n\t\t{") + code("${{self.c_ident}}()") + + ctor_count = 0 + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + if ctor_count == 0: + code("\t:") + else: + code("\t, ") + code(f"\t\tm_{dm.ident}(0)") + ctor_count += 1 + + code("{") code.indent() if not self.isGlobal: @@ -280,6 +320,12 @@ $klass ${{self.c_ident}}$parent code(" // default value of $tid") else: code("// m_$ident has no default") + + # These parts of Messages need RubySystem pointers. For things + # like Entry which only store NetDest, RubySystem is not needed. 
+ if self.isMessage and dm.real_c_type == "NetDest": + code("// m_$ident requires RubySystem") + code("m_$ident.setRubySystem(rs);") code.dedent() code("}") @@ -300,21 +346,45 @@ $klass ${{self.c_ident}}$parent params = ", ".join(params) if self.isMessage: - params = "const Tick curTime, " + params + params = ( + "const Tick curTime, const int blockSize, const RubySystem *rs, " + + params + ) code("${{self.c_ident}}($params)") # Call superclass constructor if "interface" in self: if self.isMessage: - code(' : ${{self["interface"]}}(curTime)') + code( + ' : ${{self["interface"]}}(curTime, blockSize, rs)' + ) + + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + code(f"\t\t, m_{dm.ident}(blockSize)") else: code(' : ${{self["interface"]}}()') + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + code(f"\t\t, m_{dm.ident}(local_{dm.ident})") + else: + ctor_count = 0 + for dm in self.data_members.values(): + if dm.real_c_type in ("DataBlock", "WriteMask"): + if ctor_count == 0: + code("\t:") + else: + code("\t, ") + code(f"\t\tm_{dm.ident}(local_{dm.ident})") + ctor_count += 1 + code("{") code.indent() for dm in self.data_members.values(): - code("m_${{dm.ident}} = local_${{dm.ident}};") + if not dm.real_c_type in ("DataBlock", "WriteMask"): + code("m_${{dm.ident}} = local_${{dm.ident}};") code.dedent() code("}") @@ -342,6 +412,35 @@ clone() const ) if not self.isGlobal: + # Block size setter for fields that require block size + # Intentionally do not begin function name with "set" in case + # the user has a field named BlockSize which would conflict + # with the method generated below. 
+ code("\nvoid initBlockSize(int block_size)") + code("{") + code("\tblock_size_bits = floorLog2(block_size);") + + needs_block_size = ( + "DataBlock", + "WriteMask", + "PersistentTable", + "TimerTable", + "PerfectCacheMemory", + ) + + for dm in self.data_members.values(): + if dm.real_c_type in needs_block_size: + code(f"\tm_{dm.ident}.setBlockSize(block_size);") + code("}\n") + + code("\nvoid setRubySystem(RubySystem *ruby_system)") + code("{") + for dm in self.data_members.values(): + if dm.real_c_type in ("NetDest",): + code(f"// m_{dm.ident} requires RubySystem") + code(f"\tm_{dm.ident}.setRubySystem(ruby_system);") + code("}\n") + # const Get methods for each field code("// Const accessors methods for each field") for dm in self.data_members.values(): @@ -393,6 +492,9 @@ set${{dm.ident}}(const ${{dm.real_c_type}}& local_${{dm.ident}}) code(" //private:") code.indent() + # block_size_bits for print methods + code("int block_size_bits = 0;") + # Data members for each field for dm in self.data_members.values(): if "abstract" not in dm: @@ -473,7 +575,7 @@ ${{self.c_ident}}::print(std::ostream& out) const if dm.type.c_ident == "Addr": code( """ -out << "${{dm.ident}} = " << printAddress(m_${{dm.ident}}) << " ";""" +out << "${{dm.ident}} = " << printAddress(m_${{dm.ident}}, block_size_bits) << " ";""" ) else: code('out << "${{dm.ident}} = " << m_${{dm.ident}} << " ";' "") @@ -846,7 +948,7 @@ ${{self.c_ident}}_from_base_level(int type) * \\return the base number of components for each machine */ int -${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) +RubySystem::${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) { int base = 0; switch(obj) { @@ -860,7 +962,7 @@ ${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) # Check if there is a defined machine with this type if enum.primary: code( - " base += ${{enum.ident}}_Controller::getNumControllers();" + "\tbase += m_num_controllers[${{self.c_ident}}_${{enum.ident}}];" ) else: code(" base += 
0;") @@ -882,7 +984,7 @@ ${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) * \\return the total number of components for each machine */ int -${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj) +RubySystem::${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj) { switch(obj) { """ @@ -893,7 +995,7 @@ ${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj) code("case ${{self.c_ident}}_${{enum.ident}}:") if enum.primary: code( - "return ${{enum.ident}}_Controller::getNumControllers();" + "return m_num_controllers[${{self.c_ident}}_${{enum.ident}}];" ) else: code("return 0;") diff --git a/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py index 29df2a969c..a469fead61 100644 --- a/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/chi/private_l1_cache_hierarchy.py @@ -137,7 +137,9 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) def _create_core_cluster( @@ -167,12 +169,16 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): ) cluster.icache.sequencer = RubySequencer( - version=core_num, dcache=NULL, clk_domain=cluster.icache.clk_domain + version=core_num, + dcache=NULL, + clk_domain=cluster.icache.clk_domain, + ruby_system=self.ruby_system, ) cluster.dcache.sequencer = RubySequencer( version=core_num, dcache=cluster.dcache.cache, clk_domain=cluster.dcache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -223,7 +229,11 @@ class PrivateL1CacheHierarchy(AbstractRubyCacheHierarchy): board.get_clock_domain(), ) version = len(board.get_processor().get_cores()) + i - ctrl.sequencer = RubySequencer(version=version, in_ports=port) + ctrl.sequencer = RubySequencer( + version=version, + in_ports=port, + ruby_system=self.ruby_system, + ) ctrl.sequencer.dcache = NULL ctrl.ruby_system = self.ruby_system diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py index 4840e3b264..d0c54840fc 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py @@ -37,7 +37,7 @@ class Directory(AbstractDirectory): def __init__(self, network, cache_line_size, mem_range, port): super().__init__(network, cache_line_size) self.addr_ranges = [mem_range] - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory(block_size=cache_line_size) # Connect this directory to the memory side. 
self.memory_out_port = port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py index 6d203f978a..ef90ac79f6 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py @@ -80,7 +80,7 @@ class L1Cache(L0Cache_Controller): replacement_policy=LRURP(), ) self.clk_domain = clk_domain - self.prefetcher = RubyPrefetcher() + self.prefetcher = RubyPrefetcher(block_size=cache_line_size) self.send_evictions = core.requires_send_evicts() self.transitions_per_cycle = 32 self.enable_prefetch = False diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py index ff2b8e3dd9..7c473f8be9 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py @@ -75,7 +75,7 @@ class L2Cache(L1Cache_Controller): self.l2_select_num_bits = int(math.log(num_l3Caches, 2)) self.cluster_id = cluster_id self.clk_domain = clk_domain - self.prefetcher = RubyPrefetcher() + self.prefetcher = RubyPrefetcher(block_size=cache_line_size) self.transitions_per_cycle = 32 # l1_request_latency, l1_response_latency, to_l2_latency are # ruby backend terminology. 
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py index 4840e3b264..d0c54840fc 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/directory.py @@ -37,7 +37,7 @@ class Directory(AbstractDirectory): def __init__(self, network, cache_line_size, mem_range, port): super().__init__(network, cache_line_size) self.addr_ranges = [mem_range] - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory(block_size=cache_line_size) # Connect this directory to the memory side. self.memory_out_port = port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py index 7787644c9b..13625beea7 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py @@ -73,7 +73,7 @@ class L1Cache(AbstractL1Cache): ) self.l2_select_num_bits = int(math.log(num_l2Caches, 2)) self.clk_domain = clk_domain - self.prefetcher = RubyPrefetcher() + self.prefetcher = RubyPrefetcher(block_size=cache_line_size) self.send_evictions = core.requires_send_evicts() self.transitions_per_cycle = 4 self.enable_prefetch = False diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py index 3d1ae54104..79e40e9e01 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mi_example/directory.py @@ -41,7 +41,7 @@ class Directory(AbstractDirectory): def __init__(self, network, cache_line_size, 
mem_range, port): super().__init__(network, cache_line_size) self.addr_ranges = [mem_range] - self.directory = RubyDirectoryMemory() + self.directory = RubyDirectoryMemory(block_size=cache_line_size) # Connect this directory to the memory side. self.memory_out_port = port diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py index 9aa0dc4a36..212c06c4c3 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/core_complex.py @@ -143,6 +143,7 @@ class CoreComplex(SubSystem, RubyNetworkComponent): version=core_id, dcache=cluster.l1_cache.Dcache, clk_domain=cluster.l1_cache.clk_domain, + ruby_system=self._ruby_system, ) if self._board.has_io_bus(): diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py index f7d4d63de1..83137ce15a 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py +++ b/src/python/gem5/components/cachehierarchies/ruby/caches/prebuilt/octopi_cache/octopi.py @@ -151,7 +151,9 @@ class OctopiCache( # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) def _create_directory_controllers(self, board): @@ -228,7 +230,11 @@ class OctopiCache( if board.has_dma_ports(): self.ruby_system.dma_controllers = [ DMAController( - dma_sequencer=DMASequencer(version=i + 1, in_ports=port), + dma_sequencer=DMASequencer( + version=i + 1, + in_ports=port, + ruby_system=self.ruby_system, + ), ruby_system=self.ruby_system, ) for i, port in enumerate(board.get_dma_ports()) diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py index 66fea95636..92e8860a24 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py @@ -118,6 +118,7 @@ class MESIThreeLevelCacheHierarchy( version=core_idx, dcache=l1_cache.Dcache, clk_domain=l1_cache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -196,7 +197,12 @@ class MESIThreeLevelCacheHierarchy( dma_ports = board.get_dma_ports() for i, port in enumerate(dma_ports): ctrl = DMAController( - DMASequencer(version=i, in_ports=port), self.ruby_system + DMASequencer( + version=i, + in_ports=port, + ruby_system=self.ruby_system, + ), + self.ruby_system, ) self._dma_controllers.append(ctrl) @@ -223,5 +229,7 @@ class MESIThreeLevelCacheHierarchy( # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py index 004c2ff9d2..efe714c23c 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py @@ -109,7 +109,10 @@ class MESITwoLevelCacheHierarchy( ) cache.sequencer = RubySequencer( - version=i, dcache=cache.L1Dcache, clk_domain=cache.clk_domain + version=i, + dcache=cache.L1Dcache, + clk_domain=cache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -163,7 +166,11 @@ class MESITwoLevelCacheHierarchy( dma_ports = board.get_dma_ports() for i, port in enumerate(dma_ports): ctrl = DMAController(self.ruby_system.network, cache_line_size) - ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port) + ctrl.dma_sequencer = DMASequencer( + version=i, + in_ports=port, + ruby_system=self.ruby_system, + ) self._dma_controllers.append(ctrl) ctrl.ruby_system = self.ruby_system @@ -188,5 +195,7 @@ class MESITwoLevelCacheHierarchy( # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. 
- self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports) diff --git a/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py index 478c793560..56e620ff0c 100644 --- a/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py +++ b/src/python/gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py @@ -95,6 +95,7 @@ class MIExampleCacheHierarchy(AbstractRubyCacheHierarchy): version=i, dcache=cache.cacheMemory, clk_domain=cache.clk_domain, + ruby_system=self.ruby_system, ) if board.has_io_bus(): @@ -140,7 +141,11 @@ class MIExampleCacheHierarchy(AbstractRubyCacheHierarchy): ctrl = DMAController( self.ruby_system.network, board.get_cache_line_size() ) - ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port) + ctrl.dma_sequencer = DMASequencer( + version=i, + in_ports=port, + ruby_system=self.ruby_system, + ) ctrl.ruby_system = self.ruby_system ctrl.dma_sequencer.ruby_system = self.ruby_system @@ -167,5 +172,7 @@ class MIExampleCacheHierarchy(AbstractRubyCacheHierarchy): # Set up a proxy port for the system_port. Used for load binaries and # other functional-only things. - self.ruby_system.sys_port_proxy = RubyPortProxy() + self.ruby_system.sys_port_proxy = RubyPortProxy( + ruby_system=self.ruby_system + ) board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports)