# Copyright (c) 2021,2022 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
Definitions for CHI nodes and controller types. These are used by
create_system in configs/ruby/CHI.py or may be used in custom configuration
scripts. When used with create_system, the user may provide an additional
configuration file as the --chi-config parameter to specialize the classes
defined here.

When using the CustomMesh topology, --chi-config must be provided with
specialization of the NoC_Param classes defining the NoC dimensions and
node to router binding. See configs/example/noc_config/2x4.py for an example.
"""

import math

import m5
from m5.objects import *


class Versions:
    """
    Helper class to obtain unique ids for a given controller class.
    These are passed as the 'version' parameter when creating the controller.
    """

    # Next id handed out by getSeqId (shared by all sequencers)
    _seqs = 0

    @classmethod
    def getSeqId(cls):
        """Return the next sequencer id; ids start at 0 and grow by one."""
        val = cls._seqs
        cls._seqs += 1
        return val

    # Next version to hand out for each controller type, keyed by the type
    _version = {}

    @classmethod
    def getVersion(cls, tp):
        """
        Return the next version number for controller type 'tp'.
        Each type has its own counter, starting at 0.
        """
        val = cls._version.get(tp, 0)
        cls._version[tp] = val + 1
        return val


class NoC_Params:
    """
    Default parameters for the interconnect. The value of data_width is
    also used to set the data_channel_size for all CHI controllers.
    (see configs/ruby/CHI.py)
    """

    router_link_latency = 1
    node_link_latency = 1
    router_latency = 1
    router_buffer_size = 4
    cntrl_msg_size = 8
    data_width = 32
    cross_links = []
    cross_link_latency = 0


class CHI_Node(SubSystem):
    """
    Base class with common functions for setting up Cache or Memory
    controllers that are part of a CHI RNF, RNFI, HNF, or SNF nodes.
    Notice getNetworkSideControllers and getAllControllers must be implemented
    in the derived classes.
    """

    class NoC_Params:
        """
        NoC config. parameters and bindings required for CustomMesh topology.

        Maps 'num_nodes_per_router' CHI nodes to each router provided in
        'router_list'. This assumes len(router_list)*num_nodes_per_router
        equals the number of nodes.
        If 'num_nodes_per_router' is left undefined, we circulate around
        'router_list' until all nodes are mapped.
        See 'distributeNodes' in configs/topologies/CustomMesh.py
        """

        num_nodes_per_router = None
        router_list = None

    def __init__(self, ruby_system):
        """Keep references to the Ruby system and its network."""
        super(CHI_Node, self).__init__()
        self._ruby_system = ruby_system
        self._network = ruby_system.network

    def getNetworkSideControllers(self):
        """
        Returns all ruby controllers that need to be connected to the
        network
        """
        raise NotImplementedError()

    def getAllControllers(self):
        """
        Returns all ruby controllers associated with this node
        """
        raise NotImplementedError()

    def setDownstream(self, cntrls):
        """
        Sets cntrls as the downstream list of all controllers in this node
        """
        for c in self.getNetworkSideControllers():
            c.downstream_destinations = cntrls

    def connectController(self, cntrl):
        """
        Creates and configures the messages buffers for the CHI input/output
        ports that connect to the network
        """
        cntrl.reqOut = MessageBuffer()
        cntrl.rspOut = MessageBuffer()
        cntrl.snpOut = MessageBuffer()
        cntrl.datOut = MessageBuffer()
        cntrl.reqIn = MessageBuffer()
        cntrl.rspIn = MessageBuffer()
        cntrl.snpIn = MessageBuffer()
        cntrl.datIn = MessageBuffer()

        # All CHI ports are always connected to the network.
        # Controllers that are not part of the getNetworkSideControllers list
        # still communicate using internal routers, thus we need to wire-up
        # the ports
        cntrl.reqOut.out_port = self._network.in_port
        cntrl.rspOut.out_port = self._network.in_port
        cntrl.snpOut.out_port = self._network.in_port
        cntrl.datOut.out_port = self._network.in_port
        cntrl.reqIn.in_port = self._network.out_port
        cntrl.rspIn.in_port = self._network.out_port
        cntrl.snpIn.in_port = self._network.out_port
        cntrl.datIn.in_port = self._network.out_port


class TriggerMessageBuffer(MessageBuffer):
    """
    MessageBuffer for triggering internal controller events.
    These buffers should not be affected by the Ruby tester randomization
    and allow popping messages enqueued in the same cycle.
    """

    randomization = "disabled"
    allow_zero_latency = True


class OrderedTriggerMessageBuffer(TriggerMessageBuffer):
    """TriggerMessageBuffer with the 'ordered' parameter enabled."""

    ordered = True


class MemCtrlMessageBuffer(MessageBuffer):
    """
    MessageBuffer exchanging messages with the memory.
    These buffers should also not be affected by the Ruby tester
    randomization.
    """

    randomization = "disabled"
    ordered = True


class CHI_Cache_Controller(Cache_Controller):
    """
    Default parameters for a Cache controller.
    The Cache_Controller can also be used as a DMA requester or as
    a pure directory if all cache allocation policies are disabled.
    """

    def __init__(self, ruby_system):
        """Create the controller with its internal queues and a fresh
        Cache_Controller version number."""
        super(CHI_Cache_Controller, self).__init__(
            version=Versions.getVersion(Cache_Controller),
            ruby_system=ruby_system,
            mandatoryQueue=MessageBuffer(),
            prefetchQueue=MessageBuffer(),
            triggerQueue=TriggerMessageBuffer(),
            retryTriggerQueue=OrderedTriggerMessageBuffer(),
            replTriggerQueue=OrderedTriggerMessageBuffer(),
            reqRdy=TriggerMessageBuffer(),
            snpRdy=TriggerMessageBuffer(),
        )
        # Set somewhat large number since we rely a lot on internal
        # triggers. To limit the controller performance, tweak other
        # params such as: input port buffer size, cache banks, and output
        # port latency
        self.transitions_per_cycle = 1024
        # This should be set to true in the data cache controller to enable
        # timeouts on unique lines when a store conditional fails
        self.sc_lock_enabled = False


class CHI_L1Controller(CHI_Cache_Controller):
    """
    Default parameters for a L1 Cache controller
    """

    def __init__(self, ruby_system, sequencer, cache, prefetcher):
        """
        Configure an L1 controller bound to 'sequencer' and 'cache'.
        'prefetcher' is accepted but not used here (use_prefetcher is
        always set to False).
        """
        super(CHI_L1Controller, self).__init__(ruby_system)
        self.sequencer = sequencer
        self.cache = cache
        self.use_prefetcher = False
        self.send_evictions = True
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False
        # Strict inclusive MOESI
        self.allow_SD = True
        self.alloc_on_seq_acc = True
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = True
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = True
        self.dealloc_backinv_shared = True
        # Some reasonable default TBE params
        self.number_of_TBEs = 16
        self.number_of_repl_TBEs = 16
        self.number_of_snoop_TBEs = 4
        self.number_of_DVM_TBEs = 16
        self.number_of_DVM_snoop_TBEs = 4
        self.unify_repl_TBEs = False


class CHI_L2Controller(CHI_Cache_Controller):
    """
    Default parameters for a L2 Cache controller
    """

    def __init__(self, ruby_system, cache, prefetcher):
        """
        Configure an L2 controller around 'cache'; it has no sequencer.
        'prefetcher' is accepted but not used here (use_prefetcher is
        always set to False).
        """
        super(CHI_L2Controller, self).__init__(ruby_system)
        self.sequencer = NULL
        self.cache = cache
        self.use_prefetcher = False
        self.allow_SD = True
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False
        self.send_evictions = False
        # Strict inclusive MOESI
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = True
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = True
        self.dealloc_backinv_shared = True
        # Some reasonable default TBE params
        self.number_of_TBEs = 32
        self.number_of_repl_TBEs = 32
        self.number_of_snoop_TBEs = 16
        self.number_of_DVM_TBEs = 1  # should not receive any dvm
        self.number_of_DVM_snoop_TBEs = 1  # should not receive any dvm
        self.unify_repl_TBEs = False


class CHI_HNFController(CHI_Cache_Controller):
    """
    Default parameters for a coherent home node (HNF) cache controller
    """

    def __init__(self, ruby_system, cache, prefetcher, addr_ranges):
        """
        Configure a home node controller around 'cache' that is assigned
        'addr_ranges'. 'prefetcher' is accepted but not used here
        (use_prefetcher is always set to False).
        """
        super(CHI_HNFController, self).__init__(ruby_system)
        self.sequencer = NULL
        self.cache = cache
        self.use_prefetcher = False
        self.addr_ranges = addr_ranges
        self.allow_SD = True
        self.is_HN = True
        self.enable_DMT = True
        self.enable_DCT = True
        self.send_evictions = False
        # MOESI / Mostly inclusive for shared / Exclusive for unique
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = False
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True
        self.dealloc_on_unique = True
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = False
        self.dealloc_backinv_shared = False
        # Some reasonable default TBE params
        self.number_of_TBEs = 32
        self.number_of_repl_TBEs = 32
        self.number_of_snoop_TBEs = 1  # should not receive any snoop
        self.number_of_DVM_TBEs = 1  # should not receive any dvm
        self.number_of_DVM_snoop_TBEs = 1  # should not receive any dvm
        self.unify_repl_TBEs = False


class CHI_MNController(MiscNode_Controller):
    """
    Default parameters for a Misc Node
    """

    def __init__(
        self, ruby_system, addr_range, l1d_caches, early_nonsync_comp
    ):
        """
        Create the Misc Node controller assigned to 'addr_range'.
        'l1d_caches' become the upstream destinations (DVM snoop targets)
        and 'early_nonsync_comp' is forwarded to the controller parameter
        of the same name.
        """
        super(CHI_MNController, self).__init__(
            version=Versions.getVersion(MiscNode_Controller),
            ruby_system=ruby_system,
            mandatoryQueue=MessageBuffer(),
            triggerQueue=TriggerMessageBuffer(),
            retryTriggerQueue=TriggerMessageBuffer(),
            schedRspTriggerQueue=TriggerMessageBuffer(),
            reqRdy=TriggerMessageBuffer(),
            snpRdy=TriggerMessageBuffer(),
        )
        # Set somewhat large number since we rely a lot on internal
        # triggers. To limit the controller performance, tweak other
        # params such as: input port buffer size, cache banks, and output
        # port latency
        self.transitions_per_cycle = 1024
        self.addr_ranges = [addr_range]
        # 16 total transaction buffer entries, but 1 is reserved for
        # DVMNonSync
        self.number_of_DVM_TBEs = 16
        self.number_of_non_sync_TBEs = 1
        self.early_nonsync_comp = early_nonsync_comp

        # "upstream_destinations" = targets for DVM snoops
        self.upstream_destinations = l1d_caches


class CHI_DMAController(CHI_Cache_Controller):
    """
    Default parameters for a DMA controller
    """

    def __init__(self, ruby_system, sequencer):
        """Configure a cache controller used as a DMA requester bound to
        'sequencer'; every allocation policy is disabled."""
        super(CHI_DMAController, self).__init__(ruby_system)
        self.sequencer = sequencer

        # Minimal cache object for the controller's 'cache' parameter;
        # nothing is allocated in it since all alloc_* policies below
        # are False.
        class DummyCache(RubyCache):
            dataAccessLatency = 0
            tagAccessLatency = 1
            size = "128"
            assoc = 1

        self.use_prefetcher = False
        self.cache = DummyCache()
        self.sequencer.dcache = NULL
        # All allocation and deallocation policies are disabled (as no
        # line is ever allocated, the deallocation settings don't really
        # matter)
        self.allow_SD = False
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = False
        self.alloc_on_readunique = False
        self.alloc_on_readonce = False
        self.alloc_on_writeback = False
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = False
        self.dealloc_backinv_shared = False
        self.send_evictions = False
        self.number_of_TBEs = 16
        self.number_of_repl_TBEs = 1
        self.number_of_snoop_TBEs = 1  # should not receive any snoop
        self.number_of_DVM_TBEs = 1  # should not receive any dvm
        self.number_of_DVM_snoop_TBEs = 1  # should not receive any dvm
        self.unify_repl_TBEs = False


class CPUSequencerWrapper:
    """
    Other generic configuration scripts assume a matching number of sequencers
    and cpus. This wraps the instruction and data sequencer so they are
    compatible with the other scripts. This assumes all scripts are using
    connectCpuPorts/connectIOPorts to bind ports
    """

    def __init__(self, iseq, dseq):
        """Wrap the instruction sequencer 'iseq' and data sequencer
        'dseq'."""
        # use this style due to __setattr__ override below
        self.__dict__["inst_seq"] = iseq
        self.__dict__["data_seq"] = dseq
        self.__dict__["support_data_reqs"] = True
        self.__dict__["support_inst_reqs"] = True
        # Compatibility with certain scripts that wire up ports
        # without connectCpuPorts
        self.__dict__["in_ports"] = dseq.in_ports

    def connectCpuPorts(self, cpu):
        """
        Bind the cpu's icache_port to the instruction sequencer and every
        other cached port, plus the uncached ports, to the data sequencer.
        """
        assert isinstance(cpu, BaseCPU)
        cpu.icache_port = self.inst_seq.in_ports
        for p in cpu._cached_ports:
            if str(p) != "icache_port":
                # NOTE(review): exec is kept on purpose; entries of
                # _cached_ports are presumably allowed to be dotted
                # attribute paths, which a plain setattr() on the cpu
                # would not resolve -- confirm before replacing.
                exec("cpu.%s = self.data_seq.in_ports" % p)
        cpu.connectUncachedPorts(
            self.data_seq.in_ports, self.data_seq.interrupt_out_port
        )

    def connectIOPorts(self, piobus):
        """Connect the IO ports of the data sequencer to 'piobus'."""
        self.data_seq.connectIOPorts(piobus)

    def __setattr__(self, name, value):
        """Forward any attribute assignment to both wrapped sequencers."""
        setattr(self.inst_seq, name, value)
        setattr(self.data_seq, name, value)


class CHI_RNF(CHI_Node):
    """
    Defines a CHI request node.
    Notice all controllers and sequencers are set as children of the cpus, so
    this object acts more like a proxy for setting things up and has no
    topology significance unless the cpus are set as its children at the top
    level
    """

    def __init__(
        self,
        cpus,
        ruby_system,
        l1Icache_type,
        l1Dcache_type,
        cache_line_size,
        l1Iprefetcher_type=None,
        l1Dprefetcher_type=None,
    ):
        """
        Create, for every cpu in 'cpus', the instruction/data sequencers
        and the L1I/L1D controllers, and connect the controllers to the
        network.

        'l1Icache_type' and 'l1Dcache_type' are called to build the cache
        objects. The prefetcher types must be left as None; anything else
        ends in m5.fatal since prefetching is not supported yet.
        """
        super(CHI_RNF, self).__init__(ruby_system)

        # log2 gives the bit count without the rounding error that the
        # two-argument math.log(x, 2) can introduce
        self._block_size_bits = int(math.log2(cache_line_size))

        # All sequencers and controllers
        self._seqs = []
        self._cntrls = []

        # Last level controllers in this node, i.e., the ones that will send
        # requests to the home nodes
        self._ll_cntrls = []

        self._cpus = cpus

        # First creates L1 caches and sequencers
        for cpu in self._cpus:
            cpu.inst_sequencer = RubySequencer(
                version=Versions.getSeqId(), ruby_system=ruby_system
            )
            cpu.data_sequencer = RubySequencer(
                version=Versions.getSeqId(), ruby_system=ruby_system
            )

            self._seqs.append(
                CPUSequencerWrapper(cpu.inst_sequencer, cpu.data_sequencer)
            )

            # caches
            l1i_cache = l1Icache_type(
                start_index_bit=self._block_size_bits, is_icache=True
            )

            l1d_cache = l1Dcache_type(
                start_index_bit=self._block_size_bits, is_icache=False
            )

            # Placeholders for future prefetcher support
            if (
                l1Iprefetcher_type is not None
                or l1Dprefetcher_type is not None
            ):
                m5.fatal("Prefetching not supported yet")
            l1i_pf = NULL
            l1d_pf = NULL

            # cache controllers
            cpu.l1i = CHI_L1Controller(
                ruby_system, cpu.inst_sequencer, l1i_cache, l1i_pf
            )

            cpu.l1d = CHI_L1Controller(
                ruby_system, cpu.data_sequencer, l1d_cache, l1d_pf
            )

            cpu.inst_sequencer.dcache = NULL
            cpu.data_sequencer.dcache = cpu.l1d.cache

            cpu.l1d.sc_lock_enabled = True

            cpu._ll_cntrls = [cpu.l1i, cpu.l1d]
            for c in cpu._ll_cntrls:
                self._cntrls.append(c)
                self.connectController(c)
                self._ll_cntrls.append(c)

    def getSequencers(self):
        """Returns the CPUSequencerWrapper objects, one per cpu."""
        return self._seqs

    def getAllControllers(self):
        """Returns every cache controller created for this node."""
        return self._cntrls

    def getNetworkSideControllers(self):
        """Returns the same list as getAllControllers."""
        return self._cntrls

    def setDownstream(self, cntrls):
        """Sets cntrls as the downstream list of the last level
        controllers only."""
        for c in self._ll_cntrls:
            c.downstream_destinations = cntrls

    def getCpus(self):
        """Returns the cpus this node was created with."""
        return self._cpus

    # Adds a private L2 for each cpu
    def addPrivL2Cache(self, cache_type, pf_type=None):
        """
        Adds a private L2 controller to each cpu. The L2s replace the
        previous last level controllers of the node, and each cpu's
        previous last level controllers get its L2 as their only
        downstream destination.

        'pf_type' must be left as None; anything else ends in m5.fatal
        since prefetching is not supported yet.
        """
        self._ll_cntrls = []
        for cpu in self._cpus:
            l2_cache = cache_type(
                start_index_bit=self._block_size_bits, is_icache=False
            )
            if pf_type is not None:
                m5.fatal("Prefetching not supported yet")
            l2_pf = NULL

            cpu.l2 = CHI_L2Controller(self._ruby_system, l2_cache, l2_pf)

            self._cntrls.append(cpu.l2)
            self.connectController(cpu.l2)

            self._ll_cntrls.append(cpu.l2)

            for c in cpu._ll_cntrls:
                c.downstream_destinations = [cpu.l2]
            cpu._ll_cntrls = [cpu.l2]


class CHI_HNF(CHI_Node):
    """
    Encapsulates an HNF cache/directory controller.
    The class method CHI_HNF.createAddrRanges must be called before creating
    any CHI_HNF object to set-up the interleaved address ranges used by the
    HNFs
    """

    class NoC_Params(CHI_Node.NoC_Params):
        """HNFs may also define the 'pairing' parameter to allow pairing"""

        pairing = None

    # Maps each entry of the 'hnfs' list given to createAddrRanges to a
    # (list of AddrRange, interleaving high bit) tuple
    _addr_ranges = {}

    @classmethod
    def createAddrRanges(cls, sys_mem_ranges, cache_line_size, hnfs):
        """
        Create the HNFs interleaved addr ranges: for each entry in 'hnfs'
        and each range in 'sys_mem_ranges', one AddrRange interleaved on
        the log2(len(hnfs)) bits right above the cache line offset bits.
        """
        # log2 gives the bit counts without the rounding error that the
        # two-argument math.log(x, 2) can introduce
        block_size_bits = int(math.log2(cache_line_size))
        llc_bits = int(math.log2(len(hnfs)))
        numa_bit = block_size_bits + llc_bits - 1
        for i, hnf in enumerate(hnfs):
            ranges = [
                AddrRange(
                    r.start,
                    size=r.size(),
                    intlvHighBit=numa_bit,
                    intlvBits=llc_bits,
                    intlvMatch=i,
                )
                for r in sys_mem_ranges
            ]
            cls._addr_ranges[hnf] = (ranges, numa_bit)

    @classmethod
    def getAddrRanges(cls, hnf_idx):
        """
        Returns the (ranges, interleaving high bit) tuple stored for
        'hnf_idx'. createAddrRanges must have been called before.
        """
        assert len(cls._addr_ranges) != 0
        return cls._addr_ranges[hnf_idx]

    # The CHI controller can be a child of this object or another if
    # 'parent' is specified
    def __init__(self, hnf_idx, ruby_system, llcache_type, parent):
        """
        Create the HNF controller for 'hnf_idx' with a cache built by
        calling 'llcache_type'. The controller is attached as 'cntrl' to
        'parent', or to this object when 'parent' is None.
        """
        super(CHI_HNF, self).__init__(ruby_system)

        addr_ranges, intlvHighBit = self.getAddrRanges(hnf_idx)
        # All ranges should have the same interleaving
        assert len(addr_ranges) >= 1

        ll_cache = llcache_type(start_index_bit=intlvHighBit + 1)
        self._cntrl = CHI_HNFController(
            ruby_system, ll_cache, NULL, addr_ranges
        )

        if parent is None:
            self.cntrl = self._cntrl
        else:
            parent.cntrl = self._cntrl

        self.connectController(self._cntrl)

    def getAllControllers(self):
        """Returns a list with the single HNF controller."""
        return [self._cntrl]

    def getNetworkSideControllers(self):
        """Returns a list with the single HNF controller."""
        return [self._cntrl]


class CHI_MN(CHI_Node):
    """
    Encapsulates a Misc Node controller.
    """

    class NoC_Params(CHI_Node.NoC_Params):
        """MNs may also define the 'pairing' parameter to allow pairing"""

        pairing = None

    # The CHI controller is always a child of this object
    def __init__(self, ruby_system, l1d_caches, early_nonsync_comp=False):
        """
        Create the Misc Node controller. 'l1d_caches' and
        'early_nonsync_comp' are forwarded to CHI_MNController.
        """
        super(CHI_MN, self).__init__(ruby_system)

        # MiscNode has internal address range starting at 0
        addr_range = AddrRange(0, size="1kB")

        self._cntrl = CHI_MNController(
            ruby_system, addr_range, l1d_caches, early_nonsync_comp
        )

        self.cntrl = self._cntrl

        self.connectController(self._cntrl)

    def connectController(self, cntrl):
        """Same as CHI_Node.connectController."""
        CHI_Node.connectController(self, cntrl)

    def getAllControllers(self):
        """Returns a list with the single Misc Node controller."""
        return [self._cntrl]

    def getNetworkSideControllers(self):
        """Returns a list with the single Misc Node controller."""
        return [self._cntrl]


class CHI_SNF_Base(CHI_Node):
    """
    Creates CHI node controllers for the memory controllers
    """

    # The CHI controller can be a child of this object or another if
    # 'parent' is specified
    def __init__(self, ruby_system, parent):
        """
        Create the Memory_Controller and connect it to the network. The
        controller is attached as 'cntrl' to 'parent' when one is given,
        otherwise to this object.
        """
        super(CHI_SNF_Base, self).__init__(ruby_system)

        self._cntrl = Memory_Controller(
            version=Versions.getVersion(Memory_Controller),
            ruby_system=ruby_system,
            triggerQueue=TriggerMessageBuffer(),
            responseFromMemory=MemCtrlMessageBuffer(),
            requestToMemory=MemCtrlMessageBuffer(),
            reqRdy=TriggerMessageBuffer(),
            transitions_per_cycle=1024,
        )

        self.connectController(self._cntrl)

        if parent:
            parent.cntrl = self._cntrl
        else:
            self.cntrl = self._cntrl

    def getAllControllers(self):
        """Returns a list with the single memory-side controller."""
        return [self._cntrl]

    def getNetworkSideControllers(self):
        """Returns a list with the single memory-side controller."""
        return [self._cntrl]

    def getMemRange(self, mem_ctrl):
        """
        Returns mem_ctrl.range when that attribute exists, otherwise
        mem_ctrl.dram.range.
        """
        # TODO need some kind of transparent API for
        # MemCtrl+DRAM vs SimpleMemory
        if hasattr(mem_ctrl, "range"):
            return mem_ctrl.range
        else:
            return mem_ctrl.dram.range


class CHI_SNF_BootMem(CHI_SNF_Base):
    """
    Create the SNF for the boot memory
    """

    def __init__(self, ruby_system, parent, bootmem):
        """Bind the controller to the port and range of 'bootmem'."""
        super(CHI_SNF_BootMem, self).__init__(ruby_system, parent)
        self._cntrl.memory_out_port = bootmem.port
        self._cntrl.addr_ranges = self.getMemRange(bootmem)


class CHI_SNF_MainMem(CHI_SNF_Base):
    """
    Create the SNF for a list of main memory controllers
    """

    def __init__(self, ruby_system, parent, mem_ctrl=None):
        """
        Bind the controller to the port and range of 'mem_ctrl' when one
        is given; otherwise they are left to be bound later.
        """
        super(CHI_SNF_MainMem, self).__init__(ruby_system, parent)
        if mem_ctrl:
            self._cntrl.memory_out_port = mem_ctrl.port
            self._cntrl.addr_ranges = self.getMemRange(mem_ctrl)
        # else bind ports and range later


class CHI_RNI_Base(CHI_Node):
    """
    Request node without cache / DMA
    """

    # The CHI controller can be a child of this object or another if
    # 'parent' is specified
    def __init__(self, ruby_system, parent):
        """
        Create a sequencer and a CHI_DMAController using it, and connect
        the controller to the network. The controller is attached as
        'cntrl' to 'parent' when one is given, otherwise to this object.
        """
        super(CHI_RNI_Base, self).__init__(ruby_system)

        self._sequencer = RubySequencer(
            version=Versions.getSeqId(),
            ruby_system=ruby_system,
            clk_domain=ruby_system.clk_domain,
        )
        self._cntrl = CHI_DMAController(ruby_system, self._sequencer)

        if parent:
            parent.cntrl = self._cntrl
        else:
            self.cntrl = self._cntrl

        self.connectController(self._cntrl)

    def getAllControllers(self):
        """Returns a list with the single DMA controller."""
        return [self._cntrl]

    def getNetworkSideControllers(self):
        """Returns a list with the single DMA controller."""
        return [self._cntrl]


class CHI_RNI_DMA(CHI_RNI_Base):
    """
    DMA controller wired up to a given dma port
    """

    def __init__(self, ruby_system, dma_port, parent):
        """Bind the sequencer's in_ports to 'dma_port' (must not be
        None)."""
        super(CHI_RNI_DMA, self).__init__(ruby_system, parent)
        assert dma_port is not None
        self._sequencer.in_ports = dma_port


class CHI_RNI_IO(CHI_RNI_Base):
    """
    DMA controller wired up to ruby_system IO port
    """

    def __init__(self, ruby_system, parent):
        """Register this node's sequencer as ruby_system._io_port."""
        super(CHI_RNI_IO, self).__init__(ruby_system, parent)
        ruby_system._io_port = self._sequencer