# Files
# gem5/configs/ruby/CHI_config.py
# Tiago Mück 06a8a47322 configs: fix CHI mem buffers
# Disabling randomization for the memory request and response buffers.
# CHI requires that memory responses for the same address arrive in
# the same order the request was sent.
#
# Change-Id: Ia4236188679beaf2969978675414a870ccd9f94a
# Signed-off-by: Tiago Mück <tiago.muck@arm.com>
# Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/63673
# Tested-by: kokoro <noreply+kokoro@google.com>
# Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
# Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
# 2022-09-28 18:56:04 +00:00
#
# 782 lines
# 26 KiB
# Python

# Copyright (c) 2021,2022 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
Definitions for CHI nodes and controller types. These are used by
create_system in configs/ruby/CHI.py or may be used in custom configuration
scripts. When used with create_system, the user may provide an additional
configuration file as the --chi-config parameter to specialize the classes
defined here.
When using the CustomMesh topology, --chi-config must be provided with
specialization of the NoC_Param classes defining the NoC dimensions and
node to router binding. See configs/example/noc_config/2x4.py for an example.
"""
import math
import m5
from m5.objects import *
class Versions:
    """
    Helper class to obtain unique ids for a given controller class.
    These are passed as the 'version' parameter when creating the controller.

    Two independent counters are kept as class-level state: '_seqs' is a
    single running counter served by getSeqId, and '_version' maps a key
    (normally a controller class) to the next unused id for that key.
    """

    # Next id handed out by getSeqId
    _seqs = 0

    @classmethod
    def getSeqId(cls):
        """Returns the next sequence id; ids start at 0 and grow by 1."""
        val = cls._seqs
        cls._seqs += 1
        return val

    # Next id handed out by getVersion, indexed by the 'tp' key
    _version = {}

    @classmethod
    def getVersion(cls, tp):
        """
        Returns the next id for the key 'tp'. Ids start at 0 and are
        counted separately for every distinct 'tp'.
        """
        # A key that was never seen starts counting from 0
        val = cls._version.get(tp, 0)
        cls._version[tp] = val + 1
        return val
class NoC_Params:
    """
    Interconnect defaults. Besides configuring the NoC itself, data_width
    is also the value used to set the data_channel_size of all CHI
    controllers (see configs/ruby/CHI.py).
    """

    # Latency settings for links and routers
    router_link_latency = 1
    node_link_latency = 1
    router_latency = 1

    # Buffer, message and channel sizing
    router_buffer_size = 4
    cntrl_msg_size = 8
    data_width = 32

    # Cross links: none are defined by default
    cross_links = []
    cross_link_latency = 0
class CHI_Node(SubSystem):
    """
    Common base for the nodes that set up Cache or Memory controllers
    belonging to a CHI RNF, RNFI, HNF, or SNF.
    Derived classes are required to implement getNetworkSideControllers and
    getAllControllers.
    """

    class NoC_Params:
        """
        NoC config. parameters and bindings required for CustomMesh topology.
        Maps 'num_nodes_per_router' CHI nodes to each router provided in
        'router_list'. This assumes len(router_list)*num_nodes_per_router
        equals the number of nodes.
        If 'num_nodes_per_router' is left undefined, we circulate around
        'router_list' until all nodes are mapped.
        See 'distributeNodes' in configs/topologies/CustomMesh.py
        """

        num_nodes_per_router = None
        router_list = None

    def __init__(self, ruby_system):
        """Keeps references to 'ruby_system' and to its network."""
        super(CHI_Node, self).__init__()
        self._ruby_system = ruby_system
        self._network = ruby_system.network

    def getNetworkSideControllers(self):
        """
        Returns all ruby controllers that need to be connected to the
        network. Must be overridden by derived classes.
        """
        raise NotImplementedError()

    def getAllControllers(self):
        """
        Returns all ruby controllers associated with this node. Must be
        overridden by derived classes.
        """
        raise NotImplementedError()

    def setDownstream(self, cntrls):
        """
        Sets cntrls as the downstream list of all network-side controllers
        in this node
        """
        for net_cntrl in self.getNetworkSideControllers():
            net_cntrl.downstream_destinations = cntrls

    def connectController(self, cntrl):
        """
        Creates the message buffers for the CHI input/output ports of
        'cntrl' and binds them to the network
        """
        out_buffers = ("reqOut", "rspOut", "snpOut", "datOut")
        in_buffers = ("reqIn", "rspIn", "snpIn", "datIn")

        # First create every buffer: outputs, then inputs
        for buf_name in out_buffers + in_buffers:
            setattr(cntrl, buf_name, MessageBuffer())

        # All CHI ports are always connected to the network.
        # Controllers that are not part of the getNetworkSideControllers list
        # still communicate using internal routers, thus we need to wire-up
        # the ports
        for buf_name in out_buffers:
            getattr(cntrl, buf_name).out_port = self._network.in_port
        for buf_name in in_buffers:
            getattr(cntrl, buf_name).in_port = self._network.out_port
class TriggerMessageBuffer(MessageBuffer):
    """
    MessageBuffer used to trigger internal controller events.
    The Ruby tester randomization should not affect these buffers, and
    messages enqueued in a given cycle may be popped in that same cycle.
    """

    # Keep the Ruby tester randomization off for this buffer
    randomization = "disabled"
    # Needed to pop messages in the cycle they were enqueued
    allow_zero_latency = True
class OrderedTriggerMessageBuffer(TriggerMessageBuffer):
    """
    TriggerMessageBuffer variant with the 'ordered' parameter set.
    """

    ordered = True
class MemCtrlMessageBuffer(MessageBuffer):
    """
    MessageBuffer used to exchange messages with the memory.
    Like the trigger buffers, these should not be affected by the Ruby
    tester randomization. The buffer is also marked as ordered.
    """

    # Keep the Ruby tester randomization off for this buffer
    randomization = "disabled"
    ordered = True
class CHI_Cache_Controller(Cache_Controller):
    """
    Cache controller with default parameters.
    With all cache allocation policies disabled, the Cache_Controller can
    also serve as a DMA requester or as a pure directory.
    """

    def __init__(self, ruby_system):
        """
        Creates the controller with a fresh version id and its internal
        message buffers.
        """
        cntrl_version = Versions.getVersion(Cache_Controller)
        super(CHI_Cache_Controller, self).__init__(
            version=cntrl_version,
            ruby_system=ruby_system,
            mandatoryQueue=MessageBuffer(),
            prefetchQueue=MessageBuffer(),
            triggerQueue=TriggerMessageBuffer(),
            retryTriggerQueue=OrderedTriggerMessageBuffer(),
            replTriggerQueue=OrderedTriggerMessageBuffer(),
            reqRdy=TriggerMessageBuffer(),
            snpRdy=TriggerMessageBuffer(),
        )

        # Use a somewhat large number since we rely a lot on internal
        # triggers. To limit the controller performance, tweak other
        # params such as: input port buffer size, cache banks, and output
        # port latency
        self.transitions_per_cycle = 1024

        # The data cache controller should set this to true to enable
        # timeouts on unique lines when a store conditional fails
        self.sc_lock_enabled = False
class CHI_L1Controller(CHI_Cache_Controller):
    """
    L1 cache controller with default parameters
    """

    def __init__(self, ruby_system, sequencer, cache, prefetcher):
        """
        Binds 'sequencer' and 'cache' to the controller and applies the L1
        defaults. 'prefetcher' is accepted but not used here; prefetching
        is left disabled.
        """
        super(CHI_L1Controller, self).__init__(ruby_system)

        self.sequencer = sequencer
        self.cache = cache
        self.use_prefetcher = False
        self.send_evictions = True

        # Not a home node; direct memory/cache transfers are off
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False

        # Strict inclusive MOESI
        self.allow_SD = True
        self.alloc_on_seq_acc = True
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = True
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = True
        self.dealloc_backinv_shared = True

        # Reasonable defaults for the TBE tables
        self.number_of_TBEs = 16
        self.number_of_repl_TBEs = 16
        self.number_of_snoop_TBEs = 4
        self.number_of_DVM_TBEs = 16
        self.number_of_DVM_snoop_TBEs = 4
        self.unify_repl_TBEs = False
class CHI_L2Controller(CHI_Cache_Controller):
    """
    L2 cache controller with default parameters
    """

    def __init__(self, ruby_system, cache, prefetcher):
        """
        Binds 'cache' to the controller and applies the L2 defaults. The
        controller has no sequencer. 'prefetcher' is accepted but not used
        here; prefetching is left disabled.
        """
        super(CHI_L2Controller, self).__init__(ruby_system)

        self.sequencer = NULL
        self.cache = cache
        self.use_prefetcher = False
        self.allow_SD = True

        # Not a home node; direct memory/cache transfers are off
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False
        self.send_evictions = False

        # Strict inclusive MOESI
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = True
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = True
        self.dealloc_backinv_shared = True

        # Reasonable defaults for the TBE tables
        self.number_of_TBEs = 32
        self.number_of_repl_TBEs = 32
        self.number_of_snoop_TBEs = 16
        self.number_of_DVM_TBEs = 1  # should not receive any dvm
        self.number_of_DVM_snoop_TBEs = 1  # should not receive any dvm
        self.unify_repl_TBEs = False
class CHI_HNFController(CHI_Cache_Controller):
    """
    Cache controller for a coherent home node (HNF) with default parameters
    """

    def __init__(self, ruby_system, cache, prefetcher, addr_ranges):
        """
        Binds 'cache' and 'addr_ranges' to the controller and applies the
        HNF defaults. The controller has no sequencer. 'prefetcher' is
        accepted but not used here; prefetching is left disabled.
        """
        super(CHI_HNFController, self).__init__(ruby_system)

        self.sequencer = NULL
        self.cache = cache
        self.use_prefetcher = False
        self.addr_ranges = addr_ranges
        self.allow_SD = True

        # Home node, with direct memory/cache transfers enabled
        self.is_HN = True
        self.enable_DMT = True
        self.enable_DCT = True
        self.send_evictions = False

        # MOESI / Mostly inclusive for shared / Exclusive for unique
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = False
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True
        self.dealloc_on_unique = True
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = False
        self.dealloc_backinv_shared = False

        # Reasonable defaults for the TBE tables
        self.number_of_TBEs = 32
        self.number_of_repl_TBEs = 32
        self.number_of_snoop_TBEs = 1  # should not receive any snoop
        self.number_of_DVM_TBEs = 1  # should not receive any dvm
        self.number_of_DVM_snoop_TBEs = 1  # should not receive any dvm
        self.unify_repl_TBEs = False
class CHI_MNController(MiscNode_Controller):
    """
    Misc Node controller with default parameters
    """

    def __init__(
        self, ruby_system, addr_range, l1d_caches, early_nonsync_comp
    ):
        """
        Creates the controller with a fresh version id and its internal
        message buffers. 'addr_range' becomes the only entry of
        addr_ranges and 'l1d_caches' the upstream destinations.
        """
        cntrl_version = Versions.getVersion(MiscNode_Controller)
        super(CHI_MNController, self).__init__(
            version=cntrl_version,
            ruby_system=ruby_system,
            mandatoryQueue=MessageBuffer(),
            triggerQueue=TriggerMessageBuffer(),
            retryTriggerQueue=TriggerMessageBuffer(),
            schedRspTriggerQueue=TriggerMessageBuffer(),
            reqRdy=TriggerMessageBuffer(),
            snpRdy=TriggerMessageBuffer(),
        )

        # Use a somewhat large number since we rely a lot on internal
        # triggers. To limit the controller performance, tweak other
        # params such as: input port buffer size, cache banks, and output
        # port latency
        self.transitions_per_cycle = 1024
        self.addr_ranges = [addr_range]

        # 16 total transaction buffer entries, but 1 is reserved for DVMNonSync
        self.number_of_DVM_TBEs = 16
        self.number_of_non_sync_TBEs = 1
        self.early_nonsync_comp = early_nonsync_comp

        # "upstream_destinations" = targets for DVM snoops
        self.upstream_destinations = l1d_caches
class CHI_DMAController(CHI_Cache_Controller):
    """
    DMA controller with default parameters
    """

    def __init__(self, ruby_system, sequencer):
        """
        Binds 'sequencer' to the controller, attaches a minimal dummy
        cache, and disables every allocation policy.
        """
        super(CHI_DMAController, self).__init__(ruby_system)
        self.sequencer = sequencer

        # Minimal cache object for the controller; nothing is allocated
        # in it since all alloc_* policies below are false
        class DummyCache(RubyCache):
            dataAccessLatency = 0
            tagAccessLatency = 1
            size = "128"
            assoc = 1

        self.use_prefetcher = False
        self.cache = DummyCache()
        self.sequencer.dcache = NULL

        # Every allocation policy is false. The deallocation policies are
        # set to false as well (they don't really matter).
        # NOTE(review): the previous comment claimed deallocations were
        # true, which did not match the values assigned below.
        self.allow_SD = False
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = False
        self.alloc_on_readunique = False
        self.alloc_on_readonce = False
        self.alloc_on_writeback = False
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = False
        self.dealloc_backinv_shared = False
        self.send_evictions = False

        self.number_of_TBEs = 16
        self.number_of_repl_TBEs = 1
        self.number_of_snoop_TBEs = 1  # should not receive any snoop
        self.number_of_DVM_TBEs = 1  # should not receive any dvm
        self.number_of_DVM_snoop_TBEs = 1  # should not receive any dvm
        self.unify_repl_TBEs = False
class CPUSequencerWrapper:
    """
    Other generic configuration scripts assume a matching number of sequencers
    and cpus. This wraps the instruction and data sequencer so they are
    compatible with the other scripts. This assumes all scripts are using
    connectCpuPorts/connectIOPorts to bind ports.

    Any attribute assigned on the wrapper after construction is forwarded to
    both wrapped sequencers (see __setattr__).
    """

    def __init__(self, iseq, dseq):
        """
        Wraps the instruction sequencer 'iseq' and the data sequencer
        'dseq'.
        """
        # Write through __dict__ directly: a regular assignment would be
        # redirected to the sequencers by the __setattr__ override below
        self.__dict__["inst_seq"] = iseq
        self.__dict__["data_seq"] = dseq
        self.__dict__["support_data_reqs"] = True
        self.__dict__["support_inst_reqs"] = True
        # Compatibility with certain scripts that wire up ports
        # without connectCpuPorts
        self.__dict__["in_ports"] = dseq.in_ports

    @staticmethod
    def _setPortByPath(obj, path, value):
        """
        Assigns 'value' to the attribute of 'obj' named by 'path'.
        'path' is either a plain attribute name or a dotted path such as
        "a.b.port"; for a dotted path every component but the last is
        resolved with getattr and the last one is assigned with setattr.
        """
        *parents, leaf = path.split(".")
        for name in parents:
            obj = getattr(obj, name)
        setattr(obj, leaf, value)

    def connectCpuPorts(self, cpu):
        """
        Binds the icache port of 'cpu' to the instruction sequencer, every
        other cached port and the uncached ports to the data sequencer.
        """
        assert isinstance(cpu, BaseCPU)
        cpu.icache_port = self.inst_seq.in_ports
        for p in cpu._cached_ports:
            port_path = str(p)
            if port_path != "icache_port":
                # Plain attribute access replaces the previous string-built
                # exec. NOTE(review): this assumes entries are attribute
                # names or dotted attribute paths - confirm against the
                # definitions of _cached_ports.
                self._setPortByPath(cpu, port_path, self.data_seq.in_ports)
        cpu.connectUncachedPorts(
            self.data_seq.in_ports, self.data_seq.interrupt_out_port
        )

    def connectIOPorts(self, piobus):
        """Connects the IO ports of the data sequencer to 'piobus'."""
        self.data_seq.connectIOPorts(piobus)

    def __setattr__(self, name, value):
        """Forwards the assignment to both wrapped sequencers."""
        setattr(self.inst_seq, name, value)
        setattr(self.data_seq, name, value)
class CHI_RNF(CHI_Node):
    """
    Defines a CHI request node.
    Notice all controllers and sequencers are set as children of the cpus, so
    this object acts more like a proxy for setting things up and has no
    topology significance unless the cpus are set as its children at the top
    level
    """

    def __init__(
        self,
        cpus,
        ruby_system,
        l1Icache_type,
        l1Dcache_type,
        cache_line_size,
        l1Iprefetcher_type=None,
        l1Dprefetcher_type=None,
    ):
        """
        Creates, for each cpu in 'cpus', an instruction and a data
        sequencer plus the L1I/L1D caches and their controllers.
        'l1Icache_type' and 'l1Dcache_type' are called to build the caches.
        Prefetcher types must be left as None: m5.fatal is called otherwise.
        """
        super(CHI_RNF, self).__init__(ruby_system)

        # math.log2 is the accurate form for a base-2 logarithm; the result
        # is truncated to an integer number of bits as before
        self._block_size_bits = int(math.log2(cache_line_size))

        # All sequencers and controllers
        self._seqs = []
        self._cntrls = []

        # Last level controllers in this node, i.e., the ones that will send
        # requests to the home nodes
        self._ll_cntrls = []

        self._cpus = cpus

        # First creates L1 caches and sequencers
        for cpu in self._cpus:
            cpu.inst_sequencer = RubySequencer(
                version=Versions.getSeqId(), ruby_system=ruby_system
            )
            cpu.data_sequencer = RubySequencer(
                version=Versions.getSeqId(), ruby_system=ruby_system
            )

            self._seqs.append(
                CPUSequencerWrapper(cpu.inst_sequencer, cpu.data_sequencer)
            )

            # caches
            l1i_cache = l1Icache_type(
                start_index_bit=self._block_size_bits, is_icache=True
            )

            l1d_cache = l1Dcache_type(
                start_index_bit=self._block_size_bits, is_icache=False
            )

            # Placeholders for future prefetcher support
            if (
                l1Iprefetcher_type is not None
                or l1Dprefetcher_type is not None
            ):
                m5.fatal("Prefetching not supported yet")
            l1i_pf = NULL
            l1d_pf = NULL

            # cache controllers
            cpu.l1i = CHI_L1Controller(
                ruby_system, cpu.inst_sequencer, l1i_cache, l1i_pf
            )

            cpu.l1d = CHI_L1Controller(
                ruby_system, cpu.data_sequencer, l1d_cache, l1d_pf
            )

            cpu.inst_sequencer.dcache = NULL
            cpu.data_sequencer.dcache = cpu.l1d.cache

            cpu.l1d.sc_lock_enabled = True

            # Per-cpu last level controllers; replaced by the L2 when
            # addPrivL2Cache is called
            cpu._ll_cntrls = [cpu.l1i, cpu.l1d]
            for c in cpu._ll_cntrls:
                self._cntrls.append(c)
                self.connectController(c)
                self._ll_cntrls.append(c)

    def getSequencers(self):
        """Returns one CPUSequencerWrapper per cpu"""
        return self._seqs

    def getAllControllers(self):
        """Returns all controllers created by this node"""
        return self._cntrls

    def getNetworkSideControllers(self):
        """Returns all controllers created by this node"""
        return self._cntrls

    def setDownstream(self, cntrls):
        """
        Sets cntrls as the downstream list of the last level controllers
        of this node
        """
        for c in self._ll_cntrls:
            c.downstream_destinations = cntrls

    def getCpus(self):
        """Returns the cpus given at construction"""
        return self._cpus

    # Adds a private L2 for each cpu
    def addPrivL2Cache(self, cache_type, pf_type=None):
        """
        Creates a private L2 per cpu, sets it as the downstream destination
        of the cpu's current last level controllers, and makes the L2s the
        new last level controllers. 'pf_type' must be left as None:
        m5.fatal is called otherwise.
        """
        self._ll_cntrls = []
        for cpu in self._cpus:
            l2_cache = cache_type(
                start_index_bit=self._block_size_bits, is_icache=False
            )
            if pf_type is not None:
                m5.fatal("Prefetching not supported yet")
            l2_pf = NULL

            cpu.l2 = CHI_L2Controller(self._ruby_system, l2_cache, l2_pf)

            self._cntrls.append(cpu.l2)
            self.connectController(cpu.l2)

            self._ll_cntrls.append(cpu.l2)

            for c in cpu._ll_cntrls:
                c.downstream_destinations = [cpu.l2]
            cpu._ll_cntrls = [cpu.l2]
class CHI_HNF(CHI_Node):
    """
    Encapsulates an HNF cache/directory controller.
    The class method CHI_HNF.createAddrRanges must be called before creating
    any CHI_HNF object to set-up the interleaved address ranges used by the
    HNFs
    """

    class NoC_Params(CHI_Node.NoC_Params):
        """HNFs may also define the 'pairing' parameter to allow pairing"""

        pairing = None

    # Maps each entry of the 'hnfs' list given to createAddrRanges to a
    # (list of address ranges, interleaving high bit) tuple
    _addr_ranges = {}

    @classmethod
    def createAddrRanges(cls, sys_mem_ranges, cache_line_size, hnfs):
        """
        Creates the interleaved address ranges of all HNFs. For each entry
        of 'hnfs', one AddrRange is created per range in 'sys_mem_ranges',
        with the position of the entry in 'hnfs' as the interleaving match.
        The result is stored keyed by the entry and can be read back with
        getAddrRanges.
        """
        # math.log2 is the accurate form for a base-2 logarithm; results
        # are truncated to an integer number of bits as before
        block_size_bits = int(math.log2(cache_line_size))
        llc_bits = int(math.log2(len(hnfs)))
        numa_bit = block_size_bits + llc_bits - 1
        for i, hnf in enumerate(hnfs):
            ranges = []
            for r in sys_mem_ranges:
                addr_range = AddrRange(
                    r.start,
                    size=r.size(),
                    intlvHighBit=numa_bit,
                    intlvBits=llc_bits,
                    intlvMatch=i,
                )
                ranges.append(addr_range)
            cls._addr_ranges[hnf] = (ranges, numa_bit)

    @classmethod
    def getAddrRanges(cls, hnf_idx):
        """
        Returns the (address ranges, interleaving high bit) tuple stored by
        createAddrRanges for 'hnf_idx'
        """
        assert len(cls._addr_ranges) != 0
        return cls._addr_ranges[hnf_idx]

    # The CHI controller can be a child of this object or another if
    # 'parent' is specified
    def __init__(self, hnf_idx, ruby_system, llcache_type, parent):
        """
        Creates the HNF controller for 'hnf_idx' and connects it to the
        network. 'llcache_type' is called to build the cache. The controller
        is set as the 'cntrl' child of 'parent', or of this object when
        'parent' is None.
        """
        super(CHI_HNF, self).__init__(ruby_system)

        addr_ranges, intlvHighBit = self.getAddrRanges(hnf_idx)
        # All ranges should have the same interleaving
        assert len(addr_ranges) >= 1

        ll_cache = llcache_type(start_index_bit=intlvHighBit + 1)
        self._cntrl = CHI_HNFController(
            ruby_system, ll_cache, NULL, addr_ranges
        )

        if parent is None:
            self.cntrl = self._cntrl
        else:
            parent.cntrl = self._cntrl

        self.connectController(self._cntrl)

    def getAllControllers(self):
        """Returns the single HNF controller of this node"""
        return [self._cntrl]

    def getNetworkSideControllers(self):
        """Returns the single HNF controller of this node"""
        return [self._cntrl]
class CHI_MN(CHI_Node):
    """
    Wraps a Misc Node controller.
    """

    class NoC_Params(CHI_Node.NoC_Params):
        """Misc nodes may also define the 'pairing' parameter"""

        pairing = None

    # The CHI controller is always set as a child of this object
    def __init__(self, ruby_system, l1d_caches, early_nonsync_comp=False):
        """
        Creates the Misc Node controller and connects it to the network.
        'l1d_caches' and 'early_nonsync_comp' are handed over to the
        controller.
        """
        super(CHI_MN, self).__init__(ruby_system)

        # The MiscNode uses an internal address range that starts at 0
        mn_range = AddrRange(0, size="1kB")

        self._cntrl = CHI_MNController(
            ruby_system, mn_range, l1d_caches, early_nonsync_comp
        )

        self.cntrl = self._cntrl
        self.connectController(self._cntrl)

    def connectController(self, cntrl):
        """Connects 'cntrl' using the base class implementation"""
        CHI_Node.connectController(self, cntrl)

    def getAllControllers(self):
        """Returns the single controller of this node"""
        return [self._cntrl]

    def getNetworkSideControllers(self):
        """Returns the single controller of this node"""
        return [self._cntrl]
class CHI_SNF_Base(CHI_Node):
    """
    Base for the nodes that create the CHI controllers of the memory
    controllers
    """

    # The CHI controller is set as a child of 'parent' if one is given,
    # otherwise as a child of this object
    def __init__(self, ruby_system, parent):
        """
        Creates the Memory_Controller with a fresh version id and its
        internal buffers, then connects it to the network.
        """
        super(CHI_SNF_Base, self).__init__(ruby_system)

        cntrl_version = Versions.getVersion(Memory_Controller)
        self._cntrl = Memory_Controller(
            version=cntrl_version,
            ruby_system=ruby_system,
            triggerQueue=TriggerMessageBuffer(),
            responseFromMemory=MemCtrlMessageBuffer(),
            requestToMemory=MemCtrlMessageBuffer(),
            reqRdy=TriggerMessageBuffer(),
            transitions_per_cycle=1024,
        )

        self.connectController(self._cntrl)

        owner = parent if parent else self
        owner.cntrl = self._cntrl

    def getAllControllers(self):
        """Returns the single controller of this node"""
        return [self._cntrl]

    def getNetworkSideControllers(self):
        """Returns the single controller of this node"""
        return [self._cntrl]

    def getMemRange(self, mem_ctrl):
        """
        Returns the 'range' of 'mem_ctrl' when it has one, otherwise the
        range of its 'dram'
        """
        # TODO need some kind of transparent API for
        # MemCtrl+DRAM vs SimpleMemory
        if not hasattr(mem_ctrl, "range"):
            return mem_ctrl.dram.range
        return mem_ctrl.range
class CHI_SNF_BootMem(CHI_SNF_Base):
    """
    SNF for the boot memory
    """

    def __init__(self, ruby_system, parent, bootmem):
        """
        Binds the controller memory port to 'bootmem' and takes its
        address range.
        """
        super(CHI_SNF_BootMem, self).__init__(ruby_system, parent)
        cntrl = self._cntrl
        cntrl.memory_out_port = bootmem.port
        cntrl.addr_ranges = self.getMemRange(bootmem)
class CHI_SNF_MainMem(CHI_SNF_Base):
    """
    SNF for a main memory controller
    """

    def __init__(self, ruby_system, parent, mem_ctrl=None):
        """
        When 'mem_ctrl' is given, binds the controller memory port to it
        and takes its address range. Otherwise the port and the range are
        left to be bound later.
        """
        super(CHI_SNF_MainMem, self).__init__(ruby_system, parent)
        if not mem_ctrl:
            # ports and range are bound later
            return
        self._cntrl.memory_out_port = mem_ctrl.port
        self._cntrl.addr_ranges = self.getMemRange(mem_ctrl)
class CHI_RNI_Base(CHI_Node):
    """
    Request node without cache / DMA
    """

    # The CHI controller is set as a child of 'parent' if one is given,
    # otherwise as a child of this object
    def __init__(self, ruby_system, parent):
        """
        Creates a sequencer and a CHI_DMAController using it, then
        connects the controller to the network.
        """
        super(CHI_RNI_Base, self).__init__(ruby_system)

        seq_version = Versions.getSeqId()
        self._sequencer = RubySequencer(
            version=seq_version,
            ruby_system=ruby_system,
            clk_domain=ruby_system.clk_domain,
        )
        self._cntrl = CHI_DMAController(ruby_system, self._sequencer)

        owner = parent if parent else self
        owner.cntrl = self._cntrl

        self.connectController(self._cntrl)

    def getAllControllers(self):
        """Returns the single controller of this node"""
        return [self._cntrl]

    def getNetworkSideControllers(self):
        """Returns the single controller of this node"""
        return [self._cntrl]
class CHI_RNI_DMA(CHI_RNI_Base):
    """
    DMA controller wired up to a given dma port
    """

    def __init__(self, ruby_system, dma_port, parent):
        """
        Creates the DMA request node and binds the input ports of its
        sequencer to 'dma_port', which must not be None.
        """
        super(CHI_RNI_DMA, self).__init__(ruby_system, parent)
        # Identity test: the check must not depend on how the port object
        # implements comparison
        assert dma_port is not None
        self._sequencer.in_ports = dma_port
class CHI_RNI_IO(CHI_RNI_Base):
    """
    DMA controller wired up to the ruby_system IO port
    """

    def __init__(self, ruby_system, parent):
        """
        Creates the request node and registers its sequencer as the
        '_io_port' of 'ruby_system'.
        """
        super(CHI_RNI_IO, self).__init__(ruby_system, parent)
        io_sequencer = self._sequencer
        ruby_system._io_port = io_sequencer