misc: Merge branch 'release-staging-v21-0' into develop

Change-Id: I0ad043ded56fb848e045057a1e7a56ea39797906
This commit is contained in:
Bobby R. Bruce
2021-03-18 11:13:14 -07:00
49 changed files with 9819 additions and 226 deletions

View File

@@ -1,3 +1,16 @@
# Version 20.1.0.5
**[HOTFIX]** This hotfix release fixes three known bugs:
* `src/python/m5/util/convert.py` incorrectly stated kibibytes as 'kiB' instead of 'KiB'. This has been fixed.
* Atomic accesses were not checking the access permission bits in the page table descriptors. They were incorrectly using the nature of the request itself. This is now fixed.
* `num_l2caches_per_cluster` and `num_cpus_per_cluster` were cast to floats in `configs/ruby/MESI_Three_Level_HTM.py`, which caused errors. This has been fixed so they are correctly cast to integers.
# Version 20.1.0.4
**[HOTFIX]** [gem5 was failing to build with SCons 4.0.1 and 4.1.0](https://gem5.atlassian.net/browse/GEM5-916).
This hotfix makes the necessary changes to `site_scons/site_tools/default.py` for gem5 to compile successfully on these versions of SCons.
# Version 20.1.0.3
**[HOTFIX]** A patch was applied to fix an [error where booting Linux stalled when using the ARM ISA](https://gem5.atlassian.net/browse/GEM5-901).

View File

@@ -760,6 +760,9 @@ protocol_dirs = []
Export('protocol_dirs')
slicc_includes = []
Export('slicc_includes')
# list of protocols that require the partial functional read interface
need_partial_func_reads = []
Export('need_partial_func_reads')
# Walk the tree and execute all SConsopts scripts that will add to the
# above variables

View File

@@ -0,0 +1,70 @@
# Copyright (c) 2021 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# 2x4 mesh definition
#
# 0 --- 1 --- 2 --- 3
# | | | |
# 4 --- 5 --- 6 --- 7
#
mesh:
num_rows : 2
num_cols : 4
router_latency : 1
link_latency : 1
# Bindings for each CHI node type.
CHI_RNF:
# Uncomment to map num_nodes_per_router RNFs in each provided router,
# assuming num. created CHI_RNFs == len(router_list)*num_nodes_per_router
# num_nodes_per_router: 1
router_list: [1, 2, 5, 6]
CHI_HNF:
# num_nodes_per_router: 1
router_list: [1, 2, 5, 6]
CHI_SNF_MainMem:
# num_nodes_per_router: 1
router_list: [0, 4]
# Applies to CHI_SNF_BootMem and possibly other non-main memories
CHI_SNF_IO:
router_list: [3]
# Applies to CHI_RNI_DMA and CHI_RNI_IO
CHI_RNI_IO:
router_list: [7]

840
configs/ruby/CHI.py Normal file
View File

@@ -0,0 +1,840 @@
# Copyright (c) 2021 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import math
import yaml
import m5
from m5.objects import *
from m5.defines import buildEnv
from .Ruby import create_topology, setup_memory_controllers
def define_options(parser):
    """Register the CHI-specific command-line options on 'parser'."""
    parser.add_option("--noc-config", action="store", type="string",
                      default=None,
                      help="YAML NoC config. parameters and bindings. "
                           "required for CustomMesh topology")
class Versions:
    '''
    Helper class to obtain unique ids for a given controller class.
    These are passed as the 'version' parameter when creating the controller.
    '''

    # Global counter for sequencer ids
    _seqs = 0

    @classmethod
    def getSeqId(cls):
        # Hand out the next sequencer id and advance the global counter
        seq_id = cls._seqs
        cls._seqs = seq_id + 1
        return seq_id

    # Per-controller-type counters, keyed by the controller class
    _version = {}

    @classmethod
    def getVersion(cls, tp):
        # Hand out the next version number for controller type 'tp',
        # starting from 0 the first time the type is seen
        ver = cls._version.setdefault(tp, 0)
        cls._version[tp] = ver + 1
        return ver
class CHI_Node(SubSystem):
    '''
    Base class with common functions for setting up Cache or Memory
    controllers that are part of a CHI RNF, RNFI, HNF, or SNF nodes.
    Notice getNetworkSideControllers and getAllControllers must be implemented
    in the derived classes.
    '''

    def __init__(self, ruby_system):
        super(CHI_Node, self).__init__()
        self._ruby_system = ruby_system
        self._network = ruby_system.network

    def getNetworkSideControllers(self):
        '''
        Returns all ruby controllers that need to be connected to the
        network
        '''
        raise NotImplementedError()

    def getAllControllers(self):
        '''
        Returns all ruby controllers associated with this node
        '''
        raise NotImplementedError()

    def setDownstream(self, cntrls):
        '''
        Sets cntrls as the downstream list of all controllers in this node
        '''
        for ctrl in self.getNetworkSideControllers():
            ctrl.downstream_destinations = cntrls

    def connectController(self, cntrl):
        '''
        Creates and configures the messages buffers for the CHI input/output
        ports that connect to the network
        '''
        # One message buffer per CHI channel, in each direction
        for chn in ('req', 'rsp', 'snp', 'dat'):
            setattr(cntrl, chn + 'Out', MessageBuffer())
            setattr(cntrl, chn + 'In', MessageBuffer())

        # All CHI ports are always connected to the network.
        # Controllers that are not part of the getNetworkSideControllers list
        # still communicate using internal routers, thus we need to wire-up
        # the ports
        for chn in ('req', 'rsp', 'snp', 'dat'):
            getattr(cntrl, chn + 'Out').out_port = self._network.in_port
            getattr(cntrl, chn + 'In').in_port = self._network.out_port
class TriggerMessageBuffer(MessageBuffer):
    '''
    MessageBuffer for triggering internal controller events.
    These buffers should not be affected by the Ruby tester randomization
    and allow popping messages enqueued in the same cycle.
    '''
    randomization = 'disabled'
    allow_zero_latency = True
class OrderedTriggerMessageBuffer(TriggerMessageBuffer):
    '''Trigger buffer variant that keeps messages in FIFO order.'''
    ordered = True
class CHI_Cache_Controller(Cache_Controller):
    '''
    Default parameters for a Cache controller
    The Cache_Controller can also be used as a DMA requester or as
    a pure directory if all cache allocation policies are disabled.
    '''

    def __init__(self, ruby_system):
        super(CHI_Cache_Controller, self).__init__(
            version = Versions.getVersion(Cache_Controller),
            ruby_system = ruby_system,
            mandatoryQueue = MessageBuffer(),
            prefetchQueue = MessageBuffer(),
            triggerQueue = TriggerMessageBuffer(),
            retryTriggerQueue = OrderedTriggerMessageBuffer(),
            replTriggerQueue = OrderedTriggerMessageBuffer(),
            reqRdy = TriggerMessageBuffer(),
            snpRdy = TriggerMessageBuffer())
        # Set a somewhat large number since we rely a lot on internal
        # triggers. To limit the controller performance, tweak other
        # params such as: input port buffer size, cache banks, and output
        # port latency
        self.transitions_per_cycle = 128
        # This should be set to true in the data cache controller to enable
        # timeouts on unique lines when a store conditional fails
        self.sc_lock_enabled = False
class CHI_L1Controller(CHI_Cache_Controller):
    '''
    Default parameters for a L1 Cache controller
    '''

    def __init__(self, ruby_system, sequencer, cache, prefetcher):
        super(CHI_L1Controller, self).__init__(ruby_system)
        self.sequencer = sequencer
        self.cache = cache
        # Prefetcher support is a placeholder for now (see CHI_RNF)
        self.use_prefetcher = False
        self.send_evictions = True
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False
        # Strict inclusive MOESI
        self.allow_SD = True
        self.alloc_on_seq_acc = True
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = True
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = True
        self.dealloc_backinv_shared = True
        # Some reasonable default TBE params
        self.number_of_TBEs = 16
        self.number_of_repl_TBEs = 16
        self.number_of_snoop_TBEs = 4
        self.unify_repl_TBEs = False
class CHI_L2Controller(CHI_Cache_Controller):
    '''
    Default parameters for a L2 Cache controller
    '''

    def __init__(self, ruby_system, cache, prefetcher):
        super(CHI_L2Controller, self).__init__(ruby_system)
        # L2 has no sequencer of its own; requests come from the L1s
        self.sequencer = NULL
        self.cache = cache
        self.use_prefetcher = False
        self.allow_SD = True
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False
        self.send_evictions = False
        # Strict inclusive MOESI
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = True
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = True
        self.dealloc_backinv_shared = True
        # Some reasonable default TBE params
        self.number_of_TBEs = 32
        self.number_of_repl_TBEs = 32
        self.number_of_snoop_TBEs = 16
        self.unify_repl_TBEs = False
class CHI_HNFController(CHI_Cache_Controller):
    '''
    Default parameters for a coherent home node (HNF) cache controller
    '''

    def __init__(self, ruby_system, cache, prefetcher, addr_ranges):
        super(CHI_HNFController, self).__init__(ruby_system)
        self.sequencer = NULL
        self.cache = cache
        self.use_prefetcher = False
        self.addr_ranges = addr_ranges
        self.allow_SD = True
        self.is_HN = True
        self.enable_DMT = True
        self.enable_DCT = True
        self.send_evictions = False
        # MOESI / Mostly inclusive for shared / Exclusive for unique
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = True
        self.alloc_on_readunique = False
        self.alloc_on_readonce = True
        self.alloc_on_writeback = True
        self.dealloc_on_unique = True
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = False
        self.dealloc_backinv_shared = False
        # Some reasonable default TBE params
        self.number_of_TBEs = 32
        self.number_of_repl_TBEs = 32
        self.number_of_snoop_TBEs = 1 # should not receive any snoop
        self.unify_repl_TBEs = False
class CHI_DMAController(CHI_Cache_Controller):
    '''
    Default parameters for a DMA controller
    '''

    def __init__(self, ruby_system, sequencer):
        super(CHI_DMAController, self).__init__(ruby_system)
        self.sequencer = sequencer

        # Minimal cache object required by Cache_Controller; with all
        # allocation policies disabled below it should never be filled
        class DummyCache(RubyCache):
            dataAccessLatency = 0
            tagAccessLatency = 1
            size = "128"
            assoc = 1

        self.use_prefetcher = False
        self.cache = DummyCache()
        self.sequencer.dcache = NULL
        # All allocations are false
        # Deallocations are true (don't really matter)
        self.allow_SD = False
        self.is_HN = False
        self.enable_DMT = False
        self.enable_DCT = False
        self.alloc_on_seq_acc = False
        self.alloc_on_seq_line_write = False
        self.alloc_on_readshared = False
        self.alloc_on_readunique = False
        self.alloc_on_readonce = False
        self.alloc_on_writeback = False
        self.dealloc_on_unique = False
        self.dealloc_on_shared = False
        self.dealloc_backinv_unique = False
        self.dealloc_backinv_shared = False
        self.send_evictions = False
        self.number_of_TBEs = 16
        self.number_of_repl_TBEs = 1
        self.number_of_snoop_TBEs = 1 # should not receive any snoop
        self.unify_repl_TBEs = False
class CPUSequencerWrapper:
    '''
    Other generic configuration scripts assume a matching number of sequencers
    and cpus. This wraps the instruction and data sequencer so they are
    compatible with the other scripts. This assumes all scripts are using
    connectCpuPorts/connectIOPorts to bind ports

    Attribute writes on the wrapper are forwarded to BOTH wrapped sequencers
    (see __setattr__); __init__ therefore stores its own attributes through
    __dict__ to bypass that forwarding.
    '''

    def __init__(self, iseq, dseq):
        # use this style due to __setattr__ override below
        self.__dict__['inst_seq'] = iseq
        self.__dict__['data_seq'] = dseq
        self.__dict__['support_data_reqs'] = True
        self.__dict__['support_inst_reqs'] = True
        # Compatibility with certain scripts that wire up ports
        # without connectCpuPorts
        self.__dict__['slave'] = dseq.in_ports
        self.__dict__['in_ports'] = dseq.in_ports

    def connectCpuPorts(self, cpu):
        '''Bind the cpu's instruction and cached data ports to the wrapped
        sequencers, and its uncached ports to the data sequencer.'''
        assert(isinstance(cpu, BaseCPU))
        cpu.icache_port = self.inst_seq.in_ports
        for p in cpu._cached_ports:
            if str(p) != 'icache_port':
                # setattr performs the same dynamic attribute assignment as
                # the previous exec('cpu.%s = ...' % p) without compiling a
                # code string (faster, and safe against odd port names)
                setattr(cpu, str(p), self.data_seq.in_ports)
        cpu.connectUncachedPorts(self.data_seq)

    def connectIOPorts(self, piobus):
        '''Connect the data sequencer to the IO bus.'''
        self.data_seq.connectIOPorts(piobus)

    def __setattr__(self, name, value):
        # Forward all attribute writes to both sequencers
        setattr(self.inst_seq, name, value)
        setattr(self.data_seq, name, value)
class CHI_RNF(CHI_Node):
    '''
    Defines a CHI request node.
    Notice all controllers and sequencers are set as children of the cpus, so
    this object acts more like a proxy for setting things up and has no
    topology significance unless the cpus are set as its children at the top
    level
    '''

    def __init__(self, cpus, ruby_system,
                 l1Icache_type, l1Dcache_type,
                 cache_line_size,
                 l1Iprefetcher_type=None, l1Dprefetcher_type=None):
        super(CHI_RNF, self).__init__(ruby_system)

        self._block_size_bits = int(math.log(cache_line_size, 2))

        # All sequencers and controllers
        self._seqs = []
        self._cntrls = []

        # Last level controllers in this node, i.e., the ones that will send
        # requests to the home nodes
        self._ll_cntrls = []

        self._cpus = cpus

        # First creates L1 caches and sequencers
        for cpu in self._cpus:
            cpu.inst_sequencer = RubySequencer(version = Versions.getSeqId(),
                                               ruby_system = ruby_system)
            cpu.data_sequencer = RubySequencer(version = Versions.getSeqId(),
                                               ruby_system = ruby_system)

            self._seqs.append(CPUSequencerWrapper(cpu.inst_sequencer,
                                                  cpu.data_sequencer))

            # caches
            l1i_cache = l1Icache_type(start_index_bit = self._block_size_bits,
                                      is_icache = True)
            l1d_cache = l1Dcache_type(start_index_bit = self._block_size_bits,
                                      is_icache = False)

            # Placeholders for future prefetcher support
            if l1Iprefetcher_type != None or l1Dprefetcher_type != None:
                m5.fatal('Prefetching not supported yet')
            l1i_pf = NULL
            l1d_pf = NULL

            # cache controllers
            cpu.l1i = CHI_L1Controller(ruby_system, cpu.inst_sequencer,
                                       l1i_cache, l1i_pf)
            cpu.l1d = CHI_L1Controller(ruby_system, cpu.data_sequencer,
                                       l1d_cache, l1d_pf)

            cpu.inst_sequencer.dcache = NULL
            cpu.data_sequencer.dcache = cpu.l1d.cache

            # Enable store-conditional timeouts only on the data cache
            cpu.l1d.sc_lock_enabled = True

            cpu._ll_cntrls = [cpu.l1i, cpu.l1d]
            for ctrl in cpu._ll_cntrls:
                self._cntrls.append(ctrl)
                self.connectController(ctrl)
                self._ll_cntrls.append(ctrl)

    def getSequencers(self):
        return self._seqs

    def getAllControllers(self):
        return self._cntrls

    def getNetworkSideControllers(self):
        return self._cntrls

    def setDownstream(self, cntrls):
        # Only the last-level controllers talk to the home nodes
        for ctrl in self._ll_cntrls:
            ctrl.downstream_destinations = cntrls

    def getCpus(self):
        return self._cpus

    # Adds a private L2 for each cpu
    def addPrivL2Cache(self, cache_type, pf_type=None):
        # The L2s become the new last-level controllers of this node
        self._ll_cntrls = []
        for cpu in self._cpus:
            l2_cache = cache_type(start_index_bit = self._block_size_bits,
                                  is_icache = False)
            if pf_type != None:
                m5.fatal('Prefetching not supported yet')
            l2_pf = NULL

            cpu.l2 = CHI_L2Controller(self._ruby_system, l2_cache, l2_pf)

            self._cntrls.append(cpu.l2)
            self.connectController(cpu.l2)

            self._ll_cntrls.append(cpu.l2)

            # The L1s now forward their requests to the private L2
            for ctrl in cpu._ll_cntrls:
                ctrl.downstream_destinations = [cpu.l2]
            cpu._ll_cntrls = [cpu.l2]
class CHI_HNF(CHI_Node):
    '''
    Encapsulates an HNF cache/directory controller.
    Before the first controller is created, the class method
    CHI_HNF.createAddrRanges must be called before creating any CHI_HNF object
    to set-up the interleaved address ranges used by the HNFs
    '''

    # Interleaved address ranges; populated by createAddrRanges
    _addr_ranges = []

    @classmethod
    def createAddrRanges(cls, sys_mem_ranges, cache_line_size, num_hnfs):
        # Create the HNFs interleaved addr ranges
        block_size_bits = int(math.log(cache_line_size, 2))
        llc_bits = int(math.log(num_hnfs, 2))
        numa_bit = block_size_bits + llc_bits - 1
        cls._addr_ranges = []
        for idx in range(num_hnfs):
            ranges = [AddrRange(r.start, size = r.size(),
                                intlvHighBit = numa_bit,
                                intlvBits = llc_bits,
                                intlvMatch = idx)
                      for r in sys_mem_ranges]
            cls._addr_ranges.append((ranges, numa_bit, idx))

    @classmethod
    def getAddrRanges(cls, hnf_idx):
        assert(len(cls._addr_ranges) != 0)
        return cls._addr_ranges[hnf_idx]

    # The CHI controller can be a child of this object or another if
    # 'parent' is specified
    def __init__(self, hnf_idx, ruby_system, llcache_type, parent):
        super(CHI_HNF, self).__init__(ruby_system)

        addr_ranges, intlvHighBit, intlvMatch = CHI_HNF.getAddrRanges(hnf_idx)
        # All ranges should have the same interleaving
        assert(len(addr_ranges) >= 1)
        assert(intlvMatch == hnf_idx)

        ll_cache = llcache_type(start_index_bit = intlvHighBit + 1)
        self._cntrl = CHI_HNFController(ruby_system, ll_cache, NULL,
                                        addr_ranges)

        if parent == None:
            self.cntrl = self._cntrl
        else:
            parent.cntrl = self._cntrl

        self.connectController(self._cntrl)

    def getAllControllers(self):
        return [self._cntrl]

    def getNetworkSideControllers(self):
        return [self._cntrl]
class CHI_SNF_Base(CHI_Node):
    '''
    Creates CHI node controllers for the memory controllers
    '''

    # The CHI controller can be a child of this object or another if
    # 'parent' is specified
    def __init__(self, ruby_system, parent):
        super(CHI_SNF_Base, self).__init__(ruby_system)
        self._cntrl = Memory_Controller(
            version = Versions.getVersion(Memory_Controller),
            ruby_system = ruby_system,
            triggerQueue = TriggerMessageBuffer(),
            responseFromMemory = MessageBuffer(),
            requestToMemory = MessageBuffer(ordered = True),
            reqRdy = TriggerMessageBuffer())

        self.connectController(self._cntrl)

        # Attach the controller either to 'parent' or to this node
        if parent:
            parent.cntrl = self._cntrl
        else:
            self.cntrl = self._cntrl

    def getAllControllers(self):
        return [self._cntrl]

    def getNetworkSideControllers(self):
        return [self._cntrl]

    def getMemRange(self, mem_ctrl):
        # TODO need some kind of transparent API for
        # MemCtrl+DRAM vs SimpleMemory
        if hasattr(mem_ctrl, 'range'):
            return mem_ctrl.range
        else:
            return mem_ctrl.dram.range
class CHI_SNF_BootMem(CHI_SNF_Base):
    '''
    Create the SNF for the boot memory
    '''

    def __init__(self, ruby_system, parent, bootmem):
        super(CHI_SNF_BootMem, self).__init__(ruby_system, parent)
        self._cntrl.memory_out_port = bootmem.port
        self._cntrl.addr_ranges = self.getMemRange(bootmem)
class CHI_SNF_MainMem(CHI_SNF_Base):
    '''
    Create the SNF for a list main memory controllers
    '''

    def __init__(self, ruby_system, parent, mem_ctrl = None):
        super(CHI_SNF_MainMem, self).__init__(ruby_system, parent)
        if mem_ctrl:
            self._cntrl.memory_out_port = mem_ctrl.port
            self._cntrl.addr_ranges = self.getMemRange(mem_ctrl)
        # else bind ports and range later
class CHI_RNI_Base(CHI_Node):
    '''
    Request node without cache / DMA
    '''

    # The CHI controller can be a child of this object or another if
    # 'parent' is specified
    def __init__(self, ruby_system, parent):
        super(CHI_RNI_Base, self).__init__(ruby_system)

        self._sequencer = RubySequencer(version = Versions.getSeqId(),
                                        ruby_system = ruby_system,
                                        clk_domain = ruby_system.clk_domain)
        self._cntrl = CHI_DMAController(ruby_system, self._sequencer)

        # Attach the controller either to 'parent' or to this node
        if parent:
            parent.cntrl = self._cntrl
        else:
            self.cntrl = self._cntrl

        self.connectController(self._cntrl)

    def getAllControllers(self):
        return [self._cntrl]

    def getNetworkSideControllers(self):
        return [self._cntrl]
class CHI_RNI_DMA(CHI_RNI_Base):
    '''
    DMA controller wired up to a given dma port
    '''

    def __init__(self, ruby_system, dma_port, parent):
        super(CHI_RNI_DMA, self).__init__(ruby_system, parent)
        assert(dma_port != None)
        self._sequencer.in_ports = dma_port
class CHI_RNI_IO(CHI_RNI_Base):
    '''
    DMA controller wired up to the ruby_system IO port
    '''

    def __init__(self, ruby_system, parent):
        super(CHI_RNI_IO, self).__init__(ruby_system, parent)
        ruby_system._io_port = self._sequencer
def noc_params_from_config(config, noc_params):
    '''
    Copy the relevant settings from a parsed YAML 'config' dict into the
    'noc_params' object consumed by the topology creation code.
    '''
    mesh_cfg = config['mesh']

    # mesh options
    noc_params.num_rows = mesh_cfg['num_rows']
    noc_params.num_cols = mesh_cfg['num_cols']
    if 'router_latency' in mesh_cfg:
        noc_params.router_latency = mesh_cfg['router_latency']
    if 'link_latency' in mesh_cfg:
        # 'link_latency' sets both link types; the more specific keys
        # below may override either one
        noc_params.router_link_latency = mesh_cfg['link_latency']
        noc_params.node_link_latency = mesh_cfg['link_latency']
    if 'router_link_latency' in mesh_cfg:
        noc_params.router_link_latency = mesh_cfg['router_link_latency']
    if 'node_link_latency' in mesh_cfg:
        noc_params.node_link_latency = mesh_cfg['node_link_latency']
    if 'cross_links' in mesh_cfg:
        noc_params.cross_link_latency = mesh_cfg['cross_link_latency']
        # cross links are bidirectional: register both directions
        noc_params.cross_links = []
        for src, dst in mesh_cfg['cross_links']:
            noc_params.cross_links.append((src, dst))
            noc_params.cross_links.append((dst, src))
    else:
        noc_params.cross_links = []
        noc_params.cross_link_latency = 0

    # CHI_RNF options
    noc_params.CHI_RNF = config['CHI_RNF']

    # CHI_RNI_IO
    noc_params.CHI_RNI_IO = config['CHI_RNI_IO']

    # CHI_HNF options
    noc_params.CHI_HNF = config['CHI_HNF']
    if 'pairing' in config['CHI_HNF']:
        noc_params.pairing = config['CHI_HNF']['pairing']

    # CHI_SNF_MainMem
    noc_params.CHI_SNF_MainMem = config['CHI_SNF_MainMem']

    # CHI_SNF_IO (applies to CHI_SNF_Bootmem)
    noc_params.CHI_SNF_IO = config['CHI_SNF_IO']
def create_system(options, full_system, system, dma_ports, bootmem,
                  ruby_system):
    '''
    Instantiate a CHI protocol system: one RNF (with private L2) per cpu,
    options.num_l3caches HNFs, options.num_dirs main-memory SNFs, plus SNFs
    for any boot/other memories and RNI nodes for DMA/IO.
    Returns (cpu_sequencers, mem_cntrls, topology).
    '''

    if buildEnv['PROTOCOL'] != 'CHI':
        m5.panic("This script requires the CHI build")

    if options.num_dirs < 1:
        m5.fatal('--num-dirs must be at least 1')

    if options.num_l3caches < 1:
        m5.fatal('--num-l3caches must be at least 1')

    # Default parameters for the network
    class NoC_Params(object):
        def __init__(self):
            self.topology = options.topology
            self.network = options.network
            self.router_link_latency = 1
            self.node_link_latency = 1
            self.router_latency = 1
            self.router_buffer_size = 4
            self.cntrl_msg_size = 8
            self.data_width = 32
    params = NoC_Params()

    # read additional configurations from yaml file if provided
    if options.noc_config:
        with open(options.noc_config, 'r') as file:
            # safe_load restricts the YAML to plain data, avoiding the
            # arbitrary object construction (and PyYAML >= 5.1 warning) of
            # a loader-less yaml.load
            noc_params_from_config(yaml.safe_load(file), params)
    elif params.topology == 'CustomMesh':
        m5.fatal('--noc-config must be provided if topology is CustomMesh')

    # Declare caches and controller types used by the protocol
    # Notice tag and data accesses are not concurrent, so the cache hit
    # latency = tag + data + response latencies.
    # Default response latencies are 1 cy for all controllers.
    # For L1 controllers the mandatoryQueue enqueue latency is always 1 cy
    # and this is deducted from the initial tag read latency for sequencer
    # requests.
    # dataAccessLatency may be set to 0 if one wants to consider parallel
    # data and tag lookups
    class L1ICache(RubyCache):
        dataAccessLatency = 1
        tagAccessLatency = 1
        size = options.l1i_size
        assoc = options.l1i_assoc

    class L1DCache(RubyCache):
        dataAccessLatency = 2
        tagAccessLatency = 1
        size = options.l1d_size
        assoc = options.l1d_assoc

    class L2Cache(RubyCache):
        dataAccessLatency = 6
        tagAccessLatency = 2
        size = options.l2_size
        assoc = options.l2_assoc

    class HNFCache(RubyCache):
        dataAccessLatency = 10
        tagAccessLatency = 2
        size = options.l3_size
        assoc = options.l3_assoc

    # other functions use system.cache_line_size assuming it has been set
    assert(system.cache_line_size.value == options.cacheline_size)

    cpu_sequencers = []
    mem_cntrls = []
    mem_dests = []
    network_nodes = []
    network_cntrls = []
    hnf_dests = []
    all_cntrls = []

    # Creates one RNF per cpu with priv l2 caches
    assert(len(system.cpu) == options.num_cpus)
    ruby_system.rnf = [ CHI_RNF([cpu], ruby_system, L1ICache, L1DCache,
                                system.cache_line_size.value)
                        for cpu in system.cpu ]
    for rnf in ruby_system.rnf:
        rnf.addPrivL2Cache(L2Cache)
        cpu_sequencers.extend(rnf.getSequencers())
        all_cntrls.extend(rnf.getAllControllers())
        network_nodes.append(rnf)
        network_cntrls.extend(rnf.getNetworkSideControllers())

    # Look for other memories
    other_memories = []
    if bootmem:
        other_memories.append(bootmem)
    if getattr(system, 'sram', None):
        other_memories.append(getattr(system, 'sram', None))
    on_chip_mem_ports = getattr(system, '_on_chip_mem_ports', None)
    if on_chip_mem_ports:
        other_memories.extend([p.simobj for p in on_chip_mem_ports])

    # Create the LLCs cntrls
    sysranges = [] + system.mem_ranges
    for m in other_memories:
        sysranges.append(m.range)

    CHI_HNF.createAddrRanges(sysranges, system.cache_line_size.value,
                             options.num_l3caches)
    ruby_system.hnf = [ CHI_HNF(i, ruby_system, HNFCache, None)
                        for i in range(options.num_l3caches) ]

    for hnf in ruby_system.hnf:
        network_nodes.append(hnf)
        network_cntrls.extend(hnf.getNetworkSideControllers())
        assert(hnf.getAllControllers() == hnf.getNetworkSideControllers())
        all_cntrls.extend(hnf.getAllControllers())
        hnf_dests.extend(hnf.getAllControllers())

    # Create the memory controllers
    # Notice we don't define a Directory_Controller type so we don't use
    # create_directories shared by other protocols.
    ruby_system.snf = [ CHI_SNF_MainMem(ruby_system, None, None)
                        for i in range(options.num_dirs) ]
    for snf in ruby_system.snf:
        network_nodes.append(snf)
        network_cntrls.extend(snf.getNetworkSideControllers())
        assert(snf.getAllControllers() == snf.getNetworkSideControllers())
        mem_cntrls.extend(snf.getAllControllers())
        all_cntrls.extend(snf.getAllControllers())
        mem_dests.extend(snf.getAllControllers())

    if len(other_memories) > 0:
        ruby_system.rom_snf = [ CHI_SNF_BootMem(ruby_system, None, m)
                                for m in other_memories ]
        for snf in ruby_system.rom_snf:
            network_nodes.append(snf)
            network_cntrls.extend(snf.getNetworkSideControllers())
            all_cntrls.extend(snf.getAllControllers())
            mem_dests.extend(snf.getAllControllers())

    # Creates the controller for dma ports and io
    if len(dma_ports) > 0:
        ruby_system.dma_rni = [ CHI_RNI_DMA(ruby_system, dma_port, None)
                                for dma_port in dma_ports ]
        for rni in ruby_system.dma_rni:
            network_nodes.append(rni)
            network_cntrls.extend(rni.getNetworkSideControllers())
            all_cntrls.extend(rni.getAllControllers())

    if full_system:
        ruby_system.io_rni = CHI_RNI_IO(ruby_system, None)
        network_nodes.append(ruby_system.io_rni)
        network_cntrls.extend(ruby_system.io_rni.getNetworkSideControllers())
        all_cntrls.extend(ruby_system.io_rni.getAllControllers())

    # Assign downstream destinations: requesters -> HNFs -> memories
    for rnf in ruby_system.rnf:
        rnf.setDownstream(hnf_dests)
    if len(dma_ports) > 0:
        for rni in ruby_system.dma_rni:
            rni.setDownstream(hnf_dests)
    if full_system:
        ruby_system.io_rni.setDownstream(hnf_dests)
    for hnf in ruby_system.hnf:
        hnf.setDownstream(mem_dests)

    # Setup data message size for all controllers
    for cntrl in all_cntrls:
        cntrl.data_channel_size = params.data_width

    # Network configurations
    # virtual networks: 0=request, 1=snoop, 2=response, 3=data
    ruby_system.network.number_of_virtual_networks = 4

    ruby_system.network.control_msg_size = params.cntrl_msg_size
    ruby_system.network.data_msg_size = params.data_width
    ruby_system.network.buffer_size = params.router_buffer_size

    if params.topology == 'CustomMesh':
        topology = create_topology(network_nodes, params)
    elif params.topology in ['Crossbar', 'Pt2Pt']:
        topology = create_topology(network_cntrls, params)
    else:
        m5.fatal("%s not supported!" % params.topology)

    # Incorporate the params into options so it's propagated to
    # makeTopology by the parent script
    for k in dir(params):
        if not k.startswith('__'):
            setattr(options, k, getattr(params, k))

    return (cpu_sequencers, mem_cntrls, topology)

View File

@@ -78,10 +78,10 @@ def create_system(options, full_system, system, dma_ports, bootmem,
dma_cntrl_nodes = []
assert (options.num_cpus % options.num_clusters == 0)
num_cpus_per_cluster = options.num_cpus / options.num_clusters
num_cpus_per_cluster = options.num_cpus // options.num_clusters
assert (options.num_l2caches % options.num_clusters == 0)
num_l2caches_per_cluster = options.num_l2caches / options.num_clusters
num_l2caches_per_cluster = options.num_l2caches // options.num_clusters
l2_bits = int(math.log(num_l2caches_per_cluster, 2))
block_size_bits = int(math.log(options.cacheline_size, 2))
@@ -141,7 +141,6 @@ def create_system(options, full_system, system, dma_ports, bootmem,
ruby_system = ruby_system)
cpu_seq = RubyHTMSequencer(version = i * num_cpus_per_cluster + j,
icache = l0i_cache,
clk_domain = clk_domain,
dcache = l0d_cache,
ruby_system = ruby_system)

View File

@@ -0,0 +1,444 @@
# Copyright (c) 2021 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import math
from m5.util import fatal
from m5.params import *
from m5.objects import *
from m5.defines import buildEnv
if buildEnv['PROTOCOL'] == 'CHI':
import ruby.CHI as CHI
from topologies.BaseTopology import SimpleTopology
class CustomMesh(SimpleTopology):
    """2D mesh topology for the CHI protocol with user-directed placement.

    Unlike the generic mesh topologies, node controllers (RNF, HNF,
    SNF_MainMem, SNF_BootMem, RNI_DMA, RNI_IO) are attached to mesh
    routers according to per-node-type options: each type provides a
    'router_list' and optionally a 'num_nodes_per_router' count.
    Optionally pairs HNFs with SNFs and reassigns their interleaved
    address ranges (see _autoPairHNFandSNF).
    """
    description = 'CustomMesh'

    def __init__(self, controllers):
        self.nodes = controllers

    #--------------------------------------------------------------------------
    # _makeMesh
    #--------------------------------------------------------------------------

    def _makeMesh(self, IntLink, link_latency, num_rows, num_columns,
                  cross_links, cross_link_latency):
        """Create all internal links of a num_rows x num_columns mesh.

        Four unidirectional link sets are created (E->W, W->E, N->S,
        S->N). A link whose (src, dst) router-index pair appears in
        cross_links gets cross_link_latency instead of link_latency.
        Link weights enforce XY (dimension-ordered) routing.
        """
        # East->West, West->East, North->South, South->North
        # XY routing weights
        link_weights = [1, 1, 2, 2]

        # East output to West input links
        for row in range(num_rows):
            for col in range(num_columns):
                if (col + 1 < num_columns):
                    east_out = col + (row * num_columns)
                    west_in = (col + 1) + (row * num_columns)
                    llat = cross_link_latency \
                                if (east_out, west_in) in cross_links \
                                else link_latency
                    self._int_links.append(\
                                    IntLink(link_id=self._link_count,
                                            src_node=self._routers[east_out],
                                            dst_node=self._routers[west_in],
                                            dst_inport="West",
                                            latency = llat,
                                            weight=link_weights[0]))
                    self._link_count += 1

        # West output to East input links
        for row in range(num_rows):
            for col in range(num_columns):
                if (col + 1 < num_columns):
                    east_in = col + (row * num_columns)
                    west_out = (col + 1) + (row * num_columns)
                    llat = cross_link_latency \
                                if (west_out, east_in) in cross_links \
                                else link_latency
                    self._int_links.append(\
                                    IntLink(link_id=self._link_count,
                                            src_node=self._routers[west_out],
                                            dst_node=self._routers[east_in],
                                            dst_inport="East",
                                            latency = llat,
                                            weight=link_weights[1]))
                    self._link_count += 1

        # North output to South input links
        for col in range(num_columns):
            for row in range(num_rows):
                if (row + 1 < num_rows):
                    north_out = col + (row * num_columns)
                    south_in = col + ((row + 1) * num_columns)
                    llat = cross_link_latency \
                                if (north_out, south_in) in cross_links \
                                else link_latency
                    self._int_links.append(\
                                    IntLink(link_id=self._link_count,
                                            src_node=self._routers[north_out],
                                            dst_node=self._routers[south_in],
                                            dst_inport="South",
                                            latency = llat,
                                            weight=link_weights[2]))
                    self._link_count += 1

        # South output to North input links
        for col in range(num_columns):
            for row in range(num_rows):
                if (row + 1 < num_rows):
                    north_in = col + (row * num_columns)
                    south_out = col + ((row + 1) * num_columns)
                    llat = cross_link_latency \
                                if (south_out, north_in) in cross_links \
                                else link_latency
                    self._int_links.append(\
                                    IntLink(link_id=self._link_count,
                                            src_node=self._routers[south_out],
                                            dst_node=self._routers[north_in],
                                            dst_inport="North",
                                            latency = llat,
                                            weight=link_weights[3]))
                    self._link_count += 1

    #--------------------------------------------------------------------------
    # distributeNodes
    #--------------------------------------------------------------------------

    def _createRNFRouter(self, mesh_router):
        """Create a zero-latency router bridging an RNF node's
        controllers and the given mesh router, connected both ways
        with self._router_link_latency links. Returns the new router.
        """
        node_router = self._Router(router_id = len(self._routers),
                                   latency = 0)
        self._routers.append(node_router)
        # connect node_router <-> mesh router
        self._int_links.append(self._IntLink( \
                                    link_id = self._link_count,
                                    src_node = node_router,
                                    dst_node = mesh_router,
                                    latency = self._router_link_latency))
        self._link_count += 1

        self._int_links.append(self._IntLink( \
                                    link_id = self._link_count,
                                    src_node = mesh_router,
                                    dst_node = node_router,
                                    latency = self._router_link_latency))
        self._link_count += 1

        return node_router

    def distributeNodes(self, num_nodes_per_router, router_idx_list,
                        node_list):
        """Attach the controllers of node_list to the mesh routers
        named by router_idx_list.

        If num_nodes_per_router is given, nodes are distributed evenly
        (len(router_idx_list) * num_nodes_per_router must equal
        len(node_list)); otherwise nodes are assigned round-robin, so
        a router may receive zero or multiple nodes. RNF nodes get a
        dedicated bridging router (see _createRNFRouter).
        """
        if num_nodes_per_router:
            # evenly distribute nodes to all listed routers
            assert(len(router_idx_list)*num_nodes_per_router == len(node_list))

            for idx, node in enumerate(node_list):
                mesh_router_idx = router_idx_list[idx // num_nodes_per_router]
                router = self._routers[mesh_router_idx]

                # Create another router bridging RNF node controllers
                # and the mesh router
                # for non-RNF nodes, node router is mesh router
                if isinstance(node, CHI.CHI_RNF):
                    router = self._createRNFRouter(router)

                # connect all ctrls in the node to node_router
                ctrls = node.getNetworkSideControllers()
                for c in ctrls:
                    self._ext_links.append(self._ExtLink(
                                            link_id = self._link_count,
                                            ext_node = c,
                                            int_node = router,
                                            latency = self._node_link_latency))
                    self._link_count += 1
        else:
            # try to circulate all nodes to all routers, some routers may be
            # connected to zero or more than one node.
            idx = 0
            for node in node_list:
                ridx = router_idx_list[idx]
                router = self._routers[ridx]

                if isinstance(node, CHI.CHI_RNF):
                    router = self._createRNFRouter(router)
                ctrls = node.getNetworkSideControllers()
                for c in ctrls:
                    self._ext_links.append(self._ExtLink( \
                                            link_id = self._link_count,
                                            ext_node = c,
                                            int_node = router,
                                            latency = self._node_link_latency))
                    self._link_count += 1
                idx = (idx + 1) % len(router_idx_list)

    #--------------------------------------------------------------------------
    # makeTopology
    #--------------------------------------------------------------------------

    def makeTopology(self, options, network, IntLink, ExtLink, Router):
        """Build the mesh and place all CHI controllers on it.

        Expects options.num_rows / options.num_cols, per-node-type
        placement dicts (options.CHI_RNF, CHI_HNF, CHI_SNF_MainMem,
        CHI_SNF_IO, CHI_RNI_IO), link latencies, and optionally a
        'pairing' list mapping HNFs to SNFs.
        """
        assert(buildEnv['PROTOCOL'] == 'CHI')

        num_rows = options.num_rows
        num_cols = options.num_cols
        num_mesh_routers = num_rows * num_cols

        self._IntLink = IntLink
        self._ExtLink = ExtLink
        self._Router = Router

        if hasattr(options, 'router_link_latency'):
            self._router_link_latency = options.router_link_latency
            self._node_link_latency = options.node_link_latency
        else:
            # fall back to the generic link latency for both link types
            print("WARNING: router/node link latencies not provided")
            self._router_link_latency = options.link_latency
            self._node_link_latency = options.link_latency

        # classify nodes into different types
        rnf_list = []
        hnf_list = []
        mem_ctrls = []
        io_mem_ctrls = []
        io_rni_ctrls = []

        for n in self.nodes:
            if isinstance(n, CHI.CHI_RNF):
                rnf_list.append(n)
            elif isinstance(n, CHI.CHI_HNF):
                hnf_list.append(n)
            elif isinstance(n, CHI.CHI_SNF_MainMem):
                mem_ctrls.append(n)
            elif isinstance(n, CHI.CHI_SNF_BootMem):
                io_mem_ctrls.append(n)
            elif isinstance(n, CHI.CHI_RNI_DMA):
                io_rni_ctrls.append(n)
            elif isinstance(n, CHI.CHI_RNI_IO):
                io_rni_ctrls.append(n)
            else:
                fatal('topologies.CustomMesh: {} not supported'
                            .format(n.__class__.__name__))

        # Create all mesh routers
        self._routers = [Router(router_id=i, latency = options.router_latency)\
                                    for i in range(num_mesh_routers)]

        self._link_count = 0
        self._int_links = []
        self._ext_links = []

        # Create all the mesh internal links.
        self._makeMesh(IntLink, self._router_link_latency, num_rows, num_cols,
                       options.cross_links, options.cross_link_latency)

        # Place CHI_RNF on the mesh
        num_nodes_per_router = options.CHI_RNF['num_nodes_per_router'] \
            if 'num_nodes_per_router' in options.CHI_RNF else None
        self.distributeNodes(num_nodes_per_router,
                             options.CHI_RNF['router_list'],
                             rnf_list)

        # Place CHI_HNF on the mesh
        num_nodes_per_router = options.CHI_HNF['num_nodes_per_router'] \
            if 'num_nodes_per_router' in options.CHI_HNF else None
        self.distributeNodes(num_nodes_per_router,
                             options.CHI_HNF['router_list'],
                             hnf_list)

        # Place CHI_SNF_MainMem on the mesh
        num_nodes_per_router = options.CHI_SNF_MainMem['num_nodes_per_router']\
            if 'num_nodes_per_router' in options.CHI_SNF_MainMem else None
        self.distributeNodes(num_nodes_per_router,
                             options.CHI_SNF_MainMem['router_list'],
                             mem_ctrls)

        # Place all IO mem nodes on the mesh
        num_nodes_per_router = options.CHI_SNF_IO['num_nodes_per_router'] \
            if 'num_nodes_per_router' in options.CHI_SNF_IO else None
        self.distributeNodes(num_nodes_per_router,
                             options.CHI_SNF_IO['router_list'],
                             io_mem_ctrls)

        # Place all IO request nodes on the mesh
        num_nodes_per_router = options.CHI_RNI_IO['num_nodes_per_router'] \
            if 'num_nodes_per_router' in options.CHI_RNI_IO else None
        self.distributeNodes(num_nodes_per_router,
                             options.CHI_RNI_IO['router_list'],
                             io_rni_ctrls)

        # Set up
        network.int_links = self._int_links
        network.ext_links = self._ext_links
        network.routers = self._routers

        pairing = getattr(options, 'pairing', None)
        if pairing is not None:
            self._autoPairHNFandSNF(hnf_list, mem_ctrls, pairing)

    #--------------------------------------------------------------------------
    # _autoPair
    #--------------------------------------------------------------------------
    def _autoPairHNFandSNF(self, cache_ctrls, mem_ctrls, pairing):
        """Pair HNF (cache) controllers with SNF (memory) controllers
        as specified by 'pairing' and reassign their interleaved
        address ranges so each pair covers matching address bits.

        pairing[cidx] names the memory controller index (or list of
        indices) paired with cache controller cidx. Only a single
        address range per main memory controller is supported.
        """
        # Use the pairing defined by the configuration to reassign the
        # memory ranges
        pair_debug = False

        print("Pairing HNFs to SNFs")
        print(pairing)

        all_cache = []
        for c in cache_ctrls: all_cache.extend(c.getNetworkSideControllers())
        all_mem = []
        for c in mem_ctrls: all_mem.extend(c.getNetworkSideControllers())

        # checks and maps index from pairing map to component
        assert(len(pairing) == len(all_cache))

        def _tolist(val): return val if isinstance(val, list) else [val]

        for m in all_mem: m._pairing = []

        # Each cache must be paired with the same number of memories;
        # use floor division so the check value stays an integer.
        pairing_check = max(1, len(all_mem) // len(all_cache))
        for cidx,c in enumerate(all_cache):
            c._pairing = []
            for midx in _tolist(pairing[cidx]):
                c._pairing.append(all_mem[midx])
                if c not in all_mem[midx]._pairing:
                    all_mem[midx]._pairing.append(c)
            assert(len(c._pairing) == pairing_check)
            if pair_debug:
                print(c.path())
                for r in c.addr_ranges:
                    print("%s" % r)
                for p in c._pairing:
                    print("\t"+p.path())
                    for r in p.addr_ranges:
                        print("\t%s" % r)

        # all must be paired
        for c in all_cache: assert(len(c._pairing) > 0)
        for m in all_mem: assert(len(m._pairing) > 0)

        # only support a single range for the main memory controllers
        tgt_range_start = all_mem[0].addr_ranges[0].start.value
        for mem in all_mem:
            for r in mem.addr_ranges:
                if r.start.value != tgt_range_start:
                    fatal('topologies.CustomMesh: not supporting pairing of '\
                          'main memory with multiple ranges')

        # reassign ranges for a 1 -> N paring
        def _rerange(src_cntrls, tgt_cntrls, fix_tgt_peer):
            assert(len(tgt_cntrls) >= len(src_cntrls))

            # All ranges of a controller must share one intlvMatch value
            def _rangeToBit(addr_ranges):
                bit = None
                for r in addr_ranges:
                    if bit is None:
                        bit = r.intlvMatch
                    else:
                        assert(bit == r.intlvMatch)
                return bit

            def _getPeer(cntrl):
                return cntrl.memory_out_port.peer.simobj

            sorted_src = list(src_cntrls)
            sorted_src.sort(key = lambda x: _rangeToBit(x.addr_ranges))

            # paired controllers need to have seq. interleaving match values
            intlvMatch = 0
            for src in sorted_src:
                for tgt in src._pairing:
                    for r in tgt.addr_ranges:
                        r.intlvMatch = intlvMatch
                    if fix_tgt_peer:
                        _getPeer(tgt).range.intlvMatch = intlvMatch
                    intlvMatch = intlvMatch + 1

            # recreate masks
            for src in sorted_src:
                for src_range in src.addr_ranges:
                    if src_range.start.value != tgt_range_start:
                        continue
                    new_src_mask = []
                    for m in src_range.masks:
                        # TODO should mask all the way to the max range size
                        new_src_mask.append(m | (m*2) | (m*4) |
                                                (m*8) | (m*16))
                    for tgt in src._pairing:
                        paired = False
                        for tgt_range in tgt.addr_ranges:
                            if tgt_range.start.value == \
                               src_range.start.value:
                                src_range.masks = new_src_mask
                                new_tgt_mask = []
                                # keep the target's extra low-order mask
                                # bits, then append the widened src bits
                                lsbs = len(tgt_range.masks) - \
                                       len(new_src_mask)
                                for i in range(lsbs):
                                    new_tgt_mask.append(tgt_range.masks[i])
                                for m in new_src_mask:
                                    new_tgt_mask.append(m)
                                tgt_range.masks = new_tgt_mask
                                if fix_tgt_peer:
                                    _getPeer(tgt).range.masks = new_tgt_mask
                                paired = True
                        if not paired:
                            fatal('topologies.CustomMesh: could not ' \
                                  'reassign ranges {} {}'.format(
                                  src.path(), tgt.path()))

        if len(all_mem) >= len(all_cache):
            _rerange(all_cache, all_mem, True)
        else:
            _rerange(all_mem, all_cache, False)

        if pair_debug:
            print("")
            for cidx,c in enumerate(all_cache):
                assert(len(c._pairing) == pairing_check)
                print(c.path())
                for r in c.addr_ranges:
                    print("%s" % r)
                for p in c._pairing:
                    print("\t"+p.path())
                    for r in p.addr_ranges:
                        print("\t%s" % r)

View File

@@ -117,7 +117,7 @@
using std::fputs;
using std::getc;
using std::getchar;
using std::gets;
//using std::gets;
using std::putc;
using std::putchar;
using std::puts;

View File

@@ -917,7 +917,7 @@ PySource('m5', 'python/m5/info.py')
# Create all of the SimObject param headers and enum headers
#
def createSimObjectParamStruct(target, source, env):
def createSimObjectParamDecl(target, source, env):
assert len(target) == 1 and len(source) == 1
name = source[0].get_text_contents()
@@ -927,6 +927,16 @@ def createSimObjectParamStruct(target, source, env):
obj.cxx_param_decl(code)
code.write(target[0].abspath)
def createSimObjectParamDef(target, source, env):
assert len(target) == 1 and len(source) == 1
name = source[0].get_text_contents()
obj = sim_objects[name]
code = code_formatter()
obj.cxx_param_def(code)
code.write(target[0].abspath)
def createSimObjectCxxConfig(is_header):
def body(target, source, env):
assert len(target) == 1 and len(source) == 1
@@ -987,9 +997,16 @@ for name,simobj in sorted(sim_objects.items()):
hh_file = File('params/%s.hh' % name)
params_hh_files.append(hh_file)
env.Command(hh_file, Value(name),
MakeAction(createSimObjectParamStruct, Transform("SO PARAM")))
MakeAction(createSimObjectParamDecl, Transform("SOPARMHH")))
env.Depends(hh_file, depends + extra_deps)
if not getattr(simobj, 'abstract', False) and hasattr(simobj, 'type'):
cc_file = File('params/%s.cc' % name)
env.Command(cc_file, Value(name),
MakeAction(createSimObjectParamDef, Transform("SOPARMCC")))
env.Depends(cc_file, depends + extra_deps)
Source(cc_file)
# C++ parameter description files
if GetOption('with_cxx_config'):
for name,simobj in sorted(sim_objects.items()):

View File

@@ -103,7 +103,7 @@ class BaseMMU : public SimObject
return getTlb(mode)->finalizePhysical(req, tc, mode);
}
void takeOverFrom(BaseMMU *old_mmu);
virtual void takeOverFrom(BaseMMU *old_mmu);
public:
BaseTLB* dtb;

View File

@@ -35,6 +35,8 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.params import *
from m5.objects.BaseMMU import BaseMMU
from m5.objects.RiscvTLB import RiscvTLB
from m5.objects.PMAChecker import PMAChecker
@@ -45,7 +47,7 @@ class RiscvMMU(BaseMMU):
cxx_header = 'arch/riscv/mmu.hh'
itb = RiscvTLB()
dtb = RiscvTLB()
pma_checker = PMAChecker()
pma_checker = Param.PMAChecker(PMAChecker(), "PMA Checker")
@classmethod
def walkerPorts(cls):

View File

@@ -43,7 +43,7 @@ class RiscvPagetableWalker(ClockedObject):
num_squash_per_cycle = Param.Unsigned(4,
"Number of outstanding walks that can be squashed per cycle")
# Grab the pma_checker from the MMU
pma_checker = Param.PMAChecker(Parent.any, "PMA Chekcer")
pma_checker = Param.PMAChecker(Parent.any, "PMA Checker")
class RiscvTLB(BaseTLB):
type = 'RiscvTLB'
@@ -53,4 +53,4 @@ class RiscvTLB(BaseTLB):
walker = Param.RiscvPagetableWalker(\
RiscvPagetableWalker(), "page table walker")
# Grab the pma_checker from the MMU
pma_checker = Param.PMAChecker(Parent.any, "PMA Chekcer")
pma_checker = Param.PMAChecker(Parent.any, "PMA Checker")

View File

@@ -40,6 +40,7 @@
#include "arch/generic/mmu.hh"
#include "arch/riscv/isa.hh"
#include "arch/riscv/pma_checker.hh"
#include "arch/riscv/tlb.hh"
#include "params/RiscvMMU.hh"
@@ -49,8 +50,10 @@ namespace RiscvISA {
class MMU : public BaseMMU
{
public:
PMAChecker *pma;
MMU(const RiscvMMUParams &p)
: BaseMMU(p)
: BaseMMU(p), pma(p.pma_checker)
{}
PrivilegeMode
@@ -64,6 +67,14 @@ class MMU : public BaseMMU
{
return static_cast<TLB*>(dtb)->getWalker();
}
void
takeOverFrom(BaseMMU *old_mmu) override
{
MMU *ommu = dynamic_cast<MMU*>(old_mmu);
BaseMMU::takeOverFrom(ommu);
pma->takeOverFrom(ommu->pma);
}
};
} // namespace RiscvISA

View File

@@ -81,3 +81,9 @@ PMAChecker::isUncacheable(PacketPtr pkt)
{
return isUncacheable(pkt->getAddrRange());
}
void
PMAChecker::takeOverFrom(PMAChecker *old)
{
uncacheable = old->uncacheable;
}

View File

@@ -74,6 +74,8 @@ class PMAChecker : public SimObject
bool isUncacheable(const AddrRange &range);
bool isUncacheable(const Addr &addr, const unsigned size);
bool isUncacheable(PacketPtr pkt);
void takeOverFrom(PMAChecker *old);
};
#endif // __ARCH_RISCV_PMA_CHECKER_HH__

View File

@@ -211,7 +211,7 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context)
// U mode CSR
r.ustatus = context->readMiscRegNoEffect(
CSRData.at(CSR_USTATUS).physIndex) & CSRMasks.at(CSR_USTATUS);
r.uie = context->readMiscRegNoEffect(
r.uie = context->readMiscReg(
CSRData.at(CSR_UIE).physIndex) & CSRMasks.at(CSR_UIE);
r.utvec = context->readMiscRegNoEffect(
CSRData.at(CSR_UTVEC).physIndex);
@@ -223,7 +223,7 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context)
CSRData.at(CSR_UCAUSE).physIndex);
r.utval = context->readMiscRegNoEffect(
CSRData.at(CSR_UTVAL).physIndex);
r.uip = context->readMiscRegNoEffect(
r.uip = context->readMiscReg(
CSRData.at(CSR_UIP).physIndex) & CSRMasks.at(CSR_UIP);
// S mode CSR
@@ -233,7 +233,7 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context)
CSRData.at(CSR_SEDELEG).physIndex);
r.sideleg = context->readMiscRegNoEffect(
CSRData.at(CSR_SIDELEG).physIndex);
r.sie = context->readMiscRegNoEffect(
r.sie = context->readMiscReg(
CSRData.at(CSR_SIE).physIndex) & CSRMasks.at(CSR_SIE);
r.stvec = context->readMiscRegNoEffect(
CSRData.at(CSR_STVEC).physIndex);
@@ -247,7 +247,7 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context)
CSRData.at(CSR_SCAUSE).physIndex);
r.stval = context->readMiscRegNoEffect(
CSRData.at(CSR_STVAL).physIndex);
r.sip = context->readMiscRegNoEffect(
r.sip = context->readMiscReg(
CSRData.at(CSR_SIP).physIndex) & CSRMasks.at(CSR_SIP);
r.satp = context->readMiscRegNoEffect(
CSRData.at(CSR_SATP).physIndex);
@@ -269,7 +269,7 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context)
CSRData.at(CSR_MEDELEG).physIndex);
r.mideleg = context->readMiscRegNoEffect(
CSRData.at(CSR_MIDELEG).physIndex);
r.mie = context->readMiscRegNoEffect(
r.mie = context->readMiscReg(
CSRData.at(CSR_MIE).physIndex) & CSRMasks.at(CSR_MIE);
r.mtvec = context->readMiscRegNoEffect(
CSRData.at(CSR_MTVEC).physIndex);
@@ -283,7 +283,7 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context)
CSRData.at(CSR_MCAUSE).physIndex);
r.mtval = context->readMiscRegNoEffect(
CSRData.at(CSR_MTVAL).physIndex);
r.mip = context->readMiscRegNoEffect(
r.mip = context->readMiscReg(
CSRData.at(CSR_MIP).physIndex) & CSRMasks.at(CSR_MIP);
// H mode CSR (to be implemented)
@@ -340,11 +340,11 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
newVal = (oldVal & ~mask) | (r.ustatus & mask);
context->setMiscRegNoEffect(
CSRData.at(CSR_USTATUS).physIndex, newVal);
oldVal = context->readMiscRegNoEffect(
oldVal = context->readMiscReg(
CSRData.at(CSR_UIE).physIndex);
mask = CSRMasks.at(CSR_UIE);
newVal = (oldVal & ~mask) | (r.uie & mask);
context->setMiscRegNoEffect(
context->setMiscReg(
CSRData.at(CSR_UIE).physIndex, newVal);
context->setMiscRegNoEffect(
CSRData.at(CSR_UTVEC).physIndex, r.utvec);
@@ -356,11 +356,11 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
CSRData.at(CSR_UCAUSE).physIndex, r.ucause);
context->setMiscRegNoEffect(
CSRData.at(CSR_UTVAL).physIndex, r.utval);
oldVal = context->readMiscRegNoEffect(
oldVal = context->readMiscReg(
CSRData.at(CSR_UIP).physIndex);
mask = CSRMasks.at(CSR_UIP);
newVal = (oldVal & ~mask) | (r.uip & mask);
context->setMiscRegNoEffect(
context->setMiscReg(
CSRData.at(CSR_UIP).physIndex, newVal);
// S mode CSR
@@ -374,11 +374,11 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
CSRData.at(CSR_SEDELEG).physIndex, r.sedeleg);
context->setMiscRegNoEffect(
CSRData.at(CSR_SIDELEG).physIndex, r.sideleg);
oldVal = context->readMiscRegNoEffect(
oldVal = context->readMiscReg(
CSRData.at(CSR_SIE).physIndex);
mask = CSRMasks.at(CSR_SIE);
newVal = (oldVal & ~mask) | (r.sie & mask);
context->setMiscRegNoEffect(
context->setMiscReg(
CSRData.at(CSR_SIE).physIndex, newVal);
context->setMiscRegNoEffect(
CSRData.at(CSR_STVEC).physIndex, r.stvec);
@@ -392,11 +392,11 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
CSRData.at(CSR_SCAUSE).physIndex, r.scause);
context->setMiscRegNoEffect(
CSRData.at(CSR_STVAL).physIndex, r.stval);
oldVal = context->readMiscRegNoEffect(
oldVal = context->readMiscReg(
CSRData.at(CSR_SIP).physIndex);
mask = CSRMasks.at(CSR_SIP);
newVal = (oldVal & ~mask) | (r.sip & mask);
context->setMiscRegNoEffect(
context->setMiscReg(
CSRData.at(CSR_SIP).physIndex, newVal);
context->setMiscRegNoEffect(
CSRData.at(CSR_SATP).physIndex, r.satp);
@@ -426,11 +426,11 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
CSRData.at(CSR_MEDELEG).physIndex, r.medeleg);
context->setMiscRegNoEffect(
CSRData.at(CSR_MIDELEG).physIndex, r.mideleg);
oldVal = context->readMiscRegNoEffect(
oldVal = context->readMiscReg(
CSRData.at(CSR_MIE).physIndex);
mask = CSRMasks.at(CSR_MIE);
newVal = (oldVal & ~mask) | (r.mie & mask);
context->setMiscRegNoEffect(
context->setMiscReg(
CSRData.at(CSR_MIE).physIndex, newVal);
context->setMiscRegNoEffect(
CSRData.at(CSR_MTVEC).physIndex, r.mtvec);
@@ -444,11 +444,11 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
CSRData.at(CSR_MCAUSE).physIndex, r.mcause);
context->setMiscRegNoEffect(
CSRData.at(CSR_MTVAL).physIndex, r.mtval);
oldVal = context->readMiscRegNoEffect(
oldVal = context->readMiscReg(
CSRData.at(CSR_MIP).physIndex);
mask = CSRMasks.at(CSR_MIP);
newVal = (oldVal & ~mask) | (r.mip & mask);
context->setMiscRegNoEffect(
context->setMiscReg(
CSRData.at(CSR_MIP).physIndex, newVal);
// H mode CSR (to be implemented)

View File

@@ -519,3 +519,9 @@ TLB::TlbStats::TlbStats(Stats::Group *parent)
readAccesses + writeAccesses)
{
}
Port *
TLB::getTableWalkerPort()
{
return &walker->getPort("port");
}

View File

@@ -92,7 +92,7 @@ class TLB : public BaseTLB
Walker *getWalker();
void takeOverFrom(BaseTLB *otlb) override {}
void takeOverFrom(BaseTLB *old) override {}
TlbEntry *insert(Addr vpn, const TlbEntry &entry);
void flushAll() override;
@@ -108,6 +108,18 @@ class TLB : public BaseTLB
void serialize(CheckpointOut &cp) const override;
void unserialize(CheckpointIn &cp) override;
/**
* Get the table walker port. This is used for
* migrating port connections during a CPU takeOverFrom()
* call. For architectures that do not have a table walker,
* NULL is returned, hence the use of a pointer rather than a
* reference. For RISC-V this method will always return a valid
* port pointer.
*
* @return A pointer to the walker port
*/
Port *getTableWalkerPort() override;
Addr translateWithTLB(Addr vaddr, uint16_t asid, Mode mode);
Fault translateAtomic(const RequestPtr &req,

View File

@@ -47,10 +47,9 @@
#define UNIT_RATE(T1, T2) Stats::Units::Rate<T1, T2>::get()
#define UNIT_RATIO Stats::Units::Ratio::get()
#define UNIT_COUNT Stats::Units::Count::get()
#define UNIT_WATT Stats::Units::Watt::get()
#define UNIT_UNSPECIFIED Stats::Units::Unspecified::get()
#define UNIT_WATT UNIT_RATE(Stats::Units::Joule, Stats::Units::Second)
namespace Stats {
/**

View File

@@ -43,12 +43,6 @@ CpuThread::CpuThread(const Params &p)
assert(numLanes == 1);
}
CpuThread*
CpuThreadParams::create() const
{
return new CpuThread(*this);
}
void
CpuThread::issueLoadOps()
{

View File

@@ -48,12 +48,6 @@ DmaThread::~DmaThread()
}
DmaThread*
DmaThreadParams::create() const
{
return new DmaThread(*this);
}
void
DmaThread::issueLoadOps()
{

View File

@@ -48,12 +48,6 @@ GpuWavefront::~GpuWavefront()
}
GpuWavefront*
GpuWavefrontParams::create() const
{
return new GpuWavefront(*this);
}
void
GpuWavefront::issueLoadOps()
{

View File

@@ -357,9 +357,3 @@ ProtocolTester::SeqPort::recvTimingResp(PacketPtr pkt)
return true;
}
ProtocolTester*
ProtocolTesterParams::create() const
{
return new ProtocolTester(*this);
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, 2018-2019 ARM Limited
* Copyright (c) 2014, 2018-2019, 2021 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -59,12 +59,12 @@
*/
SMMUv3BaseCache::SMMUv3BaseCache(const std::string &policy_name, uint32_t seed,
Stats::Group *parent) :
replacementPolicy(decodePolicyName(policy_name)),
Stats::Group *parent, const std::string &name)
: replacementPolicy(decodePolicyName(policy_name)),
nextToReplace(0),
random(seed),
useStamp(0),
baseCacheStats(parent)
baseCacheStats(parent, name)
{}
int
@@ -82,8 +82,9 @@ SMMUv3BaseCache::decodePolicyName(const std::string &policy_name)
}
SMMUv3BaseCache::
SMMUv3BaseCacheStats::SMMUv3BaseCacheStats(Stats::Group *parent)
: Stats::Group(parent),
SMMUv3BaseCacheStats::SMMUv3BaseCacheStats(
Stats::Group *parent, const std::string &name)
: Stats::Group(parent, name.c_str()),
ADD_STAT(averageLookups,
UNIT_RATE(Stats::Units::Count, Stats::Units::Second),
"Average number lookups per second"),
@@ -144,9 +145,10 @@ SMMUv3BaseCacheStats::SMMUv3BaseCacheStats(Stats::Group *parent)
*/
SMMUTLB::SMMUTLB(unsigned numEntries, unsigned _associativity,
const std::string &policy, Stats::Group *parent)
const std::string &policy, Stats::Group *parent,
const std::string &name)
:
SMMUv3BaseCache(policy, SMMUTLB_SEED, parent),
SMMUv3BaseCache(policy, SMMUTLB_SEED, parent, name),
associativity(_associativity)
{
if (associativity == 0)
@@ -426,7 +428,7 @@ SMMUTLB::pickEntryIdxToReplace(const Set &set, AllocPolicy alloc)
ARMArchTLB::ARMArchTLB(unsigned numEntries, unsigned _associativity,
const std::string &policy, Stats::Group *parent)
:
SMMUv3BaseCache(policy, ARMARCHTLB_SEED, parent),
SMMUv3BaseCache(policy, ARMARCHTLB_SEED, parent, "tlb"),
associativity(_associativity)
{
if (associativity == 0)
@@ -625,7 +627,7 @@ ARMArchTLB::pickEntryIdxToReplace(const Set &set)
IPACache::IPACache(unsigned numEntries, unsigned _associativity,
const std::string &policy, Stats::Group *parent)
:
SMMUv3BaseCache(policy, IPACACHE_SEED, parent),
SMMUv3BaseCache(policy, IPACACHE_SEED, parent, "ipa"),
associativity(_associativity)
{
if (associativity == 0)
@@ -805,7 +807,7 @@ IPACache::pickEntryIdxToReplace(const Set &set)
ConfigCache::ConfigCache(unsigned numEntries, unsigned _associativity,
const std::string &policy, Stats::Group *parent)
:
SMMUv3BaseCache(policy, CONFIGCACHE_SEED, parent),
SMMUv3BaseCache(policy, CONFIGCACHE_SEED, parent, "cfg"),
associativity(_associativity)
{
if (associativity == 0)
@@ -969,7 +971,7 @@ ConfigCache::pickEntryIdxToReplace(const Set &set)
WalkCache::WalkCache(const std::array<unsigned, 2*WALK_CACHE_LEVELS> &_sizes,
unsigned _associativity, const std::string &policy,
Stats::Group *parent) :
SMMUv3BaseCache(policy, WALKCACHE_SEED, parent),
SMMUv3BaseCache(policy, WALKCACHE_SEED, parent, "walk"),
walkCacheStats(&(SMMUv3BaseCache::baseCacheStats)),
associativity(_associativity),
sizes()
@@ -1041,10 +1043,8 @@ WalkCache::lookup(Addr va, Addr vaMask,
if (result == NULL)
baseCacheStats.totalMisses++;
walkCacheStats.lookupsByStageLevel[stage-1][level]++;
walkCacheStats.totalLookupsByStageLevel[stage-1][level]++;
if (result == NULL) {
walkCacheStats.missesByStageLevel[stage-1][level]++;
walkCacheStats.totalMissesByStageLevel[stage-1][level]++;
}
}
@@ -1077,7 +1077,6 @@ WalkCache::store(const Entry &incoming)
}
baseCacheStats.totalUpdates++;
walkCacheStats.updatesByStageLevel[incoming.stage-1][incoming.level]++;
walkCacheStats
.totalUpdatesByStageLevel[incoming.stage-1][incoming.level]++;
}
@@ -1226,68 +1225,105 @@ WalkCache::pickEntryIdxToReplace(const Set &set,
WalkCache::
WalkCacheStats::WalkCacheStats(Stats::Group *parent)
: Stats::Group(parent, "WalkCache")
: Stats::Group(parent),
ADD_STAT(totalLookupsByStageLevel, UNIT_COUNT,
"Total number of lookups"),
ADD_STAT(totalMissesByStageLevel, UNIT_COUNT,
"Total number of misses"),
ADD_STAT(totalUpdatesByStageLevel, UNIT_COUNT,
"Total number of updates"),
ADD_STAT(insertionsByStageLevel, UNIT_COUNT,
"Number of insertions (not replacements)")
{
using namespace Stats;
totalLookupsByStageLevel
.init(2, WALK_CACHE_LEVELS)
.flags(pdf);
totalMissesByStageLevel
.init(2, WALK_CACHE_LEVELS)
.flags(pdf);
totalUpdatesByStageLevel
.init(2, WALK_CACHE_LEVELS)
.flags(pdf);
insertionsByStageLevel
.init(2, WALK_CACHE_LEVELS)
.flags(pdf);
for (int s = 0; s < 2; s++) {
totalLookupsByStageLevel.subname(s, csprintf("S%d", s + 1));
totalMissesByStageLevel.subname(s, csprintf("S%d", s + 1));
totalUpdatesByStageLevel.subname(s, csprintf("S%d", s + 1));
insertionsByStageLevel.subname(s, csprintf("S%d", s + 1));
for (int l = 0; l < WALK_CACHE_LEVELS; l++) {
averageLookupsByStageLevel[s][l]
.name(csprintf("averageLookupsS%dL%d", s+1, l))
.desc("Average number lookups per second")
.flags(pdf);
totalLookupsByStageLevel.ysubname(l, csprintf("L%d", l));
totalMissesByStageLevel.ysubname(l, csprintf("L%d", l));
totalUpdatesByStageLevel.ysubname(l, csprintf("L%d", l));
insertionsByStageLevel.ysubname(l, csprintf("L%d", l));
totalLookupsByStageLevel[s][l]
.name(csprintf("totalLookupsS%dL%d", s+1, l))
.desc("Total number of lookups")
.flags(pdf);
auto avg_lookup = new Stats::Formula(
this,
csprintf("averageLookups_S%dL%d", s+1, l).c_str(),
UNIT_RATE(Stats::Units::Count, Stats::Units::Second),
"Average number lookups per second");
avg_lookup->flags(pdf);
averageLookupsByStageLevel.push_back(avg_lookup);
averageLookupsByStageLevel[s][l] =
*avg_lookup =
totalLookupsByStageLevel[s][l] / simSeconds;
auto avg_misses = new Stats::Formula(
this,
csprintf("averageMisses_S%dL%d", s+1, l).c_str(),
UNIT_RATE(Stats::Units::Count, Stats::Units::Second),
"Average number misses per second");
avg_misses->flags(pdf);
averageMissesByStageLevel.push_back(avg_misses);
averageMissesByStageLevel[s][l]
.name(csprintf("averageMissesS%dL%d", s+1, l))
.desc("Average number misses per second")
.flags(pdf);
totalMissesByStageLevel[s][l]
.name(csprintf("totalMissesS%dL%d", s+1, l))
.desc("Total number of misses")
.flags(pdf);
averageMissesByStageLevel[s][l] =
*avg_misses =
totalMissesByStageLevel[s][l] / simSeconds;
auto avg_updates = new Stats::Formula(
this,
csprintf("averageUpdates_S%dL%d", s+1, l).c_str(),
UNIT_RATE(Stats::Units::Count, Stats::Units::Second),
"Average number updates per second");
avg_updates->flags(pdf);
averageUpdatesByStageLevel.push_back(avg_updates);
averageUpdatesByStageLevel[s][l]
.name(csprintf("averageUpdatesS%dL%d", s+1, l))
.desc("Average number updates per second")
.flags(pdf);
totalUpdatesByStageLevel[s][l]
.name(csprintf("totalUpdatesS%dL%d", s+1, l))
.desc("Total number of updates")
.flags(pdf);
averageUpdatesByStageLevel[s][l] =
*avg_updates =
totalUpdatesByStageLevel[s][l] / simSeconds;
auto avg_hitrate = new Stats::Formula(
this,
csprintf("averageHitRate_S%dL%d", s+1, l).c_str(),
UNIT_RATIO,
"Average hit rate");
avg_hitrate->flags(pdf);
averageHitRateByStageLevel.push_back(avg_hitrate);
averageHitRateByStageLevel[s][l]
.name(csprintf("averageHitRateS%dL%d", s+1, l))
.desc("Average hit rate")
.flags(pdf);
averageHitRateByStageLevel[s][l] =
*avg_hitrate =
(totalLookupsByStageLevel[s][l] -
totalMissesByStageLevel[s][l])
/ totalLookupsByStageLevel[s][l];
insertionsByStageLevel[s][l]
.name(csprintf("insertionsS%dL%d", s+1, l))
.desc("Number of insertions (not replacements)")
.flags(pdf);
}
}
}
WalkCache::
WalkCacheStats::~WalkCacheStats()
{
for (auto avg_lookup : averageLookupsByStageLevel)
delete avg_lookup;
for (auto avg_miss : averageMissesByStageLevel)
delete avg_miss;
for (auto avg_update : averageUpdatesByStageLevel)
delete avg_update;
for (auto avg_hitrate : averageHitRateByStageLevel)
delete avg_hitrate;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, 2018-2019 ARM Limited
* Copyright (c) 2014, 2018-2019, 2021 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -67,7 +67,7 @@ class SMMUv3BaseCache
struct SMMUv3BaseCacheStats : public Stats::Group
{
SMMUv3BaseCacheStats(Stats::Group *parent);
SMMUv3BaseCacheStats(Stats::Group *parent, const std::string &name);
Stats::Formula averageLookups;
Stats::Scalar totalLookups;
@@ -87,7 +87,7 @@ class SMMUv3BaseCache
public:
SMMUv3BaseCache(const std::string &policy_name, uint32_t seed,
Stats::Group *parent);
Stats::Group *parent, const std::string &name);
virtual ~SMMUv3BaseCache() {}
};
@@ -122,7 +122,8 @@ class SMMUTLB : public SMMUv3BaseCache
};
SMMUTLB(unsigned numEntries, unsigned _associativity,
const std::string &policy, Stats::Group *parent);
const std::string &policy, Stats::Group *parent,
const std::string &name);
SMMUTLB(const SMMUTLB& tlb) = delete;
virtual ~SMMUTLB() {}
@@ -324,22 +325,20 @@ class WalkCache : public SMMUv3BaseCache
struct WalkCacheStats : public Stats::Group
{
WalkCacheStats(Stats::Group *parent);
~WalkCacheStats();
unsigned int lookupsByStageLevel[2][WALK_CACHE_LEVELS];
Stats::Formula averageLookupsByStageLevel[2][WALK_CACHE_LEVELS];
Stats::Scalar totalLookupsByStageLevel[2][WALK_CACHE_LEVELS];
std::vector<Stats::Formula*> averageLookupsByStageLevel;
Stats::Vector2d totalLookupsByStageLevel;
unsigned int missesByStageLevel[2][WALK_CACHE_LEVELS];
Stats::Formula averageMissesByStageLevel[2][WALK_CACHE_LEVELS];
Stats::Scalar totalMissesByStageLevel[2][WALK_CACHE_LEVELS];
std::vector<Stats::Formula*> averageMissesByStageLevel;
Stats::Vector2d totalMissesByStageLevel;
unsigned int updatesByStageLevel[2][WALK_CACHE_LEVELS];
Stats::Formula averageUpdatesByStageLevel[2][WALK_CACHE_LEVELS];
Stats::Scalar totalUpdatesByStageLevel[2][WALK_CACHE_LEVELS];
std::vector<Stats::Formula*> averageUpdatesByStageLevel;
Stats::Vector2d totalUpdatesByStageLevel;
Stats::Formula averageHitRateByStageLevel[2][WALK_CACHE_LEVELS];
std::vector<Stats::Formula*> averageHitRateByStageLevel;
Stats::Scalar insertionsByStageLevel[2][WALK_CACHE_LEVELS];
Stats::Vector2d insertionsByStageLevel;
} walkCacheStats;
private:
typedef std::vector<Entry> Set;

View File

@@ -49,11 +49,11 @@ SMMUv3DeviceInterface::SMMUv3DeviceInterface(
microTLB(new SMMUTLB(p.utlb_entries,
p.utlb_assoc,
p.utlb_policy,
this)),
this, "utlb")),
mainTLB(new SMMUTLB(p.tlb_entries,
p.tlb_assoc,
p.tlb_policy,
this)),
this, "maintlb")),
microTLBEnable(p.utlb_enable),
mainTLBEnable(p.tlb_enable),
devicePortSem(1),

View File

@@ -354,17 +354,18 @@ Plic::readClaim(Register32& reg, const int context_id)
context_id, max_int_id);
clear(max_int_id);
reg.update(max_int_id);
return reg.get();
} else {
DPRINTF(Plic,
"Claim already cleared - context: %d, interrupt ID: %d\n",
context_id, max_int_id);
reg.update(0);
return 0;
}
} else {
warn("PLIC claim failed (not completed) - context: %d", context_id);
reg.update(0);
warn("PLIC claim repeated (not completed) - context: %d, last: %d",
context_id, lastID[context_id]);
return lastID[context_id];
}
return reg.get();
}
void
@@ -381,6 +382,7 @@ Plic::writeClaim(Register32& reg, const uint32_t& data, const int context_id)
DPRINTF(Plic,
"Complete - context: %d, interrupt ID: %d\n",
context_id, reg.get());
updateInt();
}
void
@@ -445,11 +447,11 @@ Plic::updateInt()
uint32_t max_id = output.maxID[i];
uint32_t priority = output.maxPriority[i];
uint32_t threshold = registers.threshold[i].get();
if (priority > threshold && max_id > 0) {
if (priority > threshold && max_id > 0 && lastID[i] == 0) {
DPRINTF(Plic,
"Int posted - thread: %d, int id: %d, ",
thread_id, int_id);
DPRINTF(Plic,
DPRINTFR(Plic,
"pri: %d, thres: %d\n", priority, threshold);
intrctrl->post(thread_id, int_id, 0);
} else {
@@ -457,7 +459,7 @@ Plic::updateInt()
DPRINTF(Plic,
"Int filtered - thread: %d, int id: %d, ",
thread_id, int_id);
DPRINTF(Plic,
DPRINTFR(Plic,
"pri: %d, thres: %d\n", priority, threshold);
}
intrctrl->clear(thread_id, int_id, 0);
@@ -499,6 +501,12 @@ Plic::serialize(CheckpointOut &cp) const
SERIALIZE_SCALAR(n_outputs);
SERIALIZE_CONTAINER(output.maxID);
SERIALIZE_CONTAINER(output.maxPriority);
SERIALIZE_CONTAINER(pendingPriority);
for (int i=0; i < effPriority.size(); i++) {
arrayParamOut(cp, std::string("effPriority") +
std::to_string(i), effPriority[i]);
}
SERIALIZE_CONTAINER(lastID);
}
void
@@ -541,4 +549,11 @@ Plic::unserialize(CheckpointIn &cp)
}
UNSERIALIZE_CONTAINER(output.maxID);
UNSERIALIZE_CONTAINER(output.maxPriority);
UNSERIALIZE_CONTAINER(pendingPriority);
for (int i=0; i < effPriority.size(); i++) {
arrayParamIn(cp, std::string("effPriority") +
std::to_string(i), effPriority[i]);
}
UNSERIALIZE_CONTAINER(lastID);
updateInt();
}

View File

@@ -114,9 +114,11 @@ MakeInclude('slicc_interface/RubyRequest.hh')
MakeInclude('common/Address.hh')
MakeInclude('common/BoolVec.hh')
MakeInclude('common/DataBlock.hh')
MakeInclude('common/ExpectedMap.hh')
MakeInclude('common/IntVec.hh')
MakeInclude('common/MachineID.hh')
MakeInclude('common/NetDest.hh')
MakeInclude('common/TriggerQueue.hh')
MakeInclude('common/Set.hh')
MakeInclude('common/WriteMask.hh')
MakeInclude('network/MessageBuffer.hh')

View File

@@ -0,0 +1,232 @@
/*
* Copyright (c) 2021 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __MEM_RUBY_COMMON_EXPECTEDMAP_HH__
#define __MEM_RUBY_COMMON_EXPECTEDMAP_HH__
#include <cassert>
#include <iostream>
#include <unordered_map>
// ExpectedMap helper class is used to facilitate tracking of pending
// response and data messages in the CHI protocol. It offers additional
// functionality when compared to plain counters:
// - tracks the expected type for received messages
// - tracks segmented data messages (i.e. when a line transfer is split
//   into multiple messages)
template<typename RespType, typename DataType>
class ExpectedMap
{
  private:
    // Bookkeeping for a single message category (responses or data).
    template<typename Type>
    struct ExpectedState
    {
        struct EnumClassHash
        {
            std::size_t operator()(Type t) const
            {
                return static_cast<std::size_t>(t);
            }
        };

      private:
        // Number of chunks that make up one complete message. Always 1
        // for non-data messages; data messages may be segmented.
        int chunks;
        // Chunks of the in-flight message seen so far.
        int currChunk;
        // Count of fully received messages.
        int numReceived;
        // Maps every registered message type to whether it was seen.
        std::unordered_map<Type, bool, EnumClassHash> expectedTypes;

      public:
        ExpectedState()
            : chunks(1), currChunk(0), numReceived(0)
        {}

        // Reset all counters; a message is complete after msg_chunks
        // chunks have arrived.
        void
        clear(int msg_chunks)
        {
            chunks = msg_chunks;
            currChunk = 0;
            numReceived = 0;
            expectedTypes.clear();
        }

        // Register val as a message type we may legally receive.
        void
        addExpectedType(const Type &val)
        {
            expectedTypes[val] = false;
        }

        // Number of fully received messages.
        int received() const { return numReceived; }

        // Account for one received chunk of type val. Returns false if
        // val was never registered as an expected type.
        bool
        increaseReceived(const Type &val)
        {
            auto entry = expectedTypes.find(val);
            if (entry == expectedTypes.end())
                return false;

            entry->second = true;
            ++currChunk;
            if (currChunk == chunks) {
                ++numReceived;
                currChunk = 0;
            }
            return true;
        }

        // Whether at least one chunk of type val has been received.
        bool
        receivedType(const Type &val) const
        {
            auto entry = expectedTypes.find(val);
            return (entry != expectedTypes.end()) ? entry->second : false;
        }
    };

    ExpectedState<DataType> expectedData;
    ExpectedState<RespType> expectedResp;
    int totalExpected;

  public:
    ExpectedMap()
        : expectedData(), expectedResp(), totalExpected(0)
    {}

    // Clear the tracking state; a complete data message consists of
    // dataChunks chunks, while responses always use a single chunk.
    void
    clear(int dataChunks)
    {
        expectedData.clear(dataChunks);
        expectedResp.clear(1);
        totalExpected = 0;
    }

    // Register an expected response message type
    void
    addExpectedRespType(const RespType &val)
    {
        expectedResp.addExpectedType(val);
    }

    // Register an expected data message type
    void
    addExpectedDataType(const DataType &val)
    {
        expectedData.addExpectedType(val);
    }

    // Set / adjust the total number of expected messages
    void setExpectedCount(int val) { totalExpected = val; }

    void addExpectedCount(int val) { totalExpected += val; }

    // Returns the number of messages received.
    // Notice that a data message counts as received only after all of
    // its chunks are received.
    int
    received() const
    {
        return expectedData.received() + expectedResp.received();
    }

    // Returns the remaining number of expected messages
    int expected() const { return totalExpected - received(); }

    // Has any expected message ?
    bool hasExpected() const { return expected() != 0; }

    // Has received any data ?
    bool hasReceivedData() const { return expectedData.received() != 0; }

    // Has received any response ?
    bool hasReceivedResp() const { return expectedResp.received() != 0; }

    // Notifies that a response message was received; returns false for
    // an unexpected type.
    bool
    receiveResp(const RespType &val)
    {
        assert(received() < totalExpected);
        return expectedResp.increaseReceived(val);
    }

    // Notifies that a data message chunk was received; returns false for
    // an unexpected type.
    bool
    receiveData(const DataType &val)
    {
        assert(received() <= totalExpected);
        return expectedData.increaseReceived(val);
    }

    // Has received any data of the given type ?
    bool
    receivedDataType(const DataType &val) const
    {
        return expectedData.receivedType(val);
    }

    // Has received any response of the given type ?
    bool
    receivedRespType(const RespType &val) const
    {
        return expectedResp.receivedType(val);
    }

    // Prints the number of still-expected messages.
    void
    print(std::ostream& out) const
    {
        out << expected();
    }
};
// Stream-insertion helper: prints the number of still-expected messages
// (delegates to ExpectedMap::print).
template<typename RespType, typename DataType>
inline std::ostream&
operator<<(std::ostream& out, const ExpectedMap<RespType,DataType>& obj)
{
    obj.print(out);
    return out;
}
#endif // __MEM_RUBY_COMMON_EXPECTEDMAP_HH__

View File

@@ -0,0 +1,125 @@
/*
* Copyright (c) 2021 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __MEM_RUBY_COMMON_QUEUE_HH__
#define __MEM_RUBY_COMMON_QUEUE_HH__
#include <deque>
#include <iostream>
// TriggerQueue helper class is used to keep a list of events that trigger
// the actions that need to be executed before an outstanding transaction
// completes in the CHI protocol. When a transaction no longer has pending
// response or data messages, this queue is checked and the event at the head
// of the queue is triggered. If the queue is empty, the transaction is
// finalized. Events can be marked as NB (non-blocking). NB events are
// triggered by the protocol even if the transaction has pending
// data/responses.
template<typename T>
class TriggerQueue
{
  private:
    // One queued event together with its non-blocking attribute.
    struct Entry
    {
        T val;
        bool nb;
    };

    std::deque<Entry> events;

  public:
    // Event at the head of the queue.
    const T& front() const { return events.front().val; }

    // Same as front().
    // NOTE: SLICC won't allow to reuse front() for different
    // values of the template parameter, thus we use an additional
    // def. to workaround that
    const T& next() const { return events.front().val; }

    // Event at the tail of the queue.
    const T& back() const { return events.back().val; }

    // Is the head event non-blocking ?
    bool frontNB() const { return events.front().nb; }

    // Is the tail event non-blocking ?
    bool backNB() const { return events.back().nb; }

    // True when no events are queued.
    bool empty() const { return events.empty(); }

    // Append a blocking event.
    void push(const T &elem) { events.push_back(Entry{elem, false}); }

    // Construct a blocking event in place at the tail.
    template<typename... Ts>
    void
    emplace(Ts&&... args)
    {
        events.push_back(Entry{T(std::forward<Ts>(args)...), false});
    }

    // Prepend a blocking event.
    void pushFront(const T &elem) { events.push_front(Entry{elem, false}); }

    // Append a non-blocking event.
    void pushNB(const T &elem) { events.push_back(Entry{elem, true}); }

    // Prepend a non-blocking event.
    void pushFrontNB(const T &elem) { events.push_front(Entry{elem, true}); }

    // Drop the head event.
    void pop() { events.pop_front(); }

    void print(std::ostream& out) const;
};
// Stream-insertion helper: delegates to TriggerQueue::print and flushes
// the stream.
template<class T>
inline std::ostream&
operator<<(std::ostream& out, const TriggerQueue<T>& obj)
{
    obj.print(out);
    out << std::flush;
    return out;
}
// Intentionally a no-op: nothing of the queue contents is printed, so the
// stream operator above emits no text for a TriggerQueue.
// NOTE(review): presumably kept empty to keep protocol trace output terse
// while still satisfying the print() interface -- confirm before adding
// output here.
template<class T>
inline void
TriggerQueue<T>::print(std::ostream& out) const
{
}
#endif // __MEM_RUBY_COMMON_QUEUE_HH__

View File

@@ -262,7 +262,9 @@ enumeration(MachineType, desc="...", default="MachineType_NULL") {
TCCdir, desc="Directory at the GPU L2 Cache (TCC)";
SQC, desc="GPU L1 Instr Cache (Sequencer Cache)";
RegionDir, desc="Region-granular directory";
RegionBuffer,desc="Region buffer for CPU and GPU";
RegionBuffer, desc="Region buffer for CPU and GPU";
Cache, desc="Generic coherent cache controller";
Memory, desc="Memory controller interface";
NULL, desc="null mach type";
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,398 @@
/*
* Copyright (c) 2021 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Outbound port definitions
out_port(reqOutPort, CHIRequestMsg, reqOut);
out_port(snpOutPort, CHIRequestMsg, snpOut);
out_port(rspOutPort, CHIResponseMsg, rspOut);
out_port(datOutPort, CHIDataMsg, datOut);
out_port(triggerOutPort, TriggerMsg, triggerQueue);
out_port(retryTriggerOutPort, RetryTriggerMsg, retryTriggerQueue);
out_port(replTriggerOutPort, TriggerMsg, replTriggerQueue);
out_port(reqRdyOutPort, CHIRequestMsg, reqRdy);
out_port(snpRdyOutPort, CHIRequestMsg, snpRdy);

// Include helper functions here. Some of them require the outports to be
// already defined
// Notice 'processNextState' and 'wakeupPending*' functions are defined after
// the required input ports. Currently the SLICC compiler does not support
// separate declaration and definition of functions in the .sm files.
include "CHI-cache-funcs.sm";

// Inbound port definitions and internal triggers queues
// Notice we never stall input ports connected to the network
// Incoming data and responses are always consumed.
// Incoming requests/snoop are moved to the respective internal rdy queue
// if a TBE can be allocated, or retried otherwise.

// Trigger events from the UD_T state
in_port(useTimerTable_in, Addr, useTimerTable, rank=11) {
  if (useTimerTable_in.isReady(clockEdge())) {
    Addr readyAddress := useTimerTable.nextAddress();
    trigger(Event:UseTimeout, readyAddress, getCacheEntry(readyAddress),
            getCurrentActiveTBE(readyAddress));
  }
}

// Response messages: always consumed; the triggered event is derived from
// the response type and the currently active TBE (if any).
in_port(rspInPort, CHIResponseMsg, rspIn, rank=10,
        rsc_stall_handler=rspInPort_rsc_stall_handler) {
  if (rspInPort.isReady(clockEdge())) {
    printResources();
    peek(rspInPort, CHIResponseMsg) {
      TBE tbe := getCurrentActiveTBE(in_msg.addr);
      trigger(respToEvent(in_msg.type, tbe), in_msg.addr,
              getCacheEntry(in_msg.addr), tbe);
    }
  }
}

// A resource stall on the response channel is a protocol bug, since
// responses must always be consumable.
bool rspInPort_rsc_stall_handler() {
  error("rspInPort must never stall\n");
  return false;
}

// Data messages: always consumed. A message may carry only part of a
// line (at most data_channel_size valid bytes per message).
in_port(datInPort, CHIDataMsg, datIn, rank=9,
        rsc_stall_handler=datInPort_rsc_stall_handler) {
  if (datInPort.isReady(clockEdge())) {
    printResources();
    peek(datInPort, CHIDataMsg) {
      // number of valid bytes carried by this message
      int received := in_msg.bitMask.count();
      assert((received <= data_channel_size) && (received > 0));
      trigger(dataToEvent(in_msg.type), in_msg.addr,
              getCacheEntry(in_msg.addr), getCurrentActiveTBE(in_msg.addr));
    }
  }
}

bool datInPort_rsc_stall_handler() {
  error("datInPort must never stall\n");
  return false;
}

// Snoops with an allocated TBE
in_port(snpRdyPort, CHIRequestMsg, snpRdy, rank=8,
        rsc_stall_handler=snpRdyPort_rsc_stall_handler) {
  if (snpRdyPort.isReady(clockEdge())) {
    printResources();
    peek(snpRdyPort, CHIRequestMsg) {
      assert(in_msg.allowRetry == false);
      TBE tbe := getCurrentActiveTBE(in_msg.addr);
      if (is_valid(tbe) && tbe.hasUseTimeout) {
        // we may be in the BUSY_INTR waiting for a cache block, but if
        // the timeout is set the snoop must still wait, so trigger the
        // stall from here to prevent creating other states
        trigger(Event:SnpStalled, in_msg.addr,
                getCacheEntry(in_msg.addr), tbe);
      } else {
        trigger(snpToEvent(in_msg.type), in_msg.addr,
                getCacheEntry(in_msg.addr), tbe);
      }
    }
  }
}

bool snpRdyPort_rsc_stall_handler() {
  error("snpRdyPort must never stall\n");
  return false;
}

// Wake up any snoops that were stalled waiting on this TBE.
void wakeupPendingSnps(TBE tbe) {
  if (tbe.wakeup_pending_snp) {
    Addr addr := tbe.addr;
    wakeup_port(snpRdyPort, addr);
    tbe.wakeup_pending_snp := false;
  }
}
// Incoming snoops
// Note: snoops are not retried, so the snoop channel is stalled if no
// Snp TBEs available
in_port(snpInPort, CHIRequestMsg, snpIn, rank=7) {
  if (snpInPort.isReady(clockEdge())) {
    // only non-home nodes receive external snoops
    assert(is_HN == false);
    printResources();
    peek(snpInPort, CHIRequestMsg) {
      assert(in_msg.allowRetry == false);
      trigger(Event:AllocSnoop, in_msg.addr,
              getCacheEntry(in_msg.addr), getCurrentActiveTBE(in_msg.addr));
    }
  }
}

// Retry action triggers
// These are handled before other triggers since a retried request should
// be enqueued ahead of a new request
// TODO: consider moving DoRetry to the triggerQueue
in_port(retryTriggerInPort, RetryTriggerMsg, retryTriggerQueue, rank=6,
        rsc_stall_handler=retryTriggerInPort_rsc_stall_handler) {
  if (retryTriggerInPort.isReady(clockEdge())) {
    printResources();
    peek(retryTriggerInPort, RetryTriggerMsg) {
      Event ev := in_msg.event;
      TBE tbe := getCurrentActiveTBE(in_msg.addr);
      assert((ev == Event:SendRetryAck) || (ev == Event:SendPCrdGrant) ||
             (ev == Event:DoRetry));
      if (ev == Event:DoRetry) {
        assert(is_valid(tbe));
        // retries issued while handling a request/replacement hazard use
        // a dedicated event
        if (tbe.is_req_hazard || tbe.is_repl_hazard) {
          ev := Event:DoRetry_Hazard;
        }
      }
      trigger(ev, in_msg.addr, getCacheEntry(in_msg.addr), tbe);
    }
  }
}

// On a resource stall, recycle the message and retry later.
bool retryTriggerInPort_rsc_stall_handler() {
  DPRINTF(RubySlicc, "Retry trigger queue resource stall\n");
  retryTriggerInPort.recycle(clockEdge(), cyclesToTicks(stall_recycle_lat));
  return true;
}

// Action triggers
in_port(triggerInPort, TriggerMsg, triggerQueue, rank=5,
        rsc_stall_handler=triggerInPort_rsc_stall_handler) {
  if (triggerInPort.isReady(clockEdge())) {
    printResources();
    peek(triggerInPort, TriggerMsg) {
      TBE tbe := getCurrentActiveTBE(in_msg.addr);
      assert(is_valid(tbe));
      if (in_msg.from_hazard != (tbe.is_req_hazard || tbe.is_repl_hazard)) {
        // possible when handling a snoop hazard and an action from the
        // initial transaction got woken up. Stall the action until the
        // hazard ends
        assert(in_msg.from_hazard == false);
        assert(tbe.is_req_hazard || tbe.is_repl_hazard);
        trigger(Event:ActionStalledOnHazard, in_msg.addr,
                getCacheEntry(in_msg.addr), tbe);
      } else {
        trigger(tbe.pendAction, in_msg.addr, getCacheEntry(in_msg.addr), tbe);
      }
    }
  }
}

// On a resource stall, recycle the message and retry later.
bool triggerInPort_rsc_stall_handler() {
  DPRINTF(RubySlicc, "Trigger queue resource stall\n");
  triggerInPort.recycle(clockEdge(), cyclesToTicks(stall_recycle_lat));
  return true;
}

// Wake up any actions that were stalled waiting on this TBE.
void wakeupPendingTgrs(TBE tbe) {
  if (tbe.wakeup_pending_tgr) {
    Addr addr := tbe.addr;
    wakeup_port(triggerInPort, addr);
    tbe.wakeup_pending_tgr := false;
  }
}

// internally triggered evictions
// no stall handler for this one since it doesn't make sense to try the next
// request when out of TBEs
in_port(replTriggerInPort, ReplacementMsg, replTriggerQueue, rank=4) {
  if (replTriggerInPort.isReady(clockEdge())) {
    printResources();
    peek(replTriggerInPort, ReplacementMsg) {
      TBE tbe := getCurrentActiveTBE(in_msg.addr);
      CacheEntry cache_entry := getCacheEntry(in_msg.addr);
      Event trigger := Event:null;
      // also evict upstream copies when configured to back-invalidate
      if (is_valid(cache_entry) &&
          ((upstreamHasUnique(cache_entry.state) && dealloc_backinv_unique) ||
           (upstreamHasShared(cache_entry.state) && dealloc_backinv_shared))) {
        trigger := Event:Global_Eviction;
      } else {
        if (is_HN) {
          trigger := Event:LocalHN_Eviction;
        } else {
          trigger := Event:Local_Eviction;
        }
      }
      trigger(trigger, in_msg.addr, cache_entry, tbe);
    }
  }
}
// Requests with an allocated TBE
in_port(reqRdyPort, CHIRequestMsg, reqRdy, rank=3,
        rsc_stall_handler=reqRdyPort_rsc_stall_handler) {
  if (reqRdyPort.isReady(clockEdge())) {
    printResources();
    peek(reqRdyPort, CHIRequestMsg) {
      CacheEntry cache_entry := getCacheEntry(in_msg.addr);
      TBE tbe := getCurrentActiveTBE(in_msg.addr);
      DirEntry dir_entry := getDirEntry(in_msg.addr);

      // Special case for possibly stale writebacks or evicts, detected
      // by checking the requestor against the directory entry state
      if (in_msg.type == CHIRequestType:WriteBackFull) {
        if (is_invalid(dir_entry) || (dir_entry.ownerExists == false) ||
            (dir_entry.owner != in_msg.requestor)) {
          trigger(Event:WriteBackFull_Stale, in_msg.addr, cache_entry, tbe);
        }
      } else if (in_msg.type == CHIRequestType:WriteEvictFull) {
        if (is_invalid(dir_entry) || (dir_entry.ownerExists == false) ||
            (dir_entry.ownerIsExcl == false) || (dir_entry.owner != in_msg.requestor)) {
          trigger(Event:WriteEvictFull_Stale, in_msg.addr, cache_entry, tbe);
        }
      } else if (in_msg.type == CHIRequestType:WriteCleanFull) {
        if (is_invalid(dir_entry) || (dir_entry.ownerExists == false) ||
            (dir_entry.ownerIsExcl == false) || (dir_entry.owner != in_msg.requestor)) {
          trigger(Event:WriteCleanFull_Stale, in_msg.addr, cache_entry, tbe);
        }
      } else if (in_msg.type == CHIRequestType:Evict) {
        if (is_invalid(dir_entry) ||
            (dir_entry.sharers.isElement(in_msg.requestor) == false)) {
          trigger(Event:Evict_Stale, in_msg.addr, cache_entry, tbe);
        }
      }

      // Normal request path
      trigger(reqToEvent(in_msg.type, in_msg.is_local_pf), in_msg.addr, cache_entry, tbe);
    }
  }
}

// On a resource stall, recycle the message and retry later.
bool reqRdyPort_rsc_stall_handler() {
  DPRINTF(RubySlicc, "ReqRdy queue resource stall\n");
  reqRdyPort.recycle(clockEdge(), cyclesToTicks(stall_recycle_lat));
  return true;
}

// Wake up any requests that were stalled waiting on this TBE.
void wakeupPendingReqs(TBE tbe) {
  if (tbe.wakeup_pending_req) {
    Addr addr := tbe.addr;
    wakeup_port(reqRdyPort, addr);
    tbe.wakeup_pending_req := false;
  }
}

// Incoming new requests
in_port(reqInPort, CHIRequestMsg, reqIn, rank=2,
        rsc_stall_handler=reqInPort_rsc_stall_handler) {
  if (reqInPort.isReady(clockEdge())) {
    printResources();
    peek(reqInPort, CHIRequestMsg) {
      if (in_msg.allowRetry) {
        trigger(Event:AllocRequest, in_msg.addr,
                getCacheEntry(in_msg.addr), getCurrentActiveTBE(in_msg.addr));
      } else {
        // allowRetry == false: the request cannot be retried
        trigger(Event:AllocRequestWithCredit, in_msg.addr,
                getCacheEntry(in_msg.addr), getCurrentActiveTBE(in_msg.addr));
      }
    }
  }
}

bool reqInPort_rsc_stall_handler() {
  error("reqInPort must never stall\n");
  return false;
}

// Incoming new sequencer requests
in_port(seqInPort, RubyRequest, mandatoryQueue, rank=1) {
  if (seqInPort.isReady(clockEdge())) {
    printResources();
    peek(seqInPort, RubyRequest) {
      trigger(Event:AllocSeqRequest, in_msg.LineAddress,
              getCacheEntry(in_msg.LineAddress),
              getCurrentActiveTBE(in_msg.LineAddress));
    }
  }
}

// Incoming new prefetch requests
in_port(pfInPort, RubyRequest, prefetchQueue, rank=0) {
  if (pfInPort.isReady(clockEdge())) {
    printResources();
    peek(pfInPort, RubyRequest) {
      trigger(Event:AllocPfRequest, in_msg.LineAddress,
              getCacheEntry(in_msg.LineAddress),
              getCurrentActiveTBE(in_msg.LineAddress));
    }
  }
}

// Drives the transaction's pending-action queue: when the TBE has no
// pending action and no outstanding messages (or the next action is
// non-blocking), schedule the next action through the trigger queue, or
// Event:Final when the action queue is empty.
void processNextState(Addr address, TBE tbe, CacheEntry cache_entry) {
  assert(is_valid(tbe));
  DPRINTF(RubySlicc, "GoToNextState expected_req_resp=%d expected_snp_resp=%d snd_pendEv=%d snd_pendBytes=%d\n",
          tbe.expected_req_resp.expected(),
          tbe.expected_snp_resp.expected(),
          tbe.snd_pendEv, tbe.snd_pendBytes.count());

  // if no pending trigger and not expecting to receive anything, enqueue
  // next
  bool has_nb_trigger := (tbe.actions.empty() == false) &&
                         tbe.actions.frontNB() &&
                         (tbe.snd_pendEv == false);
  int expected_msgs := tbe.expected_req_resp.expected() +
                       tbe.expected_snp_resp.expected() +
                       tbe.snd_pendBytes.count();
  if ((tbe.pendAction == Event:null) && ((expected_msgs == 0) || has_nb_trigger)) {
    // honor any extra delay requested for the next action
    Cycles trigger_latency := intToCycles(0);
    if (tbe.delayNextAction > curTick()) {
      trigger_latency := ticksToCycles(tbe.delayNextAction) -
                         ticksToCycles(curTick());
      tbe.delayNextAction := intToTick(0);
    }

    tbe.pendAction := Event:null;
    if (tbe.actions.empty()) {
      // time to go to the final state
      tbe.pendAction := Event:Final;
    } else {
      tbe.pendAction := tbe.actions.front();
      tbe.actions.pop();
    }
    assert(tbe.pendAction != Event:null);
    enqueue(triggerOutPort, TriggerMsg, trigger_latency) {
      out_msg.addr := tbe.addr;
      out_msg.from_hazard := tbe.is_req_hazard || tbe.is_repl_hazard;
    }
  }

  printTBEState(tbe);

  // we might be going to BUSY_INTERRUPTABLE so wakeup pending snoops
  // if any
  wakeupPendingSnps(tbe);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,775 @@
/*
* Copyright (c) 2021 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
machine(MachineType:Cache, "Cache coherency protocol") :
  // Sequencer to insert Load/Store requests.
  // May be null if this is not an L1 cache
  Sequencer * sequencer;
  // Cache for storing local lines.
  // NOTE: it is assumed that cache tag and directory lookups and updates
  // happen in parallel. The cache tag latency is used for both cases.
  CacheMemory * cache;
  // Additional pipeline latency modeling for the different request types
  // When defined, these are applied after the initial tag array read and
  // sending necessary snoops.
  Cycles read_hit_latency := 0;
  Cycles read_miss_latency := 0;
  Cycles write_fe_latency := 0; // Front-end: Rcv req -> Snd req
  Cycles write_be_latency := 0; // Back-end: Rcv ack -> Snd data
  Cycles fill_latency := 0; // Fill latency
  Cycles snp_latency := 0; // Applied before handling any snoop
  Cycles snp_inv_latency := 0; // Additional latency for invalidating snoops
  // Waits for cache data array write to complete before executing next action
  // Note a new write will always block if bank stalls are enabled in the cache
  bool wait_for_cache_wr := "False";
  // Request TBE allocation latency
  Cycles allocation_latency := 0;
  // Enqueue latencies for outgoing messages
  // NOTE: should remove this and only use parameters above?
  Cycles request_latency := 1;
  Cycles response_latency := 1;
  Cycles snoop_latency := 1;
  Cycles data_latency := 1;
  // When an SC fails, unique lines are locked to this controller for a period
  // proportional to the number of consecutive failed SC requests. See
  // the usage of sc_lock_multiplier and llscCheckMonitor for details
  int sc_lock_base_latency_cy := 4;
  int sc_lock_multiplier_inc := 4;
  int sc_lock_multiplier_decay := 1;
  int sc_lock_multiplier_max := 256;
  bool sc_lock_enabled;
  // Recycle latency on resource stalls
  Cycles stall_recycle_lat := 1;
  // Notify the sequencer when a line is evicted. This should be set if the
  // sequencer is not null and handles LL/SC request types.
  bool send_evictions;
  // Number of entries in the snoop and replacement TBE tables
  // notice the "number_of_TBEs" parameter is defined by AbstractController
  int number_of_snoop_TBEs;
  int number_of_repl_TBEs;
  // replacements use the same TBE slot as the request that triggered it
  // in this case the number_of_repl_TBEs parameter is ignored
  bool unify_repl_TBEs;
  // wait for the final tag update to complete before deallocating TBE and
  // going to final stable state
  bool dealloc_wait_for_tag := "False";
  // Width of the data channel. Data transfers are split in multiple messages
  // at the protocol level when this is less than the cache line size.
  int data_channel_size;
  // Set when this is used as the home node and point of coherency of the
  // system. Must be false for every other cache level.
  bool is_HN;
  // Enables direct memory transfers between SNs and RNs when the data is
  // not cached in the HN.
  bool enable_DMT;
  // Use ReadNoSnpSep instead of ReadNoSnp for DMT requests, which allows
  // the TBE to be deallocated at HNFs before the requester receives the data
  bool enable_DMT_early_dealloc := "False";
  // Enables direct cache transfers, i.e., use forwarding snoops whenever
  // possible.
  bool enable_DCT;
  // Use separate Comp/DBIDResp responses for WriteUnique
  bool comp_wu := "False";
  // additional latency for the WU Comp response
  Cycles comp_wu_latency := 0;
  // Controls cache clusivity for different request types.
  // set all alloc_on* to false to completely disable caching
  bool alloc_on_readshared;
  bool alloc_on_readunique;
  bool alloc_on_readonce;
  bool alloc_on_writeback;
  bool alloc_on_seq_acc;
  bool alloc_on_seq_line_write;
  // Controls if the clusivity is strict.
  bool dealloc_on_unique;
  bool dealloc_on_shared;
  bool dealloc_backinv_unique;
  bool dealloc_backinv_shared;
  // If the responder has the line in UC or UD state, propagate this state
  // on a ReadShared. Notice data won't be deallocated if dealloc_on_unique is
  // set
  bool fwd_unique_on_readshared := "False";
  // Allow receiving data in SD state.
  bool allow_SD;
  // stall new requests to destinations with a pending retry
  bool throttle_req_on_retry := "True";
  // Use prefetcher
  bool use_prefetcher, default="false";

  // Message Queues
  // Interface to the network
  // Note vnet_type is used by Garnet only. "response" type is assumed to
  // have data, so use it for data channels and "none" for the rest.
  // network="To" for outbound queue; network="From" for inbound
  // virtual networks: 0=request, 1=snoop, 2=response, 3=data
  MessageBuffer * reqOut, network="To", virtual_network="0", vnet_type="none";
  MessageBuffer * snpOut, network="To", virtual_network="1", vnet_type="none";
  MessageBuffer * rspOut, network="To", virtual_network="2", vnet_type="none";
  MessageBuffer * datOut, network="To", virtual_network="3", vnet_type="response";
  MessageBuffer * reqIn, network="From", virtual_network="0", vnet_type="none";
  MessageBuffer * snpIn, network="From", virtual_network="1", vnet_type="none";
  MessageBuffer * rspIn, network="From", virtual_network="2", vnet_type="none";
  MessageBuffer * datIn, network="From", virtual_network="3", vnet_type="response";

  // Mandatory queue for receiving requests from the sequencer
  MessageBuffer * mandatoryQueue;

  // Internal queue for trigger events
  MessageBuffer * triggerQueue;

  // Internal queue for retry trigger events
  MessageBuffer * retryTriggerQueue;

  // Internal queue for accepted requests
  MessageBuffer * reqRdy;

  // Internal queue for accepted snoops
  MessageBuffer * snpRdy;

  // Internal queue for eviction requests
  MessageBuffer * replTriggerQueue;

  // Prefetch queue for receiving prefetch requests from prefetcher
  MessageBuffer * prefetchQueue;

  // Requests that originated from a prefetch in an upstream cache are treated
  // as demand access in this cache. Notice the demand access stats are still
  // updated only on true demand requests.
  bool upstream_prefetch_trains_prefetcher := "False";
{

  ////////////////////////////////////////////////////////////////////////////
  // States
  ////////////////////////////////////////////////////////////////////////////

  state_declaration(State, default="Cache_State_null") {
    // Stable states
    I, AccessPermission:Invalid, desc="Invalid / not present locally or upstream";

    // States when block is present in local cache only
    SC, AccessPermission:Read_Only, desc="Shared Clean";
    UC, AccessPermission:Read_Write, desc="Unique Clean";
    SD, AccessPermission:Read_Only, desc="Shared Dirty";
    UD, AccessPermission:Read_Write, desc="Unique Dirty";
    UD_T, AccessPermission:Read_Write, desc="UD with use timeout";

    // Invalid in local cache but present in upstream caches
    RU, AccessPermission:Invalid, desc="Upstream requester has line in UD/UC";
    RSC, AccessPermission:Invalid, desc="Upstream requester has line in SC";
    RSD, AccessPermission:Invalid, desc="Upstream requester has line in SD and maybe SC";
    RUSC, AccessPermission:Invalid, desc="RSC + this node still has exclusive access";
    RUSD, AccessPermission:Invalid, desc="RSD + this node still has exclusive access";

    // Both in local and upstream caches. In some cases local maybe stale
    SC_RSC, AccessPermission:Read_Only, desc="SC + RSC";
    SD_RSC, AccessPermission:Read_Only, desc="SD + RSC";
    SD_RSD, AccessPermission:Read_Only, desc="SD + RSD";
    UC_RSC, AccessPermission:Read_Write, desc="UC + RSC";
    UC_RU, AccessPermission:Invalid, desc="UC + RU";
    UD_RU, AccessPermission:Invalid, desc="UD + RU";
    UD_RSD, AccessPermission:Read_Write, desc="UD + RSD";
    UD_RSC, AccessPermission:Read_Write, desc="UD + RSC";

    // Generic transient state
    // There is only a transient "BUSY" state. The actions taken at this state
    // and the final stable state are defined by information in the TBE.
    // While on BUSY_INTR, we will reply to incoming snoops and the
    // state of the cache line may change. While on BUSY_BLKD snoops
    // are blocked
    BUSY_INTR, AccessPermission:Busy, desc="Waiting for data and/or ack";
    BUSY_BLKD, AccessPermission:Busy, desc="Waiting for data and/or ack; blocks snoops";

    // Null state for debugging
    null, AccessPermission:Invalid, desc="Null state";
  }


  ////////////////////////////////////////////////////////////////////////////
  // Events
  ////////////////////////////////////////////////////////////////////////////

  enumeration(Event) {
    // Events triggered by incoming requests. Allocate TBE and move
    // request or snoop to the ready queue
    AllocRequest, desc="Allocates a TBE for a request. Triggers a retry if table is full";
    AllocRequestWithCredit, desc="Allocates a TBE for a request. Always succeeds.";
    AllocSeqRequest, desc="Allocates a TBE for a sequencer request. Stalls requests if table is full";
    AllocPfRequest, desc="Allocates a TBE for a prefetch request. Stalls requests if table is full";
    AllocSnoop, desc="Allocates a TBE for a snoop. Stalls snoop if table is full";

    // Events triggered by sequencer requests or snoops in the rdy queue
    // See CHIRequestType in CHI-msg.sm for descriptions
    Load;
    Store;
    Prefetch;
    ReadShared;
    ReadNotSharedDirty;
    ReadUnique;
    ReadUnique_PoC;
    ReadOnce;
    CleanUnique;
    Evict;
    WriteBackFull;
    WriteEvictFull;
    WriteCleanFull;
    WriteUnique;
    WriteUniquePtl_PoC;
    WriteUniqueFull_PoC;
    WriteUniqueFull_PoC_Alloc;
    SnpCleanInvalid;
    SnpShared;
    SnpSharedFwd;
    SnpNotSharedDirtyFwd;
    SnpUnique;
    SnpUniqueFwd;
    SnpOnce;
    SnpOnceFwd;
    SnpStalled; // A snoop stall triggered from the inport

    // Events triggered by incoming response messages
    // See CHIResponseType in CHI-msg.sm for descriptions
    CompAck;
    Comp_I;
    Comp_UC;
    Comp_SC;
    CompDBIDResp;
    DBIDResp;
    Comp;
    ReadReceipt;
    RespSepData;
    SnpResp_I;
    SnpResp_I_Fwded_UC;
    SnpResp_I_Fwded_UD_PD;
    SnpResp_SC;
    SnpResp_SC_Fwded_SC;
    SnpResp_SC_Fwded_SD_PD;
    SnpResp_UC_Fwded_I;
    SnpResp_UD_Fwded_I;
    SnpResp_SC_Fwded_I;
    SnpResp_SD_Fwded_I;
    RetryAck;
    RetryAck_PoC;
    PCrdGrant;
    PCrdGrant_PoC;
    RetryAck_Hazard;
    RetryAck_PoC_Hazard;
    PCrdGrant_Hazard;
    PCrdGrant_PoC_Hazard;

    // Events triggered by incoming data response messages
    // See CHIDataType in CHI-msg.sm for descriptions
    CompData_I;
    CompData_UC;
    CompData_SC;
    CompData_UD_PD;
    CompData_SD_PD;
    DataSepResp_UC;
    CBWrData_I;
    CBWrData_UC;
    CBWrData_SC;
    CBWrData_UD_PD;
    CBWrData_SD_PD;
    NCBWrData;
    SnpRespData_I;
    SnpRespData_I_PD;
    SnpRespData_SC;
    SnpRespData_SC_PD;
    SnpRespData_SD;
    SnpRespData_UC;
    SnpRespData_UD;
    SnpRespData_SC_Fwded_SC;
    SnpRespData_SC_Fwded_SD_PD;
    SnpRespData_SC_PD_Fwded_SC;
    SnpRespData_I_Fwded_SD_PD;
    SnpRespData_I_PD_Fwded_SC;
    SnpRespData_I_Fwded_SC;

    // We use special events for requests that we detect to be stale. This is
    // done for debugging only. We sent a stale response so the requester can
    // confirm the request is indeed stale and this is not a protocol bug.
    // A Write or Evict becomes stale when the requester receives a snoop that
    // changes the state of the data while the request was pending.
    // Actual CHI implementations don't have this check.
    Evict_Stale;
    WriteBackFull_Stale;
    WriteEvictFull_Stale;
    WriteCleanFull_Stale;

    // Cache fill handling
    CheckCacheFill, desc="Check if need to write or update the cache and trigger any necessary allocation and evictions";

    // Internal requests generated to evict or writeback a local copy
    // to free-up cache space
    Local_Eviction, desc="Evicts/WB the local copy of the line";
    LocalHN_Eviction, desc="Local_Eviction triggered when is HN";
    Global_Eviction, desc="Local_Eviction + back-invalidate line in all upstream requesters";

    // Events triggered from tbe.actions
    // In general, for each event we define a single transition from
    // BUSY_BLKD and/or BUSY_INTR.
    // See processNextState functions and Initiate_* actions.
    // All triggered transitions execute in the same cycle until it has to wait
    // for pending responses or data (set by expected_req_resp and
    // expected_snp_resp). Triggers queued with pushNB are executed even if
    // there are pending messages.

    // Cache/directory access events. Notice these only model the latency.
    TagArrayRead, desc="Read the cache and directory tag array";
    TagArrayWrite, desc="Write the cache and directory tag array";
    DataArrayRead, desc="Read the cache data array";
    DataArrayWrite, desc="Write the cache data array";
    DataArrayWriteOnFill, desc="Write the cache data array (cache fill)";

    // Events for modeling the pipeline latency
    ReadHitPipe, desc="Latency of reads served from local cache";
    ReadMissPipe, desc="Latency of reads not served from local cache";
    WriteFEPipe, desc="Front-end latency of write requests";
    WriteBEPipe, desc="Back-end latency of write requests";
    FillPipe, desc="Cache fill latency";
    SnpSharedPipe, desc="Latency for SnpShared requests";
    SnpInvPipe, desc="Latency for SnpUnique and SnpCleanInv requests";
    SnpOncePipe, desc="Latency for SnpOnce requests";

    // Send a read request downstream.
    SendReadShared, desc="Send a ReadShared or a ReadNotSharedDirty if allow_SD is false";
    SendReadOnce, desc="Send a ReadOnce";
    SendReadNoSnp, desc="Send a SendReadNoSnp";
    SendReadNoSnpDMT, desc="Send a SendReadNoSnp using DMT";
    SendReadUnique, desc="Send a ReadUnique";
    SendCompAck, desc="Send CompAck";

    // Read handling at the completer
    SendCompData, desc="Send CompData";
    WaitCompAck, desc="Expect to receive CompAck";
    SendRespSepData, desc="Send RespSepData for a DMT request";

    // Send a write request downstream.
    SendWriteBackOrWriteEvict, desc="Send a WriteBackFull (if line is UD or SD) or WriteEvictFull (if UC)";
    SendWriteClean, desc="Send a WriteCleanFull";
    SendWriteNoSnp, desc="Send a WriteNoSnp for a full line";
    SendWriteNoSnpPartial, desc="Send a WriteNoSnpPtl";
    SendWriteUnique, desc="Send a WriteUniquePtl";
    SendWBData, desc="Send writeback data";
    SendWUData, desc="Send write unique data";
    SendWUDataCB, desc="Send write unique data from a sequencer callback";

    // Write handling at the completer
    SendCompDBIDResp, desc="Ack WB with CompDBIDResp";
    SendCompDBIDRespStale, desc="Ack stale WB with CompDBIDResp";
    SendCompDBIDResp_WU, desc="Ack WU with CompDBIDResp and set expected data";
    SendDBIDResp_WU, desc="Ack WU with DBIDResp and set expected data";
    SendComp_WU, desc="Ack WU completion";

    // Dataless requests
    SendEvict, desc="Send a Evict";
    SendCompIResp, desc="Ack Evict with Comp_I";
    SendCleanUnique,desc="Send a CleanUnique";
    SendCompUCResp, desc="Ack CleanUnique with Comp_UC";

    // Checks if an upgrade using a CleanUnique was successful
    CheckUpgrade_FromStore, desc="Upgrade needed by a Store";
    CheckUpgrade_FromCU, desc="Upgrade needed by an upstream CleanUnique";
    CheckUpgrade_FromRU, desc="Upgrade needed by an upstream ReadUnique";

    // Snoop requests
    // SnpNotSharedDirty are sent instead of SnpShared for ReadNotSharedDirty
    SendSnpShared, desc="Send a SnpShared/SnpNotSharedDirty to sharer in UC,UD, or SD state";
    SendSnpSharedFwdToOwner, desc="Send a SnpSharedFwd/SnpNotSharedDirtyFwd to sharer in UC,UD, or SD state";
    SendSnpSharedFwdToSharer, desc="Send a SnpSharedFwd/SnpNotSharedDirtyFwd to a sharer in SC state";
    SendSnpOnce, desc="Send a SnpOnce to a sharer";
    SendSnpOnceFwd, desc="Send a SnpOnceFwd to a sharer";
    SendSnpUnique, desc="Send a SnpUnique to all sharers";
    SendSnpUniqueRetToSrc, desc="Send a SnpUnique to all sharers. Sets RetToSrc for only one sharer.";
    SendSnpUniqueFwd, desc="Send a SnpUniqueFwd to a single sharer";
    SendSnpCleanInvalid, desc="Send a SnpCleanInvalid to all sharers";
    SendSnpCleanInvalidNoReq, desc="Send a SnpCleanInvalid to all sharers except requestor";

    // Snoop responses
    SendSnpData, desc="Send SnpRespData as snoop reply";
    SendSnpIResp, desc="Send SnpResp_I as snoop reply";
    SendInvSnpResp, desc="Check data state and queue either SendSnpIResp or SendSnpData";
    SendSnpUniqueFwdCompData, desc="Send CompData to SnpUniqueFwd target and queue either SendSnpFwdedData or SendSnpFwdedResp";
    SendSnpSharedFwdCompData, desc="Send CompData to SnpUniqueFwd target and queue either SendSnpFwdedData or SendSnpFwdedResp";
    SendSnpNotSharedDirtyFwdCompData, desc="Send CompData to SnpNotSharedDirtyFwd target and queue either SendSnpFwdedData or SendSnpFwdedResp";
    SendSnpOnceFwdCompData, desc="Send CompData to SnpOnceFwd target and queue either SendSnpFwdedData or SendSnpFwdedResp";
    SendSnpFwdedData, desc="Send SnpResp for a forwarding snoop";
    SendSnpFwdedResp, desc="Send SnpRespData for a forwarding snoop";

    // Retry handling
    SendRetryAck, desc="Send RetryAck";
    SendPCrdGrant, desc="Send PCrdGrant";
    DoRetry, desc="Resend the current pending request";
    DoRetry_Hazard, desc="DoRetry during a hazard";

    // Misc triggers
    LoadHit, desc="Complete a load hit";
    StoreHit, desc="Complete a store hit";
    UseTimeout, desc="Transition from UD_T -> UD";
    RestoreFromHazard, desc="Restore from a snoop hazard";
    TX_Data, desc="Transmit pending data messages";
    MaintainCoherence, desc="Queues a WriteBack or Evict before dropping the only valid copy of the block";
    FinishCleanUnique, desc="Sends acks and performs any writeback after a CleanUnique";
    ActionStalledOnHazard, desc="Stall a trigger action until we finish handling a snoop hazard";

    // This is triggered once a transaction doesn't have
    // any queued action and is not expecting responses/data. The transaction
    // is finalized and the next stable state is stored in the cache/directory
    // See the processNextState and makeFinalState functions
    Final;

    null;
  }


  ////////////////////////////////////////////////////////////////////////////
  // Data structures
  ////////////////////////////////////////////////////////////////////////////

  // Cache block size
  int blockSize, default="RubySystem::getBlockSizeBytes()";

  // CacheEntry
  structure(CacheEntry, interface="AbstractCacheEntry") {
    State state, desc="SLICC line state";
    DataBlock DataBlk, desc="data for the block";
    bool HWPrefetched, default="false", desc="Set if this cache entry was prefetched";
  }

  // Directory entry
  structure(DirEntry, interface="AbstractCacheEntry", main="false") {
    NetDest sharers, desc="All upstream controllers that have this line (includes owner)";
    MachineID owner, desc="Controller that has the line in UD,UC, or SD state";
    bool ownerExists, default="false", desc="true if owner exists";
    bool ownerIsExcl, default="false", desc="true if owner is UD or UC";
    State state, desc="SLICC line state";
  }

  // Helper class for tracking expected response and data messages
  structure(ExpectedMap, external ="yes") {
    void clear(int dataChunks);
    void addExpectedRespType(CHIResponseType);
    void addExpectedDataType(CHIDataType);
    void setExpectedCount(int val);
    void addExpectedCount(int val);
    bool hasExpected();
    bool hasReceivedResp();
    bool hasReceivedData();
    int expected();
    int received();
    bool receiveResp(CHIResponseType);
    bool receiveData(CHIDataType);
    bool receivedDataType(CHIDataType);
    bool receivedRespType(CHIResponseType);
  }

  // Tracks a pending retry
  structure(RetryQueueEntry) {
    Addr addr, desc="Line address";
    MachineID retryDest, desc="Retry destination";
  }

  // Queue for event triggers. Used to specify a list of actions that need
  // to be performed across multiple transitions.
  // This class is also used to track pending retries
  structure(TriggerQueue, external ="yes") {
    Event front();
    Event back();
    bool frontNB();
    bool backNB();
    bool empty();
    void push(Event);
    void pushNB(Event);
    void pushFront(Event);
    void pushFrontNB(Event);
    void pop();
    // For the retry queue
    void emplace(Addr,MachineID);
    RetryQueueEntry next(); //SLICC won't allow to reuse front()
  }

  // TBE fields
  structure(TBE, desc="Transaction buffer entry definition") {
    // in which table was this allocated
    bool is_req_tbe, desc="Allocated in the request table";
    bool is_snp_tbe, desc="Allocated in the snoop table";
    bool is_repl_tbe, desc="Allocated in the replacements table";

    int storSlot, desc="Slot in the storage tracker occupied by this entry";

    // Transaction info mostly extracted from the request message
    Addr addr, desc="Line address for this TBE";
    Addr accAddr, desc="Access address for Load/Store/WriteUniquePtl; otherwise == addr";
    int accSize, desc="Access size for Load/Store/WriteUniquePtl; otherwise == blockSize";
    CHIRequestType reqType, desc="Request type that initiated this transaction";
    MachineID requestor, desc="Requestor ID";
    MachineID fwdRequestor, desc="Requestor to receive data on fwding snoops";
    bool use_DMT, desc="Use DMT for this transaction";
    bool use_DCT, desc="Use DCT for this transaction";

    // if either is set prefetchers are not notified on miss/hit/fill and
    // demand hit/miss stats are not incremented
    bool is_local_pf, desc="Request generated by a local prefetcher";
    bool is_remote_pf, desc="Request generated a prefetcher in another cache";

    // NOTE: seqReq is a smart pointer pointing to original CPU request object
    // that triggers transactions associated with this TBE. seqReq carries some
    // information (e.g., PC of requesting instruction, virtual address of this
    // request, etc.). Not all transactions have this field set if they are not
    // triggered directly by a demand request from CPU.
    RequestPtr seqReq, default="nullptr", desc="Pointer to original request from CPU/sequencer";
    bool isSeqReqValid, default="false", desc="Set if seqReq is valid (not nullptr)";

    // Transaction state information
    State state, desc="SLICC line state";

    // Transient state information. These are set at the beginning of a
    // transaction and updated as data and responses are received. After
    // finalizing the transactions these are used to create the next SLICC
    // stable state.
    bool hasUseTimeout, desc="Line is locked under store/use timeout";
    DataBlock dataBlk, desc="Local copy of the line";
    WriteMask dataBlkValid, desc="Marks which bytes in the DataBlock are valid";
    bool dataValid, desc="Local copy is valid";
    bool dataDirty, desc="Local copy is dirty";
    bool dataMaybeDirtyUpstream, desc="Line maybe dirty upstream";
    bool dataUnique, desc="Line is unique either locally or upstream";
    bool dataToBeInvalid, desc="Local copy will be invalidated at the end of transaction";
    bool dataToBeSharedClean, desc="Local copy will become SC at the end of transaction";

    NetDest dir_sharers, desc="Upstream controllers that have the line (includes owner)";
    MachineID dir_owner, desc="Owner ID";
    bool dir_ownerExists, desc="Owner ID is valid";
    bool dir_ownerIsExcl, desc="Owner is UD or UC; SD otherwise";

    bool doCacheFill, desc="Write valid data to the cache when completing transaction";

    // NOTE: dataMaybeDirtyUpstream and dir_ownerExists are the same except
    // when we had just sent dirty data upstream and are waiting for ack to set
    // dir_ownerExists

    // Helper structures to track expected events and additional transient
    // state info

    // List of actions to be performed while on a transient state
    // See the processNextState function for details
    TriggerQueue actions, template="<Cache_Event>", desc="List of actions";
    Event pendAction, desc="Current pending action";
    Tick delayNextAction, desc="Delay next action until given tick";
    State finalState, desc="Final state; set when pendAction==Final";

    // List of expected responses and data. Checks the type of data against the
    // expected ones for debugging purposes
    // See the processNextState function for details
    ExpectedMap expected_req_resp, template="<CHIResponseType,CHIDataType>";
    ExpectedMap expected_snp_resp, template="<CHIResponseType,CHIDataType>";
    bool defer_expected_comp; // expect to receive Comp before the end of transaction
    CHIResponseType slicchack1; // fix compiler not including headers
    CHIDataType slicchack2; // fix compiler not including headers

    // Tracks pending data messages that need to be generated when sending
    // a line
    bool snd_pendEv, desc="Is there a pending tx event ?";
    WriteMask snd_pendBytes, desc="Which bytes are pending transmission";
    CHIDataType snd_msgType, desc="Type of message being sent";
    MachineID snd_destination, desc="Data destination";

    // Tracks how to update the directory when receiving a CompAck
    bool updateDirOnCompAck, desc="Update directory on CompAck";
    bool requestorToBeOwner, desc="Sets dir_ownerExists";
    bool requestorToBeExclusiveOwner, desc="Sets dir_ownerIsExcl";
    // NOTE: requestor always added to dir_sharers if updateDirOnCompAck is set

    // Set for incoming snoop requests
    bool snpNeedsData, desc="Set if snoop requires data as response";
    State fwdedState, desc="State of CompData sent due to a forwarding snoop";
    bool is_req_hazard, desc="Snoop hazard with an outstanding request";
    bool is_repl_hazard, desc="Snoop hazard with an outstanding writeback request";
    bool is_stale, desc="Request is now stale because of a snoop hazard";

    // Tracks requests sent downstream
    CHIRequestType pendReqType, desc="Sent request type";
    bool pendReqAllowRetry, desc="Sent request can be retried";
    bool rcvdRetryAck, desc="Received a RetryAck";
    bool rcvdRetryCredit, desc="Received a PCrdGrant";
    // NOTE: the message is retried only after receiving both RetryAck and
    // PCrdGrant. A request can be retried only once.
    // These are a copy of the retry msg fields in case we need to retry
    Addr pendReqAccAddr;
    int pendReqAccSize;
    NetDest pendReqDest;
    bool pendReqD2OrigReq;
    bool pendReqRetToSrc;

    // This TBE stalled a message and thus we need to call wakeUpBuffers
    // at some point
    bool wakeup_pending_req;
    bool wakeup_pending_snp;
    bool wakeup_pending_tgr;
  }

  // TBE table definition
  structure(TBETable, external ="yes") {
    TBE lookup(Addr);
    void allocate(Addr);
    void deallocate(Addr);
    bool isPresent(Addr);
  }

  structure(TBEStorage, external ="yes") {
    int size();
    int capacity();
    int reserved();
    int slotsAvailable();
    bool areNSlotsAvailable(int n);
    void incrementReserved();
    void decrementReserved();
    int addEntryToNewSlot();
    void addEntryToSlot(int slot);
    void removeEntryFromSlot(int slot);
  }

  // Directory memory definition
  structure(PerfectCacheMemory, external = "yes") {
    void allocate(Addr);
    void deallocate(Addr);
    DirEntry lookup(Addr);
    bool isTagPresent(Addr);
  }

  // Directory
  PerfectCacheMemory directory, template="<Cache_DirEntry>";

  // Tracks unique lines locked after a store miss
  TimerTable useTimerTable;

  // Multiplies sc_lock_base_latency to obtain the lock timeout.
  // This is incremented at Profile_Eviction and decays on
  // store miss completion
  int sc_lock_multiplier, default="0";

  // Definitions of the TBE tables

  // Main TBE table used for incoming requests
  TBETable TBEs, template="<Cache_TBE>", constructor="m_number_of_TBEs";
  TBEStorage storTBEs, constructor="this, m_number_of_TBEs";

  // TBE table for WriteBack/Evict requests generated by a replacement
  // Notice storTBEs will be used when unify_repl_TBEs is set
  TBETable replTBEs, template="<Cache_TBE>", constructor="m_unify_repl_TBEs ? m_number_of_TBEs : m_number_of_repl_TBEs";
  TBEStorage storReplTBEs, constructor="this, m_number_of_repl_TBEs";

  // TBE table for incoming snoops
  TBETable snpTBEs, template="<Cache_TBE>", constructor="m_number_of_snoop_TBEs";
  TBEStorage storSnpTBEs, constructor="this, m_number_of_snoop_TBEs";

  // Retry handling

  // Destinations that will be sent PCrdGrant when a TBE becomes available
  TriggerQueue retryQueue, template="<Cache_RetryQueueEntry>";

  // Pending RetryAck/PCrdGrant/DoRetry
  structure(RetryTriggerMsg, interface="Message") {
    Addr addr;
    Event event;
    MachineID retryDest;

    bool functionalRead(Packet *pkt) { return false; }
    bool functionalRead(Packet *pkt, WriteMask &mask) { return false; }
    bool functionalWrite(Packet *pkt) { return false; }
  }

  // Destinations from which we received a RetryAck. Sending new requests to
  // these destinations will be blocked until a PCrdGrant is received if
  // throttle_req_on_retry is set
  NetDest destsWaitingRetry;

  // Pending transaction actions (generated by TBE:actions)
  structure(TriggerMsg, interface="Message") {
    Addr addr;
    bool from_hazard; // this action was generated during a snoop hazard

    bool functionalRead(Packet *pkt) { return false; }
    bool functionalRead(Packet *pkt, WriteMask &mask) { return false; }
    bool functionalWrite(Packet *pkt) { return false; }
  }

  // Internal replacement request
  structure(ReplacementMsg, interface="Message") {
    Addr addr;
    Addr from_addr;
    int slot; // set only when unify_repl_TBEs is set

    bool functionalRead(Packet *pkt) { return false; }
    bool functionalRead(Packet *pkt, WriteMask &mask) { return false; }
    bool functionalWrite(Packet *pkt) { return false; }
  }

  ////////////////////////////////////////////////////////////////////////////
  // Input/output port definitions
  ////////////////////////////////////////////////////////////////////////////

  include "CHI-cache-ports.sm";
  // CHI-cache-ports.sm also includes CHI-cache-funcs.sm

  ////////////////////////////////////////////////////////////////////////////
  // Actions and transitions
  ////////////////////////////////////////////////////////////////////////////

  include "CHI-cache-actions.sm";
  include "CHI-cache-transitions.sm";

}

View File

@@ -0,0 +1,792 @@
/*
* Copyright (c) 2021 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
machine(MachineType:Memory, "Memory controller interface") :
// no explicit modeling of allocation latency like the Caches, so add one
// cycle to the response enqueue latency as default
Cycles response_latency := 2;
Cycles data_latency := 1;
Cycles to_memory_controller_latency := 1;
int data_channel_size;
// Interface to the network
// Note vnet_type is used by Garnet only. "response" type is assumed to
// have data, so use it for data channels and "none" for the rest.
// network="To" for outbound queue; network="From" for inbound
// virtual networks: 0=request, 1=snoop, 2=response, 3=data
MessageBuffer * reqOut, network="To", virtual_network="0", vnet_type="none";
MessageBuffer * snpOut, network="To", virtual_network="1", vnet_type="none";
MessageBuffer * rspOut, network="To", virtual_network="2", vnet_type="none";
MessageBuffer * datOut, network="To", virtual_network="3", vnet_type="response";
MessageBuffer * reqIn, network="From", virtual_network="0", vnet_type="none";
MessageBuffer * snpIn, network="From", virtual_network="1", vnet_type="none";
MessageBuffer * rspIn, network="From", virtual_network="2", vnet_type="none";
MessageBuffer * datIn, network="From", virtual_network="3", vnet_type="response";
// Requests that can allocate a TBE
MessageBuffer * reqRdy;
// Data/ack to/from memory
MessageBuffer * requestToMemory;
MessageBuffer * responseFromMemory;
// Trigger queue for internal events
MessageBuffer * triggerQueue;
{
////////////////////////////////////////////////////////////////////////////
// States
////////////////////////////////////////////////////////////////////////////
state_declaration(State, desc="Transaction states", default="Memory_State_READY") {
// We don't know if the line is cached, so the memory copy is maybe stable
READY, AccessPermission:Backing_Store, desk="Ready to transfer the line";
WAITING_NET_DATA, AccessPermission:Backing_Store_Busy, desc="Waiting data from the network";
SENDING_NET_DATA, AccessPermission:Backing_Store_Busy, desc="Sending data to the network";
READING_MEM, AccessPermission:Backing_Store_Busy, desc="Waiting data from memory";
// Null state for debugging; allow writes
null, AccessPermission:Backing_Store, desc="Null state";
}
////////////////////////////////////////////////////////////////////////////
// Events
////////////////////////////////////////////////////////////////////////////
enumeration(Event, desc="Memory events") {
// Checks if a request can allocate a TBE be moved to reqRdy
CheckAllocTBE;
CheckAllocTBE_WithCredit;
// Requests
WriteNoSnpPtl;
WriteNoSnp;
ReadNoSnp;
ReadNoSnpSep;
// Data
WriteData;
// Memory side
MemoryData;
MemoryAck;
// Internal event triggers
Trigger_Send;
Trigger_SendDone;
Trigger_ReceiveDone;
Trigger_SendRetry;
Trigger_SendPCrdGrant;
}
// Is there a less tedious way to convert messages to events ??
// Translate an incoming CHI request type into the controller event that
// handles it. Any other request type is a protocol error at the memory
// controller and aborts the simulation.
Event reqToEvent (CHIRequestType type) {
if (type == CHIRequestType:ReadNoSnp) {
return Event:ReadNoSnp;
} else if (type == CHIRequestType:ReadNoSnpSep) {
return Event:ReadNoSnpSep;
} else if (type == CHIRequestType:WriteNoSnp) {
return Event:WriteNoSnp;
} else if (type == CHIRequestType:WriteNoSnpPtl) {
return Event:WriteNoSnpPtl;
} else {
error("Invalid CHIRequestType");
}
}
// The memory controller never expects a plain CHI response message; any
// arrival is a protocol error (rspInPort below also errors on any message).
Event respToEvent (CHIResponseType type) {
error("Invalid CHIResponseType");
}
// Translate an incoming CHI data message into an event. Only non-coherent
// write data (NCBWrData) is legal here.
Event dataToEvent (CHIDataType type) {
if (type == CHIDataType:NCBWrData) {
return Event:WriteData;
} else {
error("Invalid CHIDataType");
}
}
////////////////////////////////////////////////////////////////////////////
// Data structures
////////////////////////////////////////////////////////////////////////////
// Cache block size
int blockSize, default="RubySystem::getBlockSizeBytes()";
// TBE fields
// One TBE tracks a single in-flight read or write transaction for a line.
structure(TBE, desc="...") {
int storSlot, desc="Slot in the storage tracker occupied by this entry";
Addr addr, desc="Line address for this TBE";
Addr accAddr, desc="Original access address. Set only for Write*Ptl";
int accSize, desc="Access size. Set only for Write*Ptl";
State state, desc="Current line state";
DataBlock dataBlk, desc="Transaction data";
WriteMask dataBlkValid, desc="valid bytes in dataBlk";
int rxtxBytes, desc="Bytes sent or received";
MachineID requestor, desc="Requestor that originated this request";
MachineID destination, desc="Where we are sending data";
bool useDataSepResp, desc="Replies with DataSepResp instead of CompData";
}
// Address-indexed table of active TBEs (implemented in C++).
structure(TBETable, external ="yes") {
TBE lookup(Addr);
void allocate(Addr);
void deallocate(Addr);
bool isPresent(Addr);
bool areNSlotsAvailable(int n, Tick curTime);
}
// Slot/occupancy tracker used to bound the number of TBEs and to reserve
// slots ahead of allocation (implemented in C++).
structure(TBEStorage, external ="yes") {
int size();
int capacity();
int reserved();
int slotsAvailable();
bool areNSlotsAvailable(int n);
void incrementReserved();
void decrementReserved();
int addEntryToNewSlot();
void removeEntryFromSlot(int slot);
}
TBETable TBEs, template="<Memory_TBE>", constructor="m_number_of_TBEs";
TBEStorage storTBEs, constructor="this, m_number_of_TBEs";
// Tracks all pending MemoryAcks (debug purposes only)
int pendingWrites, default="0";
// Internal self-message used to schedule follow-up events (send next data
// chunk, completion, retry handling). Carries no line data, so functional
// accesses never match it.
structure(TriggerMsg, desc="...", interface="Message") {
Addr addr;
Event event;
MachineID retryDest;
bool functionalRead(Packet *pkt) { return false; }
bool functionalRead(Packet *pkt, WriteMask &mask) { return false; }
bool functionalWrite(Packet *pkt) { return false; }
}
// Tracks a pending credit request from a retry
structure(RetryQueueEntry) {
Addr addr, desc="Line address";
MachineID retryDest, desc="Retry destination";
}
// FIFO of requestors waiting for a PCrdGrant credit (implemented in C++).
structure(TriggerQueue, external ="yes") {
void pop();
bool empty();
void emplace(Addr,MachineID);
RetryQueueEntry next();
}
TriggerQueue retryQueue, template="<Memory_RetryQueueEntry>";
////////////////////////////////////////////////////////////////////////////
// External functions
////////////////////////////////////////////////////////////////////////////
// Declarations of functions provided by the generated C++ controller /
// AbstractController base class.
Tick clockEdge();
Tick curTick();
Tick cyclesToTicks(Cycles c);
void set_tbe(TBE b);
void unset_tbe();
void wakeUpAllBuffers(Addr a);
// True if this memory controller is the backing store for addr.
bool respondsTo(Addr addr);
////////////////////////////////////////////////////////////////////////////
// Interface functions required by SLICC
////////////////////////////////////////////////////////////////////////////
// Return the current transaction state for addr. Lines without an active
// TBE are stable in the backing store and therefore report READY.
State getState(TBE tbe, Addr addr) {
if (is_valid(tbe) == false) {
return State:READY;
}
assert(tbe.addr == addr);
return tbe.state;
}
// Record a state change on the transaction's TBE. Lines without a TBE are
// implicitly READY, so there is nothing to update in that case.
void setState(TBE tbe, Addr addr, State state) {
if (is_valid(tbe)) {
assert(tbe.addr == addr);
tbe.state := state;
}
}
// Report the functional-access permission for addr: Backing_Store when the
// line is idle here, Backing_Store_Busy (via the state mapping) while a
// transaction is in flight, and NotPresent for addresses this controller
// does not back.
AccessPermission getAccessPermission(Addr addr) {
if (respondsTo(addr)) {
TBE tbe := TBEs[addr];
if (is_valid(tbe)) {
DPRINTF(RubySlicc, "%x %s,%s\n", addr, tbe.state, Memory_State_to_permission(tbe.state));
return Memory_State_to_permission(tbe.state);
} else {
DPRINTF(RubySlicc, "%x %s\n", addr, AccessPermission:Backing_Store);
return AccessPermission:Backing_Store;
}
} else {
DPRINTF(RubySlicc, "%x %s\n", addr, AccessPermission:NotPresent);
return AccessPermission:NotPresent;
}
}
// Permissions are derived from the TBE state above, so there is nothing to
// store separately; intentionally a no-op.
void setAccessPermission(Addr addr, State state) {
}
// Partial functional read: fill the packet from the backing store first
// (only if no other controller supplied bytes yet, i.e. mask is empty),
// then overlay any newer transient bytes buffered in the TBE. 'mask'
// accumulates which bytes of the packet have been satisfied.
void functionalRead(Addr addr, Packet *pkt, WriteMask &mask) {
if (respondsTo(addr)) {
DPRINTF(RubySlicc, "functionalRead %x\n", addr);
TBE tbe := TBEs[addr];
if (mask.isEmpty()) {
functionalMemoryRead(pkt);
mask.fillMask();
DPRINTF(RubySlicc, "functionalRead mem %x %s\n", addr, mask);
}
// Update with any transient data
//TODO additional handling of partial data ??
if (is_valid(tbe)) {
// Only bytes inside the original access window AND already received
// from the network are newer than the memory copy.
WriteMask read_mask;
read_mask.setMask(addressOffset(tbe.accAddr, tbe.addr), tbe.accSize);
read_mask.andMask(tbe.dataBlkValid);
if (read_mask.isEmpty() == false) {
testAndReadMask(addr, tbe.dataBlk, read_mask, pkt);
DPRINTF(RubySlicc, "functionalRead tbe %x %s %s %s\n", addr, tbe.dataBlk, read_mask, mask);
mask.orMask(read_mask);
}
}
}
}
// Functional write: patch both the in-flight copy buffered in the TBE (if
// any) and the backing store itself. Returns the number of successful
// functional writes performed, 0 if this controller does not back addr.
int functionalWrite(Addr addr, Packet *pkt) {
if(respondsTo(addr)) {
int num_functional_writes := 0;
TBE tbe := TBEs[addr];
if (is_valid(tbe)) {
num_functional_writes := num_functional_writes +
testAndWrite(addr, tbe.dataBlk, pkt);
DPRINTF(RubySlicc, "functionalWrite tbe %x %s\n", addr, tbe.dataBlk);
}
num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
DPRINTF(RubySlicc, "functionalWrite mem %x\n", addr);
return num_functional_writes;
} else {
return 0;
}
}
////////////////////////////////////////////////////////////////////////////
// Helper functions
////////////////////////////////////////////////////////////////////////////
// Debug helper: dump TBE occupancy and the current depth of every message
// buffer. Called from each in_port before triggering an event.
void printResources() {
DPRINTF(RubySlicc, "Resources(avail/max): TBEs=%d/%d\n",
storTBEs.size(), storTBEs.capacity());
DPRINTF(RubySlicc, "Resources(in/out size): rdy=%d req=%d/%d rsp=%d/%d dat=%d/%d snp=%d/%d\n",
reqRdy.getSize(curTick()),
reqIn.getSize(curTick()), reqOut.getSize(curTick()),
rspIn.getSize(curTick()), rspOut.getSize(curTick()),
datIn.getSize(curTick()), datOut.getSize(curTick()),
snpIn.getSize(curTick()), snpOut.getSize(curTick()));
}
////////////////////////////////////////////////////////////////////////////
// Input/output port definitions
////////////////////////////////////////////////////////////////////////////
// Outbound port definitions
out_port(reqOutPort, CHIRequestMsg, reqOut);
out_port(snpOutPort, CHIRequestMsg, snpOut);
out_port(rspOutPort, CHIResponseMsg, rspOut);
out_port(datOutPort, CHIDataMsg, datOut);
out_port(triggerOutPort, TriggerMsg, triggerQueue);
out_port(memQueue_out, MemoryMsg, requestToMemory);
out_port(reqRdyOutPort, CHIRequestMsg, reqRdy);
// Inbound port definitions
// NOTE(review): the rank= values order the ports' polling priority
// (responses/data before new requests) -- confirm ordering semantics
// against the SLICC in_port documentation.
// Response
// Memory never receives plain responses; any message here is fatal.
in_port(rspInPort, CHIResponseMsg, rspIn, rank=6) {
if (rspInPort.isReady(clockEdge())) {
printResources();
peek(rspInPort, CHIResponseMsg) {
error("Unexpected message");
}
}
}
// Data
// Write data from the network; must carry 1..data_channel_size valid bytes.
in_port(datInPort, CHIDataMsg, datIn, rank=5) {
if (datInPort.isReady(clockEdge())) {
printResources();
peek(datInPort, CHIDataMsg) {
int received := in_msg.bitMask.count();
assert((received <= data_channel_size) && (received > 0));
trigger(dataToEvent(in_msg.type), in_msg.addr, TBEs[in_msg.addr]);
}
}
}
// Data/Ack from memory
in_port(memQueue_in, MemoryMsg, responseFromMemory, rank=4) {
if (memQueue_in.isReady(clockEdge())) {
printResources();
peek(memQueue_in, MemoryMsg) {
Addr addr := makeLineAddress(in_msg.addr);
if (in_msg.Type == MemoryRequestType:MEMORY_READ) {
trigger(Event:MemoryData, addr, TBEs[addr]);
} else if (in_msg.Type == MemoryRequestType:MEMORY_WB) {
trigger(Event:MemoryAck, addr, TBEs[addr]);
} else {
error("Invalid message");
}
}
}
}
// Trigger
// Internally-scheduled follow-up events (see TriggerMsg above).
in_port(triggerInPort, TriggerMsg, triggerQueue, rank=3) {
if (triggerInPort.isReady(clockEdge())) {
printResources();
peek(triggerInPort, TriggerMsg) {
trigger(in_msg.event, in_msg.addr, TBEs[in_msg.addr]);
}
}
}
// Snoops
// Memory is never snooped; any message here is fatal.
in_port(snpInPort, CHIRequestMsg, snpIn, rank=2) {
if (snpInPort.isReady(clockEdge())) {
printResources();
peek(snpInPort, CHIRequestMsg) {
error("Unexpected message");
}
}
}
// Requests
// Requests that already passed the TBE-availability check (see reqInPort).
in_port(reqRdyInPort, CHIRequestMsg, reqRdy, rank=1) {
if (reqRdyInPort.isReady(clockEdge())) {
printResources();
peek(reqRdyInPort, CHIRequestMsg) {
trigger(reqToEvent(in_msg.type), in_msg.addr, TBEs[in_msg.addr]);
}
}
}
// Fresh requests from the network: first check whether a TBE slot can be
// reserved (or was already reserved via a credit) before admitting them.
in_port(reqInPort, CHIRequestMsg, reqIn, rank=0) {
if (reqInPort.isReady(clockEdge())) {
printResources();
peek(reqInPort, CHIRequestMsg) {
if (in_msg.allowRetry) {
trigger(Event:CheckAllocTBE, in_msg.addr, TBEs[in_msg.addr]);
} else {
// Only expected requests that do not allow retry are the ones that
// are being retried after receiving credit
trigger(Event:CheckAllocTBE_WithCredit,
in_msg.addr, TBEs[in_msg.addr]);
}
}
}
}
////////////////////////////////////////////////////////////////////////////
// Actions
////////////////////////////////////////////////////////////////////////////
// Admission control for fresh requests: either reserve a TBE slot and
// forward the request to reqRdy, or (if no slot is free) send a RetryAck
// and remember the requestor for a later PCrdGrant credit.
action(checkAllocateTBE, desc="") {
// Move to reqRdy if resources available, otherwise send retry
if (storTBEs.areNSlotsAvailable(1)) {
// reserve a slot for this request
storTBEs.incrementReserved();
peek(reqInPort, CHIRequestMsg) {
enqueue(reqRdyOutPort, CHIRequestMsg, 0) {
out_msg := in_msg;
}
}
} else {
peek(reqInPort, CHIRequestMsg) {
assert(in_msg.allowRetry);
enqueue(triggerOutPort, TriggerMsg, 0) {
out_msg.addr := in_msg.addr;
out_msg.event := Event:Trigger_SendRetry;
out_msg.retryDest := in_msg.requestor;
retryQueue.emplace(in_msg.addr,in_msg.requestor);
}
}
}
reqInPort.dequeue(clockEdge());
}
// Admission for a credited retry: the slot was reserved when the PCrdGrant
// was issued (see deallocateTBE), so the request goes straight to reqRdy.
action(checkAllocateTBE_withCredit, desc="") {
// We must have reserved resources for this request
peek(reqInPort, CHIRequestMsg) {
assert(in_msg.allowRetry == false);
enqueue(reqRdyOutPort, CHIRequestMsg, 0) {
out_msg := in_msg;
}
}
reqInPort.dequeue(clockEdge());
}
// Turn a previously reserved slot into an actual TBE for 'address' and make
// it the current tbe.
action(allocateTBE, "atbe", desc="Allocate TBEs for a miss") {
// We must have reserved resources for this allocation
storTBEs.decrementReserved();
assert(storTBEs.areNSlotsAvailable(1));
TBEs.allocate(address);
set_tbe(TBEs[address]);
tbe.storSlot := storTBEs.addEntryToNewSlot();
tbe.addr := address;
tbe.rxtxBytes := 0;
tbe.useDataSepResp := false;
}
// Populate the TBE from the admitted request: who asked, where the data
// should go (DMT/DCT forwards go to fwdRequestor), and the access window.
action(initializeFromReqTBE, "itbe", desc="Initialize TBE fields") {
peek(reqRdyInPort, CHIRequestMsg) {
tbe.requestor := in_msg.requestor;
if (in_msg.dataToFwdRequestor) {
tbe.destination := in_msg.fwdRequestor;
} else {
tbe.destination := in_msg.requestor;
}
tbe.accAddr := in_msg.accAddr;
tbe.accSize := in_msg.accSize;
}
}
// Bookkeeping for the sanity counter incremented in sendMemoryWrite.
action(decWritePending, "dwp", desc="Decrement pending writes") {
assert(pendingWrites >= 1);
pendingWrites := pendingWrites - 1;
}
// Release the TBE and its slot. If a requestor is waiting after a retry,
// immediately re-reserve the freed slot for it and schedule a PCrdGrant.
action(deallocateTBE, "dtbe", desc="Deallocate TBEs") {
assert(is_valid(tbe));
storTBEs.removeEntryFromSlot(tbe.storSlot);
TBEs.deallocate(address);
unset_tbe();
// send credit if requestor waiting for it
if (retryQueue.empty() == false) {
assert(storTBEs.areNSlotsAvailable(1));
storTBEs.incrementReserved();
RetryQueueEntry e := retryQueue.next();
retryQueue.pop();
enqueue(triggerOutPort, TriggerMsg, 0) {
out_msg.addr := e.addr;
out_msg.retryDest := e.retryDest;
out_msg.event := Event:Trigger_SendPCrdGrant;
}
}
}
// Acknowledge a ReadNoSnpSep with a ReadReceipt; the data phase will then
// use DataSepResp instead of CompData (see sendDataAndCheck).
action(sendReadReceipt, "sRR", desc="Send receipt to requestor") {
assert(is_valid(tbe));
enqueue(rspOutPort, CHIResponseMsg, response_latency) {
out_msg.addr := address;
out_msg.type := CHIResponseType:ReadReceipt;
out_msg.responder := machineID;
out_msg.Destination.add(tbe.requestor);
}
// also send different type of data when ready
tbe.useDataSepResp := true;
}
// Acknowledge a WriteNoSnp/WriteNoSnpPtl; the requestor may now send the
// write data on the data channel.
action(sendCompDBIDResp, "sCbid", desc="Send ack to requestor") {
assert(is_valid(tbe));
enqueue(rspOutPort, CHIResponseMsg, response_latency) {
out_msg.addr := address;
out_msg.type := CHIResponseType:CompDBIDResp;
out_msg.responder := machineID;
out_msg.Destination.add(tbe.requestor);
}
}
// Issue a full-line read to the attached memory; the reply arrives on
// memQueue_in as MemoryData.
action(sendMemoryRead, "smr", desc="Send request to memory") {
assert(is_valid(tbe));
enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
out_msg.addr := address;
out_msg.Type := MemoryRequestType:MEMORY_READ;
out_msg.Sender := tbe.requestor;
out_msg.MessageSize := MessageSizeType:Request_Control;
out_msg.Len := 0;
}
}
// Issue a (possibly partial) writeback of the collected data to memory;
// the ack arrives on memQueue_in as MemoryAck and is only counted.
action(sendMemoryWrite, "smw", desc="Send request to memory") {
assert(is_valid(tbe));
enqueue(memQueue_out, MemoryMsg, to_memory_controller_latency) {
out_msg.addr := tbe.accAddr;
out_msg.Type := MemoryRequestType:MEMORY_WB;
out_msg.Sender := tbe.requestor;
out_msg.MessageSize := MessageSizeType:Writeback_Data;
out_msg.DataBlk := tbe.dataBlk;
out_msg.Len := tbe.accSize;
}
tbe.dataBlkValid.clear();
pendingWrites := pendingWrites + 1;
}
// Stage memory read data in the TBE and reset the transfer byte counter so
// sendDataAndCheck can stream the line out in channel-sized chunks.
action(prepareSend, "ps", desc="Copies received memory data to TBE") {
assert(is_valid(tbe));
peek(memQueue_in, MemoryMsg) {
tbe.dataBlk := in_msg.DataBlk;
}
tbe.rxtxBytes := 0;
tbe.dataBlkValid.setMask(addressOffset(tbe.accAddr, tbe.addr), tbe.accSize);
}
// Merge one flit of network write data into the TBE, tracking which bytes
// are valid and how many have arrived so far.
action(copyWriteDataToTBE, "cpWDat", desc="Copies received net data to TBE") {
peek(datInPort, CHIDataMsg) {
assert(is_valid(tbe));
tbe.dataBlk.copyPartial(in_msg.dataBlk, in_msg.bitMask);
tbe.dataBlkValid.orMask(in_msg.bitMask);
tbe.rxtxBytes := tbe.rxtxBytes + in_msg.bitMask.count();
}
}
// Send the next data_channel_size chunk of the line to the destination and
// schedule either the next chunk (Trigger_Send, next cycle) or completion
// (Trigger_SendDone). Chunks are sent in address order, so rxtxBytes
// doubles as the byte offset of the next chunk.
action(sendDataAndCheck, "sd", desc="Send received data to requestor") {
assert(is_valid(tbe));
assert(tbe.rxtxBytes < blockSize);
enqueue(datOutPort, CHIDataMsg, data_latency) {
out_msg.addr := tbe.addr;
if (tbe.useDataSepResp) {
out_msg.type := CHIDataType:DataSepResp_UC;
} else {
out_msg.type := CHIDataType:CompData_UC;
}
out_msg.dataBlk := tbe.dataBlk;
// Called in order for the whole block so use rxtxBytes as offset
out_msg.bitMask.setMask(tbe.rxtxBytes, data_channel_size);
out_msg.Destination.add(tbe.destination);
}
//DPRINTF(RubySlicc, "rxtxBytes=%d\n", tbe.rxtxBytes);
tbe.rxtxBytes := tbe.rxtxBytes + data_channel_size;
// end or send next chunk next cycle
Event next := Event:Trigger_SendDone;
Cycles delay := intToCycles(0);
if (tbe.rxtxBytes < blockSize) {
next := Event:Trigger_Send;
delay := intToCycles(1);
}
enqueue(triggerOutPort, TriggerMsg, delay) {
out_msg.addr := address;
out_msg.event := next;
}
}
// After each received write-data flit, check whether the full access window
// has arrived; if so, reset the counter and fire Trigger_ReceiveDone.
action(checkForReceiveCompletion, "cWc", desc="Check if all data is received") {
assert(is_valid(tbe));
DPRINTF(RubySlicc, "rxtxBytes=%d\n", tbe.rxtxBytes);
assert((tbe.rxtxBytes <= tbe.accSize) && (tbe.rxtxBytes > 0));
if (tbe.rxtxBytes == tbe.accSize) {
enqueue(triggerOutPort, TriggerMsg, 0) {
out_msg.addr := address;
out_msg.event := Event:Trigger_ReceiveDone;
}
tbe.rxtxBytes := 0;
assert(tbe.dataBlkValid.getMask(addressOffset(tbe.accAddr, tbe.addr), tbe.accSize));
}
}
// Dequeue helpers: consume the message that triggered the current event.
action(popReqInQueue, "preq", desc="Pop request queue.") {
reqRdyInPort.dequeue(clockEdge());
}
action(popDataInQueue, "pdata", desc="Pop data queue.") {
datInPort.dequeue(clockEdge());
}
action(popTriggerQueue, "ptrigger", desc="Pop trigger queue.") {
triggerInPort.dequeue(clockEdge());
}
action(popMemoryQueue, "pmem", desc="Pop memory queue.") {
memQueue_in.dequeue(clockEdge());
}
// Stall/wake-up only used for requests that arrive when we are on the
// WAITING_NET_DATA state. For all other case the line should be either
// ready or we can overlap
// Park the admitted request until the current transaction on this address
// finishes; wakeUpStalled below re-enables it.
action(stallRequestQueue, "str", desc="Stall and wait on the address") {
peek(reqRdyInPort, CHIRequestMsg){
stall_and_wait(reqRdyInPort, address);
}
}
action(wakeUpStalled, "wa", desc="Wake up any requests waiting for this address") {
wakeUpAllBuffers(address);
}
// Tell a rejected requestor to retry later (it will wait for a PCrdGrant).
action(sendRetryAck, desc="") {
peek(triggerInPort, TriggerMsg) {
enqueue(rspOutPort, CHIResponseMsg, response_latency) {
out_msg.addr := in_msg.addr;
out_msg.type := CHIResponseType:RetryAck;
out_msg.responder := machineID;
out_msg.Destination.add(in_msg.retryDest);
}
}
}
// Grant a credit to a previously retried requestor; it may then resend the
// request with allowRetry=false (see reqInPort).
action(sendPCrdGrant, desc="") {
peek(triggerInPort, TriggerMsg) {
enqueue(rspOutPort, CHIResponseMsg, response_latency) {
out_msg.addr := in_msg.addr;
out_msg.type := CHIResponseType:PCrdGrant;
out_msg.responder := machineID;
out_msg.Destination.add(in_msg.retryDest);
}
}
}
////////////////////////////////////////////////////////////////////////////
// Transitions
////////////////////////////////////////////////////////////////////////////
// Reads: allocate a TBE, fetch the line from memory, then stream it out.
transition(READY, ReadNoSnp, READING_MEM) {
allocateTBE;
initializeFromReqTBE;
sendMemoryRead;
popReqInQueue;
}
// ReadNoSnpSep additionally sends a ReadReceipt and replies with
// DataSepResp instead of CompData.
transition(READY, ReadNoSnpSep, READING_MEM) {
allocateTBE;
initializeFromReqTBE;
sendMemoryRead;
sendReadReceipt;
popReqInQueue;
}
transition(READING_MEM, MemoryData, SENDING_NET_DATA) {
prepareSend;
sendDataAndCheck;
popMemoryQueue;
}
transition(SENDING_NET_DATA, Trigger_Send) {
sendDataAndCheck;
popTriggerQueue;
}
// Writes: allocate a TBE, ack with CompDBIDResp, then wait for the data.
transition(READY, WriteNoSnpPtl, WAITING_NET_DATA) {
allocateTBE;
initializeFromReqTBE;
sendCompDBIDResp;
popReqInQueue;
}
transition(READY, WriteNoSnp, WAITING_NET_DATA) {
allocateTBE;
initializeFromReqTBE;
sendCompDBIDResp;
popReqInQueue;
}
transition(WAITING_NET_DATA, WriteData) {
copyWriteDataToTBE;
checkForReceiveCompletion;
popDataInQueue;
}
transition(WAITING_NET_DATA, Trigger_ReceiveDone, READY) {
sendMemoryWrite;
deallocateTBE;
wakeUpStalled;
popTriggerQueue;
}
transition(SENDING_NET_DATA, Trigger_SendDone, READY) {
deallocateTBE;
wakeUpStalled;
popTriggerQueue;
}
// Just sanity check against counter of pending acks
transition({READING_MEM,WAITING_NET_DATA,SENDING_NET_DATA,READY},
MemoryAck) {
decWritePending;
popMemoryQueue;
}
// Notice we only use this here and call wakeUp when leaving this state
// BUGFIX: WriteNoSnp was missing from the stall set even though it is an
// admitted request that allocates a TBE from READY; a WriteNoSnp arriving
// while the line is busy would otherwise hit an invalid transition.
transition({READING_MEM,WAITING_NET_DATA,SENDING_NET_DATA},
{ReadNoSnp, ReadNoSnpSep, WriteNoSnpPtl, WriteNoSnp}) {
stallRequestQueue;
}
transition({READING_MEM,WAITING_NET_DATA,SENDING_NET_DATA,READY},
Trigger_SendRetry) {
sendRetryAck;
popTriggerQueue;
}
transition({READING_MEM,WAITING_NET_DATA,SENDING_NET_DATA,READY},
Trigger_SendPCrdGrant) {
sendPCrdGrant;
popTriggerQueue;
}
// Admission checks can fire in any state; they only touch the reservation
// counters and the reqRdy queue, never the current line.
transition({READING_MEM,WAITING_NET_DATA,SENDING_NET_DATA,READY},
CheckAllocTBE) {
checkAllocateTBE;
}
transition({READING_MEM,WAITING_NET_DATA,SENDING_NET_DATA,READY},
CheckAllocTBE_WithCredit) {
checkAllocateTBE_withCredit;
}
}

View File

@@ -0,0 +1,234 @@
/*
* Copyright (c) 2021 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// All CHI request and response types match the name style in the standard doc.
// For a description of a specific message type, refer to the Arm's AMBA 5
// CHI specification (issue D):
// https://static.docs.arm.com/ihi0050/d/
// IHI0050D_amba_5_chi_architecture_spec.pdf
enumeration(CHIRequestType, desc="") {
// Incoming requests generated by the sequencer
Load;
Store;
StoreLine;
// CHI request types
ReadShared;
ReadNotSharedDirty;
ReadUnique;
ReadOnce;
CleanUnique;
Evict;
WriteBackFull;
WriteCleanFull;
WriteEvictFull;
WriteUniquePtl;
WriteUniqueFull;
// Snoop requests (sent on the snoop channel)
SnpSharedFwd;
SnpNotSharedDirtyFwd;
SnpUniqueFwd;
SnpOnceFwd;
SnpOnce;
SnpShared;
SnpUnique;
SnpCleanInvalid;
// Non-coherent requests handled by the memory controllers
WriteNoSnpPtl;
WriteNoSnp;
ReadNoSnp;
ReadNoSnpSep;
null;
}
// Request message carried on the CHI request and snoop channels.
structure(CHIRequestMsg, desc="", interface="Message") {
Addr addr, desc="Request line address";
Addr accAddr, desc="Original access address. Set for Write*Ptl and requests from the sequencer";
int accSize, desc="Access size. Set for Write*Ptl and requests from the sequencer";
CHIRequestType type, desc="Request type";
MachineID requestor, desc="Requestor ID";
MachineID fwdRequestor, desc="Where to send data for DMT/DCT requests";
bool dataToFwdRequestor, desc="Data has to be forwarded to fwdRequestor";
bool retToSrc, desc="Affects whether or not a snoop resp returns data";
bool allowRetry, desc="This request can be retried";
NetDest Destination, desc="Message destination";
RequestPtr seqReq, default="nullptr", desc="Pointer to original request from CPU/sequencer (nullptr if not valid)";
bool isSeqReqValid, default="false", desc="Set if seqReq is valid (not nullptr)";
bool is_local_pf, desc="Request generated by a local prefetcher";
bool is_remote_pf, desc="Request generated a prefetcher in another cache";
MessageSizeType MessageSize, default="MessageSizeType_Control";
// No data for functional access
bool functionalRead(Packet *pkt) { return false; }
bool functionalRead(Packet *pkt, WriteMask &mask) { return false; }
bool functionalWrite(Packet *pkt) { return false; }
}
// Dataless response types carried on the CHI response channel.
enumeration(CHIResponseType, desc="...") {
// CHI response types
Comp_I;
Comp_UC;
Comp_SC;
CompAck;
CompDBIDResp;
DBIDResp;
Comp;
ReadReceipt;
RespSepData;
// Snoop responses; *_Fwded_* variants report the state forwarded to the
// original requestor on a DCT snoop.
SnpResp_I;
SnpResp_I_Fwded_UC;
SnpResp_I_Fwded_UD_PD;
SnpResp_SC;
SnpResp_SC_Fwded_SC;
SnpResp_SC_Fwded_SD_PD;
SnpResp_UC_Fwded_I;
SnpResp_UD_Fwded_I;
SnpResp_SC_Fwded_I;
SnpResp_SD_Fwded_I;
// Retry flow
RetryAck;
PCrdGrant;
null;
}
// Dataless response message; carries no line data, so functional accesses
// never match it.
structure(CHIResponseMsg, desc="", interface="Message") {
Addr addr, desc="Line address";
CHIResponseType type, desc="Response type";
MachineID responder, desc="Responder ID";
NetDest Destination, desc="Response destination";
bool stale, desc="Response to a stale request";
//NOTE: not in CHI and for debugging only
MessageSizeType MessageSize, default="MessageSizeType_Control";
// No data for functional access
bool functionalRead(Packet *pkt) { return false; }
bool functionalRead(Packet *pkt, WriteMask &mask) { return false; }
bool functionalWrite(Packet *pkt) { return false; }
}
// Data message types; the *_PD variants pass dirty (PassDirty) data, which
// matters for functional reads (see CHIDataMsg below).
enumeration(CHIDataType, desc="...") {
// CHI data response types
CompData_I;
CompData_UC;
CompData_SC;
CompData_UD_PD;
CompData_SD_PD;
DataSepResp_UC;
// Copy-back write data, named after the cache state when sent
CBWrData_UC;
CBWrData_SC;
CBWrData_UD_PD;
CBWrData_SD_PD;
CBWrData_I;
// Non-coherent write data
NCBWrData;
// Snoop response data
SnpRespData_I;
SnpRespData_I_PD;
SnpRespData_SC;
SnpRespData_SC_PD;
SnpRespData_SD;
SnpRespData_UC;
SnpRespData_UD;
SnpRespData_SC_Fwded_SC;
SnpRespData_SC_Fwded_SD_PD;
SnpRespData_SC_PD_Fwded_SC;
SnpRespData_I_Fwded_SD_PD;
SnpRespData_I_PD_Fwded_SC;
SnpRespData_I_Fwded_SC;
null;
}
// Data message; may carry a partial line (bitMask marks the valid bytes
// within dataBlk).
structure(CHIDataMsg, desc="", interface="Message") {
Addr addr, desc="Line address";
CHIDataType type, desc="Response type";
MachineID responder, desc="Responder ID";
NetDest Destination, desc="Response destination";
DataBlock dataBlk, desc="Line data";
WriteMask bitMask, desc="Which bytes in the data block are valid";
MessageSizeType MessageSize, default="MessageSizeType_Data";
// Whole-line functional read: only safe when every byte is valid.
bool functionalRead(Packet *pkt) {
if(bitMask.isFull()) {
return testAndRead(addr, dataBlk, pkt);
} else {
return false;
}
}
// Partial functional read: contribute this message's valid bytes when they
// add coverage beyond 'mask' or when the data is dirty (a dirty in-flight
// copy supersedes what the backing store returned).
bool functionalRead(Packet *pkt, WriteMask &mask) {
// read if bitmask has bytes not in mask or if data is dirty
bool is_dirty := (type == CHIDataType:CompData_UD_PD) ||
(type == CHIDataType:CompData_SD_PD) ||
(type == CHIDataType:CBWrData_UD_PD) ||
(type == CHIDataType:CBWrData_SD_PD) ||
(type == CHIDataType:NCBWrData) ||
(type == CHIDataType:SnpRespData_I_PD) ||
(type == CHIDataType:SnpRespData_SC_PD) ||
(type == CHIDataType:SnpRespData_SD) ||
(type == CHIDataType:SnpRespData_UD) ||
(type == CHIDataType:SnpRespData_SC_Fwded_SD_PD) ||
(type == CHIDataType:SnpRespData_SC_PD_Fwded_SC) ||
(type == CHIDataType:SnpRespData_I_Fwded_SD_PD) ||
(type == CHIDataType:SnpRespData_I_PD_Fwded_SC);
assert(bitMask.isEmpty() == false);
// test_mask = mask | bitMask; differs from mask iff we add new coverage
WriteMask test_mask := mask;
test_mask.orMask(bitMask);
if ((test_mask.cmpMask(mask) == false) || is_dirty) {
if (testAndReadMask(addr, dataBlk, bitMask, pkt)) {
mask.orMask(bitMask);
return true;
}
}
return false;
}
bool functionalWrite(Packet *pkt) {
return testAndWrite(addr, dataBlk, pkt);
}
}

View File

@@ -0,0 +1,6 @@
protocol "CHI";
include "RubySlicc_interfaces.slicc";
include "CHI-msg.sm";
include "CHI-cache.sm";
include "CHI-mem.sm";

View File

@@ -0,0 +1,47 @@
# -*- mode:python -*-
# Copyright (c) 2021 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Import('*')
# Register this protocol with gem5/SCons
all_protocols.append('CHI')
# CHI requires Ruby's interface to support partial functional reads
need_partial_func_reads.append('CHI')
protocol_dirs.append(Dir('.').abspath)

View File

@@ -45,9 +45,6 @@ if env['PROTOCOL'] == 'None':
env.Append(CPPDEFINES=['PROTOCOL_' + env['PROTOCOL']])
# list of protocols that require the partial functional read interface
need_partial_func_reads = []
if env['PROTOCOL'] in need_partial_func_reads:
env.Append(CPPDEFINES=['PARTIAL_FUNC_READS'])

View File

@@ -368,7 +368,7 @@ def createCxxConfigDirectoryEntryFile(code, name, simobj, is_header):
if not is_header:
code('{')
if hasattr(simobj, 'abstract') and simobj.abstract:
if getattr(simobj, 'abstract', False):
code(' return NULL;')
else:
code(' return this->create();')
@@ -700,6 +700,80 @@ class MetaSimObject(type):
def pybind_predecls(cls, code):
code('#include "${{cls.cxx_header}}"')
def cxx_param_def(cls, code):
code('''
#include <type_traits>
#include "base/compiler.hh"
#include "${{cls.cxx_header}}"
#include "params/${cls}.hh"
''')
code()
code('namespace')
code('{')
code()
# If we can't define a default create() method for this params struct
# because the SimObject doesn't have the right constructor, use
# template magic to make it so we're actually defining a create method
# for this class instead.
code('class Dummy${cls}ParamsClass')
code('{')
code(' public:')
code(' ${{cls.cxx_class}} *create() const;')
code('};')
code()
code('template <class CxxClass, class Enable=void>')
code('class Dummy${cls}Shunt;')
code()
# This version directs to the real Params struct and the default
# behavior of create if there's an appropriate constructor.
code('template <class CxxClass>')
code('class Dummy${cls}Shunt<CxxClass, std::enable_if_t<')
code(' std::is_constructible<CxxClass,')
code(' const ${cls}Params &>::value>>')
code('{')
code(' public:')
code(' using Params = ${cls}Params;')
code(' static ${{cls.cxx_class}} *')
code(' create(const Params &p)')
code(' {')
code(' return new CxxClass(p);')
code(' }')
code('};')
code()
# This version diverts to the DummyParamsClass and a dummy
# implementation of create if the appropriate constructor does not
# exist.
code('template <class CxxClass>')
code('class Dummy${cls}Shunt<CxxClass, std::enable_if_t<')
code(' !std::is_constructible<CxxClass,')
code(' const ${cls}Params &>::value>>')
code('{')
code(' public:')
code(' using Params = Dummy${cls}ParamsClass;')
code(' static ${{cls.cxx_class}} *')
code(' create(const Params &p)')
code(' {')
code(' return nullptr;')
code(' }')
code('};')
code()
code('} // anonymous namespace')
code()
# An implementation of either the real Params struct's create
# method, or the Dummy one. Either an implementation is
# mandantory since this was shunted off to the dummy class, or
# one is optional which will override this weak version.
code('M5_VAR_USED ${{cls.cxx_class}} *')
code('Dummy${cls}Shunt<${{cls.cxx_class}}>::Params::create() const')
code('{')
code(' return Dummy${cls}Shunt<${{cls.cxx_class}}>::')
code(' create(*this);')
code('}')
def pybind_decl(cls, code):
py_class_name = cls.pybind_class
@@ -713,9 +787,6 @@ class MetaSimObject(type):
code('''#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#include <type_traits>
#include "base/compiler.hh"
#include "params/$cls.hh"
#include "python/pybind11/core.hh"
#include "sim/init.hh"
@@ -797,76 +868,6 @@ module_init(py::module_ &m_internal)
code()
code('static EmbeddedPyBind embed_obj("${0}", module_init, "${1}");',
cls, cls._base.type if cls._base else "")
if not hasattr(cls, 'abstract') or not cls.abstract:
if 'type' in cls.__dict__:
code()
# This namespace can't *actually* be anonymous, or the compiler
# gets upset about having a weak symbol init.
code('namespace anonymous_params')
code('{')
code()
# If we can't define a default create() method for this params
# struct because the SimObject doesn't have the right
# constructor, use template magic to make it so we're actually
# defining a create method for this class instead.
code('class Dummy${cls}ParamsClass')
code('{')
code(' public:')
code(' ${{cls.cxx_class}} *create() const;')
code('};')
code()
code('template <class CxxClass, class Enable=void>')
code('class DummyShunt;')
code()
# This version directs to the real Params struct and the
# default behavior of create if there's an appropriate
# constructor.
code('template <class CxxClass>')
code('class DummyShunt<CxxClass, std::enable_if_t<')
code(' std::is_constructible<CxxClass,')
code(' const ${cls}Params &>::value>>')
code('{')
code(' public:')
code(' using Params = ${cls}Params;')
code(' static ${{cls.cxx_class}} *')
code(' create(const Params &p)')
code(' {')
code(' return new CxxClass(p);')
code(' }')
code('};')
code()
# This version diverts to the DummyParamsClass and a dummy
# implementation of create if the appropriate constructor does
# not exist.
code('template <class CxxClass>')
code('class DummyShunt<CxxClass, std::enable_if_t<')
code(' !std::is_constructible<CxxClass,')
code(' const ${cls}Params &>::value>>')
code('{')
code(' public:')
code(' using Params = Dummy${cls}ParamsClass;')
code(' static ${{cls.cxx_class}} *')
code(' create(const Params &p)')
code(' {')
code(' return nullptr;')
code(' }')
code('};')
code()
code('} // namespace anonymous_params')
code()
code('using namespace anonymous_params;')
code()
# A weak implementation of either the real Params struct's
# create method, or the Dummy one if we don't want to have
# any default implementation. Either an implementation is
# mandantory since this was shunted off to the dummy class, or
# one is optional which will override this weak version.
code('M5_WEAK ${{cls.cxx_class}} *')
code('DummyShunt<${{cls.cxx_class}}>::Params::create() const')
code('{')
code(' return DummyShunt<${{cls.cxx_class}}>::')
code(' create(*this);')
code('}')
_warned_about_nested_templates = False

View File

@@ -230,7 +230,7 @@ def _prepare_stats(group: _m5.stats.Group):
for stat in group.getStats():
stat.prepare()
for child in getStatGroups().values():
for child in group.getStatGroups().values():
_prepare_stats(child)

View File

@@ -166,7 +166,7 @@ Root::timeSyncSpinThreshold(Time newThreshold)
timeSyncEnable(en);
}
Root::Root(const RootParams &p)
Root::Root(const RootParams &p, int)
: SimObject(p), _enabled(false), _periodTick(p.time_sync_period),
syncEvent([this]{ timeSync(); }, name())
{
@@ -216,5 +216,5 @@ RootParams::create() const
FullSystem = full_system;
FullSystemInt = full_system ? 1 : 0;
return new Root(*this);
return new Root(*this, 0);
}

View File

@@ -134,7 +134,9 @@ class Root : public SimObject
PARAMS(Root);
Root(const Params &p);
// The int parameter is ignored, it's just so we can define a custom
// create() method.
Root(const Params &p, int);
/** Schedule the timesync event at startup().
*/

View File

@@ -54,7 +54,7 @@ bool Kernel::endOfSimulationComplete() { return endComplete; }
sc_core::sc_status Kernel::status() { return _status; }
void Kernel::status(sc_core::sc_status s) { _status = s; }
Kernel::Kernel(const Params &params) :
Kernel::Kernel(const Params &params, int) :
SimObject(params), t0Event(this, false, EventBase::Default_Pri - 1)
{
// Install ourselves as the scheduler's event manager.
@@ -187,6 +187,6 @@ SystemC_KernelParams::create() const
{
panic_if(sc_gem5::kernel,
"Only one systemc kernel object may be defined.\n");
sc_gem5::kernel = new sc_gem5::Kernel(*this);
sc_gem5::kernel = new sc_gem5::Kernel(*this, 0);
return sc_gem5::kernel;
}

View File

@@ -46,7 +46,7 @@ class Kernel : public SimObject
{
public:
typedef SystemC_KernelParams Params;
Kernel(const Params &params);
Kernel(const Params &params, int);
void init() override;
void regStats() override;

View File

@@ -55,7 +55,7 @@ env.Append(CPPPATH=[gem5_root + '/build/' + gem5_arch,
'#examples/common',
])
env.Append(CXXFLAGS=['-std=c++11',
env.Append(CXXFLAGS=['-std=c++14',
'-DSC_INCLUDE_DYNAMIC_PROCESSES',
'-DTRACING_ON',
])
@@ -78,6 +78,7 @@ sys.path.append(gem5_root + '/src/python')
AddOption('--no-colors', dest='use_colors', action='store_false',
help="Don't add color to abbreviated scons output")
main.SConsignFile('build/systemc/sconsign')
SConscript(gem5_root + '/ext/systemc/SConscript',
variant_dir='build/systemc',
exports='main')

View File

@@ -93,8 +93,7 @@ SCMasterPort::SCMasterPort(const std::string& name_,
transactor(nullptr),
simControl(simControl)
{
system =
dynamic_cast<const ExternalMasterParams*>(owner_.params())->system;
system = dynamic_cast<const ExternalMasterParams&>(owner_.params()).system;
}
void

View File

@@ -301,7 +301,7 @@ SCSlavePort::pec(
packet->makeResponse();
}
if (packet->isResponse()) {
need_retry = !bridgeResponsePort.sendTimingResp(packet);
need_retry = !sendTimingResp(packet);
}
if (need_retry) {