python: Apply Black formatter to Python files

The command executed was `black src configs tests util`.

Change-Id: I8dfaa6ab04658fea37618127d6ac19270028d771
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/47024
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Bobby R. Bruce
2022-07-05 11:02:25 -07:00
committed by Giacomo Travaglini
parent 1cfaa8da83
commit 787204c92d
980 changed files with 35668 additions and 22233 deletions

View File

@@ -34,7 +34,7 @@ from m5.defines import buildEnv
from m5.util import addToPath, convert
from .CntrlBase import *
addToPath('../')
addToPath("../")
from topologies.Cluster import Cluster
@@ -44,23 +44,27 @@ from topologies.Cluster import Cluster
class L1Cache(RubyCache):
latency = 1
resourceStalls = False
def create(self, size, assoc, options):
self.size = MemorySize(size)
self.assoc = assoc
self.replacement_policy = TreePLRURP()
#
# Note: the L2 Cache latency is not currently used
#
class L2Cache(RubyCache):
latency = 10
resourceStalls = False
def create(self, size, assoc, options):
self.size = MemorySize(size)
self.assoc = assoc
self.replacement_policy = TreePLRURP()
class CPCntrl(AMD_Base_Controller, CntrlBase):
class CPCntrl(AMD_Base_Controller, CntrlBase):
def create(self, options, ruby_system, system):
self.version = self.versionCount()
self.cntrl_id = self.cntrlCount()
@@ -96,16 +100,20 @@ class CPCntrl(AMD_Base_Controller, CntrlBase):
if options.recycle_latency:
self.recycle_latency = options.recycle_latency
def define_options(parser):
parser.add_argument("--cpu-to-dir-latency", type=int, default=15)
def construct(options, system, ruby_system):
if buildEnv['PROTOCOL'] != 'GPU_VIPER':
panic("This script requires VIPER based protocols \
to be built.")
if buildEnv["PROTOCOL"] != "GPU_VIPER":
panic(
"This script requires VIPER based protocols \
to be built."
)
cpu_sequencers = []
cpuCluster = None
cpuCluster = Cluster(name="CPU Cluster", extBW = 8, intBW=8) # 16 GB/s
cpuCluster = Cluster(name="CPU Cluster", extBW=8, intBW=8) # 16 GB/s
for i in range((options.num_cpus + 1) // 2):
cp_cntrl = CPCntrl()

View File

@@ -38,33 +38,42 @@ from m5.objects import *
from m5.defines import buildEnv
from .Ruby import create_topology
def define_options(parser):
parser.add_argument("--chi-config", action="store", type=str,
default=None,
help="NoC config. parameters and bindings. "
"Required for CustomMesh topology")
parser.add_argument(
"--chi-config",
action="store",
type=str,
default=None,
help="NoC config. parameters and bindings. "
"Required for CustomMesh topology",
)
parser.add_argument("--enable-dvm", default=False, action="store_true")
def read_config_file(file):
''' Read file as a module and return it '''
""" Read file as a module and return it """
import types
import importlib.machinery
loader = importlib.machinery.SourceFileLoader('chi_configs', file)
loader = importlib.machinery.SourceFileLoader("chi_configs", file)
chi_configs = types.ModuleType(loader.name)
loader.exec_module(chi_configs)
return chi_configs
def create_system(options, full_system, system, dma_ports, bootmem,
ruby_system, cpus):
if buildEnv['PROTOCOL'] != 'CHI':
def create_system(
options, full_system, system, dma_ports, bootmem, ruby_system, cpus
):
if buildEnv["PROTOCOL"] != "CHI":
m5.panic("This script requires the CHI build")
if options.num_dirs < 1:
m5.fatal('--num-dirs must be at least 1')
m5.fatal("--num-dirs must be at least 1")
if options.num_l3caches < 1:
m5.fatal('--num-l3caches must be at least 1')
m5.fatal("--num-l3caches must be at least 1")
if full_system and options.enable_dvm:
if len(cpus) <= 1:
@@ -76,8 +85,8 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# read specialized classes from config file if provided
if options.chi_config:
chi_defs = read_config_file(options.chi_config)
elif options.topology == 'CustomMesh':
m5.fatal('--noc-config must be provided if topology is CustomMesh')
elif options.topology == "CustomMesh":
m5.fatal("--noc-config must be provided if topology is CustomMesh")
else:
# Use the defaults from CHI_config
from . import CHI_config as chi_defs
@@ -93,7 +102,6 @@ def create_system(options, full_system, system, dma_ports, bootmem,
CHI_RNI_DMA = chi_defs.CHI_RNI_DMA
CHI_RNI_IO = chi_defs.CHI_RNI_IO
# Declare caches and controller types used by the protocol
# Notice tag and data accesses are not concurrent, so the a cache hit
# latency = tag + data + response latencies.
@@ -127,7 +135,7 @@ def create_system(options, full_system, system, dma_ports, bootmem,
assoc = options.l3_assoc
# other functions use system.cache_line_size assuming it has been set
assert(system.cache_line_size.value == options.cacheline_size)
assert system.cache_line_size.value == options.cacheline_size
cpu_sequencers = []
mem_cntrls = []
@@ -138,10 +146,17 @@ def create_system(options, full_system, system, dma_ports, bootmem,
all_cntrls = []
# Creates on RNF per cpu with priv l2 caches
assert(len(cpus) == options.num_cpus)
ruby_system.rnf = [ CHI_RNF([cpu], ruby_system, L1ICache, L1DCache,
system.cache_line_size.value)
for cpu in cpus ]
assert len(cpus) == options.num_cpus
ruby_system.rnf = [
CHI_RNF(
[cpu],
ruby_system,
L1ICache,
L1DCache,
system.cache_line_size.value,
)
for cpu in cpus
]
for rnf in ruby_system.rnf:
rnf.addPrivL2Cache(L2Cache)
cpu_sequencers.extend(rnf.getSequencers())
@@ -150,20 +165,20 @@ def create_system(options, full_system, system, dma_ports, bootmem,
network_cntrls.extend(rnf.getNetworkSideControllers())
# Creates one Misc Node
ruby_system.mn = [ CHI_MN(ruby_system, [cpu.l1d for cpu in cpus]) ]
ruby_system.mn = [CHI_MN(ruby_system, [cpu.l1d for cpu in cpus])]
for mn in ruby_system.mn:
all_cntrls.extend(mn.getAllControllers())
network_nodes.append(mn)
network_cntrls.extend(mn.getNetworkSideControllers())
assert(mn.getAllControllers() == mn.getNetworkSideControllers())
assert mn.getAllControllers() == mn.getNetworkSideControllers()
# Look for other memories
other_memories = []
if bootmem:
other_memories.append(bootmem)
if getattr(system, 'sram', None):
other_memories.append(getattr(system, 'sram', None))
on_chip_mem_ports = getattr(system, '_on_chip_mem_ports', None)
if getattr(system, "sram", None):
other_memories.append(getattr(system, "sram", None))
on_chip_mem_ports = getattr(system, "_on_chip_mem_ports", None)
if on_chip_mem_ports:
other_memories.extend([p.simobj for p in on_chip_mem_ports])
@@ -174,15 +189,16 @@ def create_system(options, full_system, system, dma_ports, bootmem,
sysranges.append(m.range)
hnf_list = [i for i in range(options.num_l3caches)]
CHI_HNF.createAddrRanges(sysranges, system.cache_line_size.value,
hnf_list)
ruby_system.hnf = [ CHI_HNF(i, ruby_system, HNFCache, None)
for i in range(options.num_l3caches) ]
CHI_HNF.createAddrRanges(sysranges, system.cache_line_size.value, hnf_list)
ruby_system.hnf = [
CHI_HNF(i, ruby_system, HNFCache, None)
for i in range(options.num_l3caches)
]
for hnf in ruby_system.hnf:
network_nodes.append(hnf)
network_cntrls.extend(hnf.getNetworkSideControllers())
assert(hnf.getAllControllers() == hnf.getNetworkSideControllers())
assert hnf.getAllControllers() == hnf.getNetworkSideControllers()
all_cntrls.extend(hnf.getAllControllers())
hnf_dests.extend(hnf.getAllControllers())
@@ -190,31 +206,34 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# Notice we don't define a Directory_Controller type so we don't use
# create_directories shared by other protocols.
ruby_system.snf = [ CHI_SNF_MainMem(ruby_system, None, None)
for i in range(options.num_dirs) ]
ruby_system.snf = [
CHI_SNF_MainMem(ruby_system, None, None)
for i in range(options.num_dirs)
]
for snf in ruby_system.snf:
network_nodes.append(snf)
network_cntrls.extend(snf.getNetworkSideControllers())
assert(snf.getAllControllers() == snf.getNetworkSideControllers())
assert snf.getAllControllers() == snf.getNetworkSideControllers()
mem_cntrls.extend(snf.getAllControllers())
all_cntrls.extend(snf.getAllControllers())
mem_dests.extend(snf.getAllControllers())
if len(other_memories) > 0:
ruby_system.rom_snf = [ CHI_SNF_BootMem(ruby_system, None, m)
for m in other_memories ]
ruby_system.rom_snf = [
CHI_SNF_BootMem(ruby_system, None, m) for m in other_memories
]
for snf in ruby_system.rom_snf:
network_nodes.append(snf)
network_cntrls.extend(snf.getNetworkSideControllers())
all_cntrls.extend(snf.getAllControllers())
mem_dests.extend(snf.getAllControllers())
# Creates the controller for dma ports and io
if len(dma_ports) > 0:
ruby_system.dma_rni = [ CHI_RNI_DMA(ruby_system, dma_port, None)
for dma_port in dma_ports ]
ruby_system.dma_rni = [
CHI_RNI_DMA(ruby_system, dma_port, None) for dma_port in dma_ports
]
for rni in ruby_system.dma_rni:
network_nodes.append(rni)
network_cntrls.extend(rni.getNetworkSideControllers())
@@ -226,7 +245,6 @@ def create_system(options, full_system, system, dma_ports, bootmem,
network_cntrls.extend(ruby_system.io_rni.getNetworkSideControllers())
all_cntrls.extend(ruby_system.io_rni.getAllControllers())
# Assign downstream destinations
for rnf in ruby_system.rnf:
rnf.setDownstream(hnf_dests)
@@ -253,12 +271,12 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# Incorporate the params into options so it's propagated to
# makeTopology and create_topology the parent scripts
for k in dir(params):
if not k.startswith('__'):
if not k.startswith("__"):
setattr(options, k, getattr(params, k))
if options.topology == 'CustomMesh':
if options.topology == "CustomMesh":
topology = create_topology(network_nodes, options)
elif options.topology in ['Crossbar', 'Pt2Pt']:
elif options.topology in ["Crossbar", "Pt2Pt"]:
topology = create_topology(network_cntrls, options)
else:
m5.fatal("%s not supported!" % options.topology)

View File

@@ -33,7 +33,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''
"""
Definitions for CHI nodes and controller types. These are used by
create_system in configs/ruby/CHI.py or may be used in custom configuration
scripts. When used with create_system, the user may provide an additional
@@ -43,18 +43,21 @@ defined here.
When using the CustomMesh topology, --chi-config must be provided with
specialization of the NoC_Param classes defining the NoC dimensions and
node to router binding. See configs/example/noc_config/2x4.py for an example.
'''
"""
import math
import m5
from m5.objects import *
class Versions:
'''
"""
Helper class to obtain unique ids for a given controller class.
These are passed as the 'version' parameter when creating the controller.
'''
"""
_seqs = 0
@classmethod
def getSeqId(cls):
val = cls._seqs
@@ -62,6 +65,7 @@ class Versions:
return val
_version = {}
@classmethod
def getVersion(cls, tp):
if tp not in cls._version:
@@ -72,11 +76,12 @@ class Versions:
class NoC_Params:
'''
"""
Default parameters for the interconnect. The value of data_width is
also used to set the data_channel_size for all CHI controllers.
(see configs/ruby/CHI.py)
'''
"""
router_link_latency = 1
node_link_latency = 1
router_latency = 1
@@ -86,16 +91,17 @@ class NoC_Params:
cross_links = []
cross_link_latency = 0
class CHI_Node(SubSystem):
'''
"""
Base class with common functions for setting up Cache or Memory
controllers that are part of a CHI RNF, RNFI, HNF, or SNF nodes.
Notice getNetworkSideControllers and getAllControllers must be implemented
in the derived classes.
'''
"""
class NoC_Params:
'''
"""
NoC config. parameters and bindings required for CustomMesh topology.
Maps 'num_nodes_per_router' CHI nodes to each router provided in
@@ -104,7 +110,8 @@ class CHI_Node(SubSystem):
If 'num_nodes_per_router' is left undefined, we circulate around
'router_list' until all nodes are mapped.
See 'distributeNodes' in configs/topologies/CustomMesh.py
'''
"""
num_nodes_per_router = None
router_list = None
@@ -114,30 +121,30 @@ class CHI_Node(SubSystem):
self._network = ruby_system.network
def getNetworkSideControllers(self):
'''
"""
Returns all ruby controllers that need to be connected to the
network
'''
"""
raise NotImplementedError()
def getAllControllers(self):
'''
"""
Returns all ruby controllers associated with this node
'''
"""
raise NotImplementedError()
def setDownstream(self, cntrls):
'''
"""
Sets cntrls as the downstream list of all controllers in this node
'''
"""
for c in self.getNetworkSideControllers():
c.downstream_destinations = cntrls
def connectController(self, cntrl):
'''
"""
Creates and configures the messages buffers for the CHI input/output
ports that connect to the network
'''
"""
cntrl.reqOut = MessageBuffer()
cntrl.rspOut = MessageBuffer()
cntrl.snpOut = MessageBuffer()
@@ -162,35 +169,39 @@ class CHI_Node(SubSystem):
class TriggerMessageBuffer(MessageBuffer):
'''
"""
MessageBuffer for triggering internal controller events.
These buffers should not be affected by the Ruby tester randomization
and allow poping messages enqueued in the same cycle.
'''
randomization = 'disabled'
"""
randomization = "disabled"
allow_zero_latency = True
class OrderedTriggerMessageBuffer(TriggerMessageBuffer):
ordered = True
class CHI_Cache_Controller(Cache_Controller):
'''
"""
Default parameters for a Cache controller
The Cache_Controller can also be used as a DMA requester or as
a pure directory if all cache allocation policies are disabled.
'''
"""
def __init__(self, ruby_system):
super(CHI_Cache_Controller, self).__init__(
version = Versions.getVersion(Cache_Controller),
ruby_system = ruby_system,
mandatoryQueue = MessageBuffer(),
prefetchQueue = MessageBuffer(),
triggerQueue = TriggerMessageBuffer(),
retryTriggerQueue = OrderedTriggerMessageBuffer(),
replTriggerQueue = OrderedTriggerMessageBuffer(),
reqRdy = TriggerMessageBuffer(),
snpRdy = TriggerMessageBuffer())
version=Versions.getVersion(Cache_Controller),
ruby_system=ruby_system,
mandatoryQueue=MessageBuffer(),
prefetchQueue=MessageBuffer(),
triggerQueue=TriggerMessageBuffer(),
retryTriggerQueue=OrderedTriggerMessageBuffer(),
replTriggerQueue=OrderedTriggerMessageBuffer(),
reqRdy=TriggerMessageBuffer(),
snpRdy=TriggerMessageBuffer(),
)
# Set somewhat large number since we really a lot on internal
# triggers. To limit the controller performance, tweak other
# params such as: input port buffer size, cache banks, and output
@@ -200,10 +211,11 @@ class CHI_Cache_Controller(Cache_Controller):
# timeouts on unique lines when a store conditional fails
self.sc_lock_enabled = False
class CHI_L1Controller(CHI_Cache_Controller):
'''
"""
Default parameters for a L1 Cache controller
'''
"""
def __init__(self, ruby_system, sequencer, cache, prefetcher):
super(CHI_L1Controller, self).__init__(ruby_system)
@@ -235,10 +247,11 @@ class CHI_L1Controller(CHI_Cache_Controller):
self.unify_repl_TBEs = False
class CHI_L2Controller(CHI_Cache_Controller):
'''
"""
Default parameters for a L2 Cache controller
'''
"""
def __init__(self, ruby_system, cache, prefetcher):
super(CHI_L2Controller, self).__init__(ruby_system)
@@ -265,14 +278,15 @@ class CHI_L2Controller(CHI_Cache_Controller):
self.number_of_TBEs = 32
self.number_of_repl_TBEs = 32
self.number_of_snoop_TBEs = 16
self.number_of_DVM_TBEs = 1 # should not receive any dvm
self.number_of_DVM_snoop_TBEs = 1 # should not receive any dvm
self.number_of_DVM_TBEs = 1 # should not receive any dvm
self.number_of_DVM_snoop_TBEs = 1 # should not receive any dvm
self.unify_repl_TBEs = False
class CHI_HNFController(CHI_Cache_Controller):
'''
"""
Default parameters for a coherent home node (HNF) cache controller
'''
"""
def __init__(self, ruby_system, cache, prefetcher, addr_ranges):
super(CHI_HNFController, self).__init__(ruby_system)
@@ -299,27 +313,29 @@ class CHI_HNFController(CHI_Cache_Controller):
# Some reasonable default TBE params
self.number_of_TBEs = 32
self.number_of_repl_TBEs = 32
self.number_of_snoop_TBEs = 1 # should not receive any snoop
self.number_of_DVM_TBEs = 1 # should not receive any dvm
self.number_of_DVM_snoop_TBEs = 1 # should not receive any dvm
self.number_of_snoop_TBEs = 1 # should not receive any snoop
self.number_of_DVM_TBEs = 1 # should not receive any dvm
self.number_of_DVM_snoop_TBEs = 1 # should not receive any dvm
self.unify_repl_TBEs = False
class CHI_MNController(MiscNode_Controller):
'''
Default parameters for a Misc Node
'''
def __init__(self, ruby_system, addr_range, l1d_caches,
early_nonsync_comp):
class CHI_MNController(MiscNode_Controller):
"""
Default parameters for a Misc Node
"""
def __init__(
self, ruby_system, addr_range, l1d_caches, early_nonsync_comp
):
super(CHI_MNController, self).__init__(
version = Versions.getVersion(MiscNode_Controller),
ruby_system = ruby_system,
mandatoryQueue = MessageBuffer(),
triggerQueue = TriggerMessageBuffer(),
retryTriggerQueue = TriggerMessageBuffer(),
schedRspTriggerQueue = TriggerMessageBuffer(),
reqRdy = TriggerMessageBuffer(),
snpRdy = TriggerMessageBuffer(),
version=Versions.getVersion(MiscNode_Controller),
ruby_system=ruby_system,
mandatoryQueue=MessageBuffer(),
triggerQueue=TriggerMessageBuffer(),
retryTriggerQueue=TriggerMessageBuffer(),
schedRspTriggerQueue=TriggerMessageBuffer(),
reqRdy=TriggerMessageBuffer(),
snpRdy=TriggerMessageBuffer(),
)
# Set somewhat large number since we really a lot on internal
# triggers. To limit the controller performance, tweak other
@@ -335,19 +351,22 @@ class CHI_MNController(MiscNode_Controller):
# "upstream_destinations" = targets for DVM snoops
self.upstream_destinations = l1d_caches
class CHI_DMAController(CHI_Cache_Controller):
'''
"""
Default parameters for a DMA controller
'''
"""
def __init__(self, ruby_system, sequencer):
super(CHI_DMAController, self).__init__(ruby_system)
self.sequencer = sequencer
class DummyCache(RubyCache):
dataAccessLatency = 0
tagAccessLatency = 1
size = "128"
assoc = 1
self.use_prefetcher = False
self.cache = DummyCache()
self.sequencer.dcache = NULL
@@ -370,37 +389,39 @@ class CHI_DMAController(CHI_Cache_Controller):
self.send_evictions = False
self.number_of_TBEs = 16
self.number_of_repl_TBEs = 1
self.number_of_snoop_TBEs = 1 # should not receive any snoop
self.number_of_DVM_TBEs = 1 # should not receive any dvm
self.number_of_DVM_snoop_TBEs = 1 # should not receive any dvm
self.number_of_snoop_TBEs = 1 # should not receive any snoop
self.number_of_DVM_TBEs = 1 # should not receive any dvm
self.number_of_DVM_snoop_TBEs = 1 # should not receive any dvm
self.unify_repl_TBEs = False
class CPUSequencerWrapper:
'''
"""
Other generic configuration scripts assume a matching number of sequencers
and cpus. This wraps the instruction and data sequencer so they are
compatible with the other scripts. This assumes all scripts are using
connectCpuPorts/connectIOPorts to bind ports
'''
"""
def __init__(self, iseq, dseq):
# use this style due to __setattr__ override below
self.__dict__['inst_seq'] = iseq
self.__dict__['data_seq'] = dseq
self.__dict__['support_data_reqs'] = True
self.__dict__['support_inst_reqs'] = True
self.__dict__["inst_seq"] = iseq
self.__dict__["data_seq"] = dseq
self.__dict__["support_data_reqs"] = True
self.__dict__["support_inst_reqs"] = True
# Compatibility with certain scripts that wire up ports
# without connectCpuPorts
self.__dict__['in_ports'] = dseq.in_ports
self.__dict__["in_ports"] = dseq.in_ports
def connectCpuPorts(self, cpu):
assert(isinstance(cpu, BaseCPU))
assert isinstance(cpu, BaseCPU)
cpu.icache_port = self.inst_seq.in_ports
for p in cpu._cached_ports:
if str(p) != 'icache_port':
exec('cpu.%s = self.data_seq.in_ports' % p)
if str(p) != "icache_port":
exec("cpu.%s = self.data_seq.in_ports" % p)
cpu.connectUncachedPorts(
self.data_seq.in_ports, self.data_seq.interrupt_out_port)
self.data_seq.in_ports, self.data_seq.interrupt_out_port
)
def connectIOPorts(self, piobus):
self.data_seq.connectIOPorts(piobus)
@@ -409,18 +430,25 @@ class CPUSequencerWrapper:
setattr(self.inst_seq, name, value)
setattr(self.data_seq, name, value)
class CHI_RNF(CHI_Node):
'''
"""
Defines a CHI request node.
Notice all contollers and sequencers are set as children of the cpus, so
this object acts more like a proxy for seting things up and has no topology
significance unless the cpus are set as its children at the top level
'''
"""
def __init__(self, cpus, ruby_system,
l1Icache_type, l1Dcache_type,
cache_line_size,
l1Iprefetcher_type=None, l1Dprefetcher_type=None):
def __init__(
self,
cpus,
ruby_system,
l1Icache_type,
l1Dcache_type,
cache_line_size,
l1Iprefetcher_type=None,
l1Dprefetcher_type=None,
):
super(CHI_RNF, self).__init__(ruby_system)
self._block_size_bits = int(math.log(cache_line_size, 2))
@@ -437,33 +465,40 @@ class CHI_RNF(CHI_Node):
# First creates L1 caches and sequencers
for cpu in self._cpus:
cpu.inst_sequencer = RubySequencer(version = Versions.getSeqId(),
ruby_system = ruby_system)
cpu.data_sequencer = RubySequencer(version = Versions.getSeqId(),
ruby_system = ruby_system)
cpu.inst_sequencer = RubySequencer(
version=Versions.getSeqId(), ruby_system=ruby_system
)
cpu.data_sequencer = RubySequencer(
version=Versions.getSeqId(), ruby_system=ruby_system
)
self._seqs.append(CPUSequencerWrapper(cpu.inst_sequencer,
cpu.data_sequencer))
self._seqs.append(
CPUSequencerWrapper(cpu.inst_sequencer, cpu.data_sequencer)
)
# caches
l1i_cache = l1Icache_type(start_index_bit = self._block_size_bits,
is_icache = True)
l1i_cache = l1Icache_type(
start_index_bit=self._block_size_bits, is_icache=True
)
l1d_cache = l1Dcache_type(start_index_bit = self._block_size_bits,
is_icache = False)
l1d_cache = l1Dcache_type(
start_index_bit=self._block_size_bits, is_icache=False
)
# Placeholders for future prefetcher support
if l1Iprefetcher_type != None or l1Dprefetcher_type != None:
m5.fatal('Prefetching not supported yet')
m5.fatal("Prefetching not supported yet")
l1i_pf = NULL
l1d_pf = NULL
# cache controllers
cpu.l1i = CHI_L1Controller(ruby_system, cpu.inst_sequencer,
l1i_cache, l1i_pf)
cpu.l1i = CHI_L1Controller(
ruby_system, cpu.inst_sequencer, l1i_cache, l1i_pf
)
cpu.l1d = CHI_L1Controller(ruby_system, cpu.data_sequencer,
l1d_cache, l1d_pf)
cpu.l1d = CHI_L1Controller(
ruby_system, cpu.data_sequencer, l1d_cache, l1d_pf
)
cpu.inst_sequencer.dcache = NULL
cpu.data_sequencer.dcache = cpu.l1d.cache
@@ -496,10 +531,11 @@ class CHI_RNF(CHI_Node):
def addPrivL2Cache(self, cache_type, pf_type=None):
self._ll_cntrls = []
for cpu in self._cpus:
l2_cache = cache_type(start_index_bit = self._block_size_bits,
is_icache = False)
l2_cache = cache_type(
start_index_bit=self._block_size_bits, is_icache=False
)
if pf_type != None:
m5.fatal('Prefetching not supported yet')
m5.fatal("Prefetching not supported yet")
l2_pf = NULL
cpu.l2 = CHI_L2Controller(self._ruby_system, l2_cache, l2_pf)
@@ -515,18 +551,20 @@ class CHI_RNF(CHI_Node):
class CHI_HNF(CHI_Node):
'''
"""
Encapsulates an HNF cache/directory controller.
Before the first controller is created, the class method
CHI_HNF.createAddrRanges must be called before creating any CHI_HNF object
to set-up the interleaved address ranges used by the HNFs
'''
"""
class NoC_Params(CHI_Node.NoC_Params):
'''HNFs may also define the 'pairing' parameter to allow pairing'''
"""HNFs may also define the 'pairing' parameter to allow pairing"""
pairing = None
_addr_ranges = {}
@classmethod
def createAddrRanges(cls, sys_mem_ranges, cache_line_size, hnfs):
# Create the HNFs interleaved addr ranges
@@ -536,16 +574,19 @@ class CHI_HNF(CHI_Node):
for i, hnf in enumerate(hnfs):
ranges = []
for r in sys_mem_ranges:
addr_range = AddrRange(r.start, size = r.size(),
intlvHighBit = numa_bit,
intlvBits = llc_bits,
intlvMatch = i)
addr_range = AddrRange(
r.start,
size=r.size(),
intlvHighBit=numa_bit,
intlvBits=llc_bits,
intlvMatch=i,
)
ranges.append(addr_range)
cls._addr_ranges[hnf] = (ranges, numa_bit)
@classmethod
def getAddrRanges(cls, hnf_idx):
assert(len(cls._addr_ranges) != 0)
assert len(cls._addr_ranges) != 0
return cls._addr_ranges[hnf_idx]
# The CHI controller can be a child of this object or another if
@@ -553,13 +594,14 @@ class CHI_HNF(CHI_Node):
def __init__(self, hnf_idx, ruby_system, llcache_type, parent):
super(CHI_HNF, self).__init__(ruby_system)
addr_ranges,intlvHighBit = self.getAddrRanges(hnf_idx)
addr_ranges, intlvHighBit = self.getAddrRanges(hnf_idx)
# All ranges should have the same interleaving
assert(len(addr_ranges) >= 1)
assert len(addr_ranges) >= 1
ll_cache = llcache_type(start_index_bit = intlvHighBit + 1)
self._cntrl = CHI_HNFController(ruby_system, ll_cache, NULL,
addr_ranges)
ll_cache = llcache_type(start_index_bit=intlvHighBit + 1)
self._cntrl = CHI_HNFController(
ruby_system, ll_cache, NULL, addr_ranges
)
if parent == None:
self.cntrl = self._cntrl
@@ -576,14 +618,14 @@ class CHI_HNF(CHI_Node):
class CHI_MN(CHI_Node):
'''
"""
Encapsulates a Misc Node controller.
'''
"""
class NoC_Params(CHI_Node.NoC_Params):
'''HNFs may also define the 'pairing' parameter to allow pairing'''
pairing = None
"""HNFs may also define the 'pairing' parameter to allow pairing"""
pairing = None
# The CHI controller can be a child of this object or another if
# 'parent' if specified
@@ -591,10 +633,11 @@ class CHI_MN(CHI_Node):
super(CHI_MN, self).__init__(ruby_system)
# MiscNode has internal address range starting at 0
addr_range = AddrRange(0, size = "1kB")
addr_range = AddrRange(0, size="1kB")
self._cntrl = CHI_MNController(ruby_system, addr_range, l1d_caches,
early_nonsync_comp)
self._cntrl = CHI_MNController(
ruby_system, addr_range, l1d_caches, early_nonsync_comp
)
self.cntrl = self._cntrl
@@ -609,10 +652,11 @@ class CHI_MN(CHI_Node):
def getNetworkSideControllers(self):
return [self._cntrl]
class CHI_SNF_Base(CHI_Node):
'''
"""
Creates CHI node controllers for the memory controllers
'''
"""
# The CHI controller can be a child of this object or another if
# 'parent' if specified
@@ -620,12 +664,13 @@ class CHI_SNF_Base(CHI_Node):
super(CHI_SNF_Base, self).__init__(ruby_system)
self._cntrl = Memory_Controller(
version = Versions.getVersion(Memory_Controller),
ruby_system = ruby_system,
triggerQueue = TriggerMessageBuffer(),
responseFromMemory = MessageBuffer(),
requestToMemory = MessageBuffer(ordered = True),
reqRdy = TriggerMessageBuffer())
version=Versions.getVersion(Memory_Controller),
ruby_system=ruby_system,
triggerQueue=TriggerMessageBuffer(),
responseFromMemory=MessageBuffer(),
requestToMemory=MessageBuffer(ordered=True),
reqRdy=TriggerMessageBuffer(),
)
self.connectController(self._cntrl)
@@ -643,46 +688,51 @@ class CHI_SNF_Base(CHI_Node):
def getMemRange(self, mem_ctrl):
# TODO need some kind of transparent API for
# MemCtrl+DRAM vs SimpleMemory
if hasattr(mem_ctrl, 'range'):
if hasattr(mem_ctrl, "range"):
return mem_ctrl.range
else:
return mem_ctrl.dram.range
class CHI_SNF_BootMem(CHI_SNF_Base):
'''
"""
Create the SNF for the boot memory
'''
"""
def __init__(self, ruby_system, parent, bootmem):
super(CHI_SNF_BootMem, self).__init__(ruby_system, parent)
self._cntrl.memory_out_port = bootmem.port
self._cntrl.addr_ranges = self.getMemRange(bootmem)
class CHI_SNF_MainMem(CHI_SNF_Base):
'''
Create the SNF for a list main memory controllers
'''
def __init__(self, ruby_system, parent, mem_ctrl = None):
class CHI_SNF_MainMem(CHI_SNF_Base):
"""
Create the SNF for a list main memory controllers
"""
def __init__(self, ruby_system, parent, mem_ctrl=None):
super(CHI_SNF_MainMem, self).__init__(ruby_system, parent)
if mem_ctrl:
self._cntrl.memory_out_port = mem_ctrl.port
self._cntrl.addr_ranges = self.getMemRange(mem_ctrl)
# else bind ports and range later
class CHI_RNI_Base(CHI_Node):
'''
"""
Request node without cache / DMA
'''
"""
# The CHI controller can be a child of this object or another if
# 'parent' if specified
def __init__(self, ruby_system, parent):
super(CHI_RNI_Base, self).__init__(ruby_system)
self._sequencer = RubySequencer(version = Versions.getSeqId(),
ruby_system = ruby_system,
clk_domain = ruby_system.clk_domain)
self._sequencer = RubySequencer(
version=Versions.getSeqId(),
ruby_system=ruby_system,
clk_domain=ruby_system.clk_domain,
)
self._cntrl = CHI_DMAController(ruby_system, self._sequencer)
if parent:
@@ -698,20 +748,22 @@ class CHI_RNI_Base(CHI_Node):
def getNetworkSideControllers(self):
return [self._cntrl]
class CHI_RNI_DMA(CHI_RNI_Base):
'''
"""
DMA controller wiredup to a given dma port
'''
"""
def __init__(self, ruby_system, dma_port, parent):
super(CHI_RNI_DMA, self).__init__(ruby_system, parent)
assert(dma_port != None)
assert dma_port != None
self._sequencer.in_ports = dma_port
class CHI_RNI_IO(CHI_RNI_Base):
'''
"""
DMA controller wiredup to ruby_system IO port
'''
"""
def __init__(self, ruby_system, parent):
super(CHI_RNI_IO, self).__init__(ruby_system, parent)

View File

@@ -24,8 +24,10 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
class CntrlBase:
_seqs = 0
@classmethod
def seqCount(cls):
# Use SeqCount not class since we need global count
@@ -33,6 +35,7 @@ class CntrlBase:
return CntrlBase._seqs - 1
_cntrls = 0
@classmethod
def cntrlCount(cls):
# Use CntlCount not class since we need global count
@@ -40,7 +43,8 @@ class CntrlBase:
return CntrlBase._cntrls - 1
_version = 0
@classmethod
def versionCount(cls):
cls._version += 1 # Use count for this particular type
cls._version += 1 # Use count for this particular type
return cls._version - 1

File diff suppressed because it is too large. [Load Diff]

View File

@@ -34,14 +34,18 @@ from .Ruby import create_topology, create_directories
#
# Declare caches used by the protocol
#
class L1Cache(RubyCache): pass
class L1Cache(RubyCache):
pass
def define_options(parser):
return
def create_system(options, full_system, system, dma_ports, bootmem,
ruby_system, cpus):
if buildEnv['PROTOCOL'] != 'Garnet_standalone':
def create_system(
options, full_system, system, dma_ports, bootmem, ruby_system, cpus
):
if buildEnv["PROTOCOL"] != "Garnet_standalone":
panic("This script requires Garnet_standalone protocol to be built.")
cpu_sequencers = []
@@ -49,7 +53,7 @@ def create_system(options, full_system, system, dma_ports, bootmem,
#
# The Garnet_standalone protocol does not support fs nor dma
#
assert(dma_ports == [])
assert dma_ports == []
#
# The ruby network creation expects the list of nodes in the system to be
@@ -69,19 +73,18 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# Only one cache exists for this protocol, so by default use the L1D
# config parameters.
#
cache = L1Cache(size = options.l1d_size,
assoc = options.l1d_assoc)
cache = L1Cache(size=options.l1d_size, assoc=options.l1d_assoc)
#
# Only one unified L1 cache exists. Can cache instructions and data.
#
l1_cntrl = L1Cache_Controller(version = i,
cacheMemory = cache,
ruby_system = ruby_system)
l1_cntrl = L1Cache_Controller(
version=i, cacheMemory=cache, ruby_system=ruby_system
)
cpu_seq = RubySequencer(dcache = cache,
garnet_standalone = True,
ruby_system = ruby_system)
cpu_seq = RubySequencer(
dcache=cache, garnet_standalone=True, ruby_system=ruby_system
)
l1_cntrl.sequencer = cpu_seq
exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)
@@ -97,7 +100,8 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l1_cntrl.forwardFromCache = MessageBuffer()
mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
options, bootmem, ruby_system, system)
options, bootmem, ruby_system, system
)
dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
if rom_dir_cntrl_node is not None:
dir_cntrl_nodes.append(rom_dir_cntrl_node)
@@ -107,7 +111,6 @@ def create_system(options, full_system, system, dma_ports, bootmem,
dir_cntrl.forwardToDir = MessageBuffer()
dir_cntrl.responseToDir = MessageBuffer()
all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes
ruby_system.network.number_of_virtual_networks = 3
topology = create_topology(all_cntrls, options)

View File

@@ -38,15 +38,26 @@ from common import FileSystemConfig
#
# Declare caches used by the protocol
#
class L0Cache(RubyCache): pass
class L1Cache(RubyCache): pass
class L2Cache(RubyCache): pass
class L0Cache(RubyCache):
pass
class L1Cache(RubyCache):
pass
class L2Cache(RubyCache):
pass
def define_options(parser):
parser.add_argument(
"--num-clusters", type=int, default=1,
"--num-clusters",
type=int,
default=1,
help="number of clusters in a design in which there are shared\
caches private to clusters")
caches private to clusters",
)
parser.add_argument("--l0i_size", type=str, default="4096B")
parser.add_argument("--l0d_size", type=str, default="4096B")
parser.add_argument("--l0i_assoc", type=int, default=1)
@@ -55,16 +66,23 @@ def define_options(parser):
parser.add_argument("--l1_transitions_per_cycle", type=int, default=32)
parser.add_argument("--l2_transitions_per_cycle", type=int, default=4)
parser.add_argument(
"--enable-prefetch", action="store_true", default=False,
help="Enable Ruby hardware prefetcher")
"--enable-prefetch",
action="store_true",
default=False,
help="Enable Ruby hardware prefetcher",
)
return
def create_system(options, full_system, system, dma_ports, bootmem,
ruby_system, cpus):
if buildEnv['PROTOCOL'] != 'MESI_Three_Level':
fatal("This script requires the MESI_Three_Level protocol to be\
built.")
def create_system(
options, full_system, system, dma_ports, bootmem, ruby_system, cpus
):
if buildEnv["PROTOCOL"] != "MESI_Three_Level":
fatal(
"This script requires the MESI_Three_Level protocol to be\
built."
)
cpu_sequencers = []
@@ -79,10 +97,10 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l2_cntrl_nodes = []
dma_cntrl_nodes = []
assert (options.num_cpus % options.num_clusters == 0)
assert options.num_cpus % options.num_clusters == 0
num_cpus_per_cluster = options.num_cpus // options.num_clusters
assert (options.num_l2caches % options.num_clusters == 0)
assert options.num_l2caches % options.num_clusters == 0
num_l2caches_per_cluster = options.num_l2caches // options.num_clusters
l2_bits = int(math.log(num_l2caches_per_cluster, 2))
@@ -98,63 +116,79 @@ def create_system(options, full_system, system, dma_ports, bootmem,
#
# First create the Ruby objects associated with this cpu
#
l0i_cache = L0Cache(size = options.l0i_size,
assoc = options.l0i_assoc,
is_icache = True,
start_index_bit = block_size_bits,
replacement_policy = LRURP())
l0i_cache = L0Cache(
size=options.l0i_size,
assoc=options.l0i_assoc,
is_icache=True,
start_index_bit=block_size_bits,
replacement_policy=LRURP(),
)
l0d_cache = L0Cache(size = options.l0d_size,
assoc = options.l0d_assoc,
is_icache = False,
start_index_bit = block_size_bits,
replacement_policy = LRURP())
l0d_cache = L0Cache(
size=options.l0d_size,
assoc=options.l0d_assoc,
is_icache=False,
start_index_bit=block_size_bits,
replacement_policy=LRURP(),
)
clk_domain = cpus[i].clk_domain
# Ruby prefetcher
prefetcher = RubyPrefetcher(
num_streams=16,
unit_filter = 256,
nonunit_filter = 256,
train_misses = 5,
num_startup_pfs = 4,
cross_page = True
unit_filter=256,
nonunit_filter=256,
train_misses=5,
num_startup_pfs=4,
cross_page=True,
)
l0_cntrl = L0Cache_Controller(
version = i * num_cpus_per_cluster + j,
Icache = l0i_cache, Dcache = l0d_cache,
transitions_per_cycle = options.l0_transitions_per_cycle,
prefetcher = prefetcher,
enable_prefetch = options.enable_prefetch,
send_evictions = send_evicts(options),
clk_domain = clk_domain,
ruby_system = ruby_system)
version=i * num_cpus_per_cluster + j,
Icache=l0i_cache,
Dcache=l0d_cache,
transitions_per_cycle=options.l0_transitions_per_cycle,
prefetcher=prefetcher,
enable_prefetch=options.enable_prefetch,
send_evictions=send_evicts(options),
clk_domain=clk_domain,
ruby_system=ruby_system,
)
cpu_seq = RubySequencer(version = i * num_cpus_per_cluster + j,
clk_domain = clk_domain,
dcache = l0d_cache,
ruby_system = ruby_system)
cpu_seq = RubySequencer(
version=i * num_cpus_per_cluster + j,
clk_domain=clk_domain,
dcache=l0d_cache,
ruby_system=ruby_system,
)
l0_cntrl.sequencer = cpu_seq
l1_cache = L1Cache(size = options.l1d_size,
assoc = options.l1d_assoc,
start_index_bit = block_size_bits,
is_icache = False)
l1_cache = L1Cache(
size=options.l1d_size,
assoc=options.l1d_assoc,
start_index_bit=block_size_bits,
is_icache=False,
)
l1_cntrl = L1Cache_Controller(
version = i * num_cpus_per_cluster + j,
cache = l1_cache, l2_select_num_bits = l2_bits,
cluster_id = i,
transitions_per_cycle = options.l1_transitions_per_cycle,
ruby_system = ruby_system)
version=i * num_cpus_per_cluster + j,
cache=l1_cache,
l2_select_num_bits=l2_bits,
cluster_id=i,
transitions_per_cycle=options.l1_transitions_per_cycle,
ruby_system=ruby_system,
)
exec("ruby_system.l0_cntrl%d = l0_cntrl"
% ( i * num_cpus_per_cluster + j))
exec("ruby_system.l1_cntrl%d = l1_cntrl"
% ( i * num_cpus_per_cluster + j))
exec(
"ruby_system.l0_cntrl%d = l0_cntrl"
% (i * num_cpus_per_cluster + j)
)
exec(
"ruby_system.l1_cntrl%d = l1_cntrl"
% (i * num_cpus_per_cluster + j)
)
#
# Add controllers and sequencers to the appropriate lists
@@ -166,9 +200,9 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# Connect the L0 and L1 controllers
l0_cntrl.prefetchQueue = MessageBuffer()
l0_cntrl.mandatoryQueue = MessageBuffer()
l0_cntrl.bufferToL1 = MessageBuffer(ordered = True)
l0_cntrl.bufferToL1 = MessageBuffer(ordered=True)
l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1
l0_cntrl.bufferFromL1 = MessageBuffer(ordered = True)
l0_cntrl.bufferFromL1 = MessageBuffer(ordered=True)
l1_cntrl.bufferToL0 = l0_cntrl.bufferFromL1
# Connect the L1 controllers and the network
@@ -184,28 +218,36 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l1_cntrl.responseFromL2 = MessageBuffer()
l1_cntrl.responseFromL2.in_port = ruby_system.network.out_port
for j in range(num_l2caches_per_cluster):
l2_cache = L2Cache(size = options.l2_size,
assoc = options.l2_assoc,
start_index_bit = l2_index_start)
l2_cache = L2Cache(
size=options.l2_size,
assoc=options.l2_assoc,
start_index_bit=l2_index_start,
)
l2_cntrl = L2Cache_Controller(
version = i * num_l2caches_per_cluster + j,
L2cache = l2_cache, cluster_id = i,
transitions_per_cycle =\
options.l2_transitions_per_cycle,
ruby_system = ruby_system)
version=i * num_l2caches_per_cluster + j,
L2cache=l2_cache,
cluster_id=i,
transitions_per_cycle=options.l2_transitions_per_cycle,
ruby_system=ruby_system,
)
exec("ruby_system.l2_cntrl%d = l2_cntrl"
% (i * num_l2caches_per_cluster + j))
exec(
"ruby_system.l2_cntrl%d = l2_cntrl"
% (i * num_l2caches_per_cluster + j)
)
l2_cntrl_nodes.append(l2_cntrl)
# Connect the L2 controllers and the network
l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
l2_cntrl.DirRequestFromL2Cache.out_port = ruby_system.network.in_port
l2_cntrl.DirRequestFromL2Cache.out_port = (
ruby_system.network.in_port
)
l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
l2_cntrl.L1RequestFromL2Cache.out_port = ruby_system.network.in_port
l2_cntrl.L1RequestFromL2Cache.out_port = (
ruby_system.network.in_port
)
l2_cntrl.responseFromL2Cache = MessageBuffer()
l2_cntrl.responseFromL2Cache.out_port = ruby_system.network.in_port
@@ -220,10 +262,12 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# the ruby system
# clk_divider value is a fix to pass regression.
ruby_system.memctrl_clk_domain = DerivedClockDomain(
clk_domain = ruby_system.clk_domain, clk_divider = 3)
clk_domain=ruby_system.clk_domain, clk_divider=3
)
mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
options, bootmem, ruby_system, system)
options, bootmem, ruby_system, system
)
dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
if rom_dir_cntrl_node is not None:
dir_cntrl_nodes.append(rom_dir_cntrl_node)
@@ -242,12 +286,14 @@ def create_system(options, full_system, system, dma_ports, bootmem,
#
# Create the Ruby objects associated with the dma controller
#
dma_seq = DMASequencer(version = i, ruby_system = ruby_system)
dma_seq = DMASequencer(version=i, ruby_system=ruby_system)
dma_cntrl = DMA_Controller(version = i,
dma_sequencer = dma_seq,
transitions_per_cycle = options.ports,
ruby_system = ruby_system)
dma_cntrl = DMA_Controller(
version=i,
dma_sequencer=dma_seq,
transitions_per_cycle=options.ports,
ruby_system=ruby_system,
)
exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
exec("ruby_system.dma_cntrl%d.dma_sequencer.in_ports = dma_port" % i)
@@ -255,29 +301,33 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# Connect the dma controller to the network
dma_cntrl.mandatoryQueue = MessageBuffer()
dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
dma_cntrl.responseFromDir.in_port = ruby_system.network.out_port
dma_cntrl.requestToDir = MessageBuffer()
dma_cntrl.requestToDir.out_port = ruby_system.network.in_port
all_cntrls = l0_cntrl_nodes + \
l1_cntrl_nodes + \
l2_cntrl_nodes + \
dir_cntrl_nodes + \
dma_cntrl_nodes
all_cntrls = (
l0_cntrl_nodes
+ l1_cntrl_nodes
+ l2_cntrl_nodes
+ dir_cntrl_nodes
+ dma_cntrl_nodes
)
# Create the io controller and the sequencer
if full_system:
io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
ruby_system._io_port = io_seq
io_controller = DMA_Controller(version = len(dma_ports),
dma_sequencer = io_seq,
ruby_system = ruby_system)
io_controller = DMA_Controller(
version=len(dma_ports),
dma_sequencer=io_seq,
ruby_system=ruby_system,
)
ruby_system.io_controller = io_controller
# Connect the dma controller to the network
io_controller.mandatoryQueue = MessageBuffer()
io_controller.responseFromDir = MessageBuffer(ordered = True)
io_controller.responseFromDir = MessageBuffer(ordered=True)
io_controller.responseFromDir.in_port = ruby_system.network.out_port
io_controller.requestToDir = MessageBuffer()
io_controller.requestToDir.out_port = ruby_system.network.in_port
@@ -287,41 +337,56 @@ def create_system(options, full_system, system, dma_ports, bootmem,
else:
for i in range(options.num_clusters):
for j in range(num_cpus_per_cluster):
FileSystemConfig.register_cpu(physical_package_id = 0,
core_siblings = range(options.num_cpus),
core_id = i*num_cpus_per_cluster+j,
thread_siblings = [])
FileSystemConfig.register_cpu(
physical_package_id=0,
core_siblings=range(options.num_cpus),
core_id=i * num_cpus_per_cluster + j,
thread_siblings=[],
)
FileSystemConfig.register_cache(level = 0,
idu_type = 'Instruction',
size = options.l0i_size,
line_size =\
options.cacheline_size,
assoc = 1,
cpus = [i*num_cpus_per_cluster+j])
FileSystemConfig.register_cache(level = 0,
idu_type = 'Data',
size = options.l0d_size,
line_size =\
options.cacheline_size,
assoc = 1,
cpus = [i*num_cpus_per_cluster+j])
FileSystemConfig.register_cache(
level=0,
idu_type="Instruction",
size=options.l0i_size,
line_size=options.cacheline_size,
assoc=1,
cpus=[i * num_cpus_per_cluster + j],
)
FileSystemConfig.register_cache(
level=0,
idu_type="Data",
size=options.l0d_size,
line_size=options.cacheline_size,
assoc=1,
cpus=[i * num_cpus_per_cluster + j],
)
FileSystemConfig.register_cache(level = 1,
idu_type = 'Unified',
size = options.l1d_size,
line_size = options.cacheline_size,
assoc = options.l1d_assoc,
cpus = [i*num_cpus_per_cluster+j])
FileSystemConfig.register_cache(
level=1,
idu_type="Unified",
size=options.l1d_size,
line_size=options.cacheline_size,
assoc=options.l1d_assoc,
cpus=[i * num_cpus_per_cluster + j],
)
FileSystemConfig.register_cache(level = 2,
idu_type = 'Unified',
size = str(MemorySize(options.l2_size) * \
num_l2caches_per_cluster)+'B',
line_size = options.cacheline_size,
assoc = options.l2_assoc,
cpus = [n for n in range(i*num_cpus_per_cluster, \
(i+1)*num_cpus_per_cluster)])
FileSystemConfig.register_cache(
level=2,
idu_type="Unified",
size=str(
MemorySize(options.l2_size) * num_l2caches_per_cluster
)
+ "B",
line_size=options.cacheline_size,
assoc=options.l2_assoc,
cpus=[
n
for n in range(
i * num_cpus_per_cluster,
(i + 1) * num_cpus_per_cluster,
)
],
)
ruby_system.network.number_of_virtual_networks = 3
topology = create_topology(all_cntrls, options)

View File

@@ -38,14 +38,26 @@ from common import FileSystemConfig
#
# Declare caches used by the protocol
#
class L0Cache(RubyCache): pass
class L1Cache(RubyCache): pass
class L2Cache(RubyCache): pass
class L0Cache(RubyCache):
pass
class L1Cache(RubyCache):
pass
class L2Cache(RubyCache):
pass
def define_options(parser):
parser.add_argument("--num-clusters", type=int, default=1,
help = "number of clusters in a design in which there are shared\
caches private to clusters")
parser.add_argument(
"--num-clusters",
type=int,
default=1,
help="number of clusters in a design in which there are shared\
caches private to clusters",
)
parser.add_argument("--l0i_size", type=str, default="4096B")
parser.add_argument("--l0d_size", type=str, default="4096B")
parser.add_argument("--l0i_assoc", type=int, default=1)
@@ -54,16 +66,23 @@ def define_options(parser):
parser.add_argument("--l1_transitions_per_cycle", type=int, default=32)
parser.add_argument("--l2_transitions_per_cycle", type=int, default=4)
parser.add_argument(
"--enable-prefetch", action="store_true", default=False,
help="Enable Ruby hardware prefetcher")
"--enable-prefetch",
action="store_true",
default=False,
help="Enable Ruby hardware prefetcher",
)
return
def create_system(options, full_system, system, dma_ports, bootmem,
ruby_system, cpus):
if buildEnv['PROTOCOL'] != 'MESI_Three_Level_HTM':
fatal("This script requires the MESI_Three_Level protocol to be\
built.")
def create_system(
options, full_system, system, dma_ports, bootmem, ruby_system, cpus
):
if buildEnv["PROTOCOL"] != "MESI_Three_Level_HTM":
fatal(
"This script requires the MESI_Three_Level protocol to be\
built."
)
cpu_sequencers = []
@@ -78,10 +97,10 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l2_cntrl_nodes = []
dma_cntrl_nodes = []
assert (options.num_cpus % options.num_clusters == 0)
assert options.num_cpus % options.num_clusters == 0
num_cpus_per_cluster = options.num_cpus // options.num_clusters
assert (options.num_l2caches % options.num_clusters == 0)
assert options.num_l2caches % options.num_clusters == 0
num_l2caches_per_cluster = options.num_l2caches // options.num_clusters
l2_bits = int(math.log(num_l2caches_per_cluster, 2))
@@ -97,63 +116,79 @@ def create_system(options, full_system, system, dma_ports, bootmem,
#
# First create the Ruby objects associated with this cpu
#
l0i_cache = L0Cache(size = options.l0i_size,
assoc = options.l0i_assoc,
is_icache = True,
start_index_bit = block_size_bits,
replacement_policy = LRURP())
l0i_cache = L0Cache(
size=options.l0i_size,
assoc=options.l0i_assoc,
is_icache=True,
start_index_bit=block_size_bits,
replacement_policy=LRURP(),
)
l0d_cache = L0Cache(size = options.l0d_size,
assoc = options.l0d_assoc,
is_icache = False,
start_index_bit = block_size_bits,
replacement_policy = LRURP())
l0d_cache = L0Cache(
size=options.l0d_size,
assoc=options.l0d_assoc,
is_icache=False,
start_index_bit=block_size_bits,
replacement_policy=LRURP(),
)
clk_domain = cpus[i].clk_domain
# Ruby prefetcher
prefetcher = RubyPrefetcher(
num_streams=16,
unit_filter = 256,
nonunit_filter = 256,
train_misses = 5,
num_startup_pfs = 4,
cross_page = True
unit_filter=256,
nonunit_filter=256,
train_misses=5,
num_startup_pfs=4,
cross_page=True,
)
l0_cntrl = L0Cache_Controller(
version = i * num_cpus_per_cluster + j,
Icache = l0i_cache, Dcache = l0d_cache,
transitions_per_cycle = options.l0_transitions_per_cycle,
prefetcher = prefetcher,
enable_prefetch = options.enable_prefetch,
send_evictions = send_evicts(options),
clk_domain = clk_domain,
ruby_system = ruby_system)
version=i * num_cpus_per_cluster + j,
Icache=l0i_cache,
Dcache=l0d_cache,
transitions_per_cycle=options.l0_transitions_per_cycle,
prefetcher=prefetcher,
enable_prefetch=options.enable_prefetch,
send_evictions=send_evicts(options),
clk_domain=clk_domain,
ruby_system=ruby_system,
)
cpu_seq = RubyHTMSequencer(version = i * num_cpus_per_cluster + j,
clk_domain = clk_domain,
dcache = l0d_cache,
ruby_system = ruby_system)
cpu_seq = RubyHTMSequencer(
version=i * num_cpus_per_cluster + j,
clk_domain=clk_domain,
dcache=l0d_cache,
ruby_system=ruby_system,
)
l0_cntrl.sequencer = cpu_seq
l1_cache = L1Cache(size = options.l1d_size,
assoc = options.l1d_assoc,
start_index_bit = block_size_bits,
is_icache = False)
l1_cache = L1Cache(
size=options.l1d_size,
assoc=options.l1d_assoc,
start_index_bit=block_size_bits,
is_icache=False,
)
l1_cntrl = L1Cache_Controller(
version = i * num_cpus_per_cluster + j,
cache = l1_cache, l2_select_num_bits = l2_bits,
cluster_id = i,
transitions_per_cycle = options.l1_transitions_per_cycle,
ruby_system = ruby_system)
version=i * num_cpus_per_cluster + j,
cache=l1_cache,
l2_select_num_bits=l2_bits,
cluster_id=i,
transitions_per_cycle=options.l1_transitions_per_cycle,
ruby_system=ruby_system,
)
exec("ruby_system.l0_cntrl%d = l0_cntrl"
% ( i * num_cpus_per_cluster + j))
exec("ruby_system.l1_cntrl%d = l1_cntrl"
% ( i * num_cpus_per_cluster + j))
exec(
"ruby_system.l0_cntrl%d = l0_cntrl"
% (i * num_cpus_per_cluster + j)
)
exec(
"ruby_system.l1_cntrl%d = l1_cntrl"
% (i * num_cpus_per_cluster + j)
)
#
# Add controllers and sequencers to the appropriate lists
@@ -165,9 +200,9 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# Connect the L0 and L1 controllers
l0_cntrl.prefetchQueue = MessageBuffer()
l0_cntrl.mandatoryQueue = MessageBuffer()
l0_cntrl.bufferToL1 = MessageBuffer(ordered = True)
l0_cntrl.bufferToL1 = MessageBuffer(ordered=True)
l1_cntrl.bufferFromL0 = l0_cntrl.bufferToL1
l0_cntrl.bufferFromL1 = MessageBuffer(ordered = True)
l0_cntrl.bufferFromL1 = MessageBuffer(ordered=True)
l1_cntrl.bufferToL0 = l0_cntrl.bufferFromL1
# Connect the L1 controllers and the network
@@ -183,28 +218,36 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l1_cntrl.responseFromL2 = MessageBuffer()
l1_cntrl.responseFromL2.in_port = ruby_system.network.out_port
for j in range(num_l2caches_per_cluster):
l2_cache = L2Cache(size = options.l2_size,
assoc = options.l2_assoc,
start_index_bit = l2_index_start)
l2_cache = L2Cache(
size=options.l2_size,
assoc=options.l2_assoc,
start_index_bit=l2_index_start,
)
l2_cntrl = L2Cache_Controller(
version = i * num_l2caches_per_cluster + j,
L2cache = l2_cache, cluster_id = i,
transitions_per_cycle =\
options.l2_transitions_per_cycle,
ruby_system = ruby_system)
version=i * num_l2caches_per_cluster + j,
L2cache=l2_cache,
cluster_id=i,
transitions_per_cycle=options.l2_transitions_per_cycle,
ruby_system=ruby_system,
)
exec("ruby_system.l2_cntrl%d = l2_cntrl"
% (i * num_l2caches_per_cluster + j))
exec(
"ruby_system.l2_cntrl%d = l2_cntrl"
% (i * num_l2caches_per_cluster + j)
)
l2_cntrl_nodes.append(l2_cntrl)
# Connect the L2 controllers and the network
l2_cntrl.DirRequestFromL2Cache = MessageBuffer()
l2_cntrl.DirRequestFromL2Cache.out_port = ruby_system.network.in_port
l2_cntrl.DirRequestFromL2Cache.out_port = (
ruby_system.network.in_port
)
l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
l2_cntrl.L1RequestFromL2Cache.out_port = ruby_system.network.in_port
l2_cntrl.L1RequestFromL2Cache.out_port = (
ruby_system.network.in_port
)
l2_cntrl.responseFromL2Cache = MessageBuffer()
l2_cntrl.responseFromL2Cache.out_port = ruby_system.network.in_port
@@ -219,10 +262,12 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# the ruby system
# clk_divider value is a fix to pass regression.
ruby_system.memctrl_clk_domain = DerivedClockDomain(
clk_domain = ruby_system.clk_domain, clk_divider = 3)
clk_domain=ruby_system.clk_domain, clk_divider=3
)
mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
options, bootmem, ruby_system, system)
options, bootmem, ruby_system, system
)
dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
if rom_dir_cntrl_node is not None:
dir_cntrl_nodes.append(rom_dir_cntrl_node)
@@ -241,12 +286,14 @@ def create_system(options, full_system, system, dma_ports, bootmem,
#
# Create the Ruby objects associated with the dma controller
#
dma_seq = DMASequencer(version = i, ruby_system = ruby_system)
dma_seq = DMASequencer(version=i, ruby_system=ruby_system)
dma_cntrl = DMA_Controller(version = i,
dma_sequencer = dma_seq,
transitions_per_cycle = options.ports,
ruby_system = ruby_system)
dma_cntrl = DMA_Controller(
version=i,
dma_sequencer=dma_seq,
transitions_per_cycle=options.ports,
ruby_system=ruby_system,
)
exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
exec("ruby_system.dma_cntrl%d.dma_sequencer.in_ports = dma_port" % i)
@@ -254,29 +301,33 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# Connect the dma controller to the network
dma_cntrl.mandatoryQueue = MessageBuffer()
dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
dma_cntrl.responseFromDir.in_port = ruby_system.network.out_port
dma_cntrl.requestToDir = MessageBuffer()
dma_cntrl.requestToDir.out_port = ruby_system.network.in_port
all_cntrls = l0_cntrl_nodes + \
l1_cntrl_nodes + \
l2_cntrl_nodes + \
dir_cntrl_nodes + \
dma_cntrl_nodes
all_cntrls = (
l0_cntrl_nodes
+ l1_cntrl_nodes
+ l2_cntrl_nodes
+ dir_cntrl_nodes
+ dma_cntrl_nodes
)
# Create the io controller and the sequencer
if full_system:
io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
ruby_system._io_port = io_seq
io_controller = DMA_Controller(version = len(dma_ports),
dma_sequencer = io_seq,
ruby_system = ruby_system)
io_controller = DMA_Controller(
version=len(dma_ports),
dma_sequencer=io_seq,
ruby_system=ruby_system,
)
ruby_system.io_controller = io_controller
# Connect the dma controller to the network
io_controller.mandatoryQueue = MessageBuffer()
io_controller.responseFromDir = MessageBuffer(ordered = True)
io_controller.responseFromDir = MessageBuffer(ordered=True)
io_controller.responseFromDir.in_port = ruby_system.network.out_port
io_controller.requestToDir = MessageBuffer()
io_controller.requestToDir.out_port = ruby_system.network.in_port
@@ -286,41 +337,56 @@ def create_system(options, full_system, system, dma_ports, bootmem,
else:
for i in range(options.num_clusters):
for j in range(num_cpus_per_cluster):
FileSystemConfig.register_cpu(physical_package_id = 0,
core_siblings = range(options.num_cpus),
core_id = i*num_cpus_per_cluster+j,
thread_siblings = [])
FileSystemConfig.register_cpu(
physical_package_id=0,
core_siblings=range(options.num_cpus),
core_id=i * num_cpus_per_cluster + j,
thread_siblings=[],
)
FileSystemConfig.register_cache(level = 0,
idu_type = 'Instruction',
size = options.l0i_size,
line_size =\
options.cacheline_size,
assoc = 1,
cpus = [i*num_cpus_per_cluster+j])
FileSystemConfig.register_cache(level = 0,
idu_type = 'Data',
size = options.l0d_size,
line_size =\
options.cacheline_size,
assoc = 1,
cpus = [i*num_cpus_per_cluster+j])
FileSystemConfig.register_cache(
level=0,
idu_type="Instruction",
size=options.l0i_size,
line_size=options.cacheline_size,
assoc=1,
cpus=[i * num_cpus_per_cluster + j],
)
FileSystemConfig.register_cache(
level=0,
idu_type="Data",
size=options.l0d_size,
line_size=options.cacheline_size,
assoc=1,
cpus=[i * num_cpus_per_cluster + j],
)
FileSystemConfig.register_cache(level = 1,
idu_type = 'Unified',
size = options.l1d_size,
line_size = options.cacheline_size,
assoc = options.l1d_assoc,
cpus = [i*num_cpus_per_cluster+j])
FileSystemConfig.register_cache(
level=1,
idu_type="Unified",
size=options.l1d_size,
line_size=options.cacheline_size,
assoc=options.l1d_assoc,
cpus=[i * num_cpus_per_cluster + j],
)
FileSystemConfig.register_cache(level = 2,
idu_type = 'Unified',
size = str(MemorySize(options.l2_size) * \
num_l2caches_per_cluster)+'B',
line_size = options.cacheline_size,
assoc = options.l2_assoc,
cpus = [n for n in range(i*num_cpus_per_cluster, \
(i+1)*num_cpus_per_cluster)])
FileSystemConfig.register_cache(
level=2,
idu_type="Unified",
size=str(
MemorySize(options.l2_size) * num_l2caches_per_cluster
)
+ "B",
line_size=options.cacheline_size,
assoc=options.l2_assoc,
cpus=[
n
for n in range(
i * num_cpus_per_cluster,
(i + 1) * num_cpus_per_cluster,
)
],
)
ruby_system.network.number_of_virtual_networks = 3
topology = create_topology(all_cntrls, options)

View File

@@ -35,16 +35,23 @@ from .Ruby import send_evicts
#
# Declare caches used by the protocol
#
class L1Cache(RubyCache): pass
class L2Cache(RubyCache): pass
class L1Cache(RubyCache):
pass
class L2Cache(RubyCache):
pass
def define_options(parser):
return
def create_system(options, full_system, system, dma_ports, bootmem,
ruby_system, cpus):
if buildEnv['PROTOCOL'] != 'MESI_Two_Level':
def create_system(
options, full_system, system, dma_ports, bootmem, ruby_system, cpus
):
if buildEnv["PROTOCOL"] != "MESI_Two_Level":
fatal("This script requires the MESI_Two_Level protocol to be built.")
cpu_sequencers = []
@@ -69,33 +76,42 @@ def create_system(options, full_system, system, dma_ports, bootmem,
#
# First create the Ruby objects associated with this cpu
#
l1i_cache = L1Cache(size = options.l1i_size,
assoc = options.l1i_assoc,
start_index_bit = block_size_bits,
is_icache = True)
l1d_cache = L1Cache(size = options.l1d_size,
assoc = options.l1d_assoc,
start_index_bit = block_size_bits,
is_icache = False)
l1i_cache = L1Cache(
size=options.l1i_size,
assoc=options.l1i_assoc,
start_index_bit=block_size_bits,
is_icache=True,
)
l1d_cache = L1Cache(
size=options.l1d_size,
assoc=options.l1d_assoc,
start_index_bit=block_size_bits,
is_icache=False,
)
prefetcher = RubyPrefetcher()
clk_domain = cpus[i].clk_domain
l1_cntrl = L1Cache_Controller(version = i, L1Icache = l1i_cache,
L1Dcache = l1d_cache,
l2_select_num_bits = l2_bits,
send_evictions = send_evicts(options),
prefetcher = prefetcher,
ruby_system = ruby_system,
clk_domain = clk_domain,
transitions_per_cycle = options.ports,
enable_prefetch = False)
cpu_seq = RubySequencer(version = i,
dcache = l1d_cache, clk_domain = clk_domain,
ruby_system = ruby_system)
l1_cntrl = L1Cache_Controller(
version=i,
L1Icache=l1i_cache,
L1Dcache=l1d_cache,
l2_select_num_bits=l2_bits,
send_evictions=send_evicts(options),
prefetcher=prefetcher,
ruby_system=ruby_system,
clk_domain=clk_domain,
transitions_per_cycle=options.ports,
enable_prefetch=False,
)
cpu_seq = RubySequencer(
version=i,
dcache=l1d_cache,
clk_domain=clk_domain,
ruby_system=ruby_system,
)
l1_cntrl.sequencer = cpu_seq
exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)
@@ -120,21 +136,24 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l1_cntrl.responseToL1Cache = MessageBuffer()
l1_cntrl.responseToL1Cache.in_port = ruby_system.network.out_port
l2_index_start = block_size_bits + l2_bits
for i in range(options.num_l2caches):
#
# First create the Ruby objects associated with this cpu
#
l2_cache = L2Cache(size = options.l2_size,
assoc = options.l2_assoc,
start_index_bit = l2_index_start)
l2_cache = L2Cache(
size=options.l2_size,
assoc=options.l2_assoc,
start_index_bit=l2_index_start,
)
l2_cntrl = L2Cache_Controller(version = i,
L2cache = l2_cache,
transitions_per_cycle = options.ports,
ruby_system = ruby_system)
l2_cntrl = L2Cache_Controller(
version=i,
L2cache=l2_cache,
transitions_per_cycle=options.ports,
ruby_system=ruby_system,
)
exec("ruby_system.l2_cntrl%d = l2_cntrl" % i)
l2_cntrl_nodes.append(l2_cntrl)
@@ -154,16 +173,16 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l2_cntrl.responseToL2Cache = MessageBuffer()
l2_cntrl.responseToL2Cache.in_port = ruby_system.network.out_port
# Run each of the ruby memory controllers at a ratio of the frequency of
# the ruby system
# clk_divider value is a fix to pass regression.
ruby_system.memctrl_clk_domain = DerivedClockDomain(
clk_domain = ruby_system.clk_domain,
clk_divider = 3)
clk_domain=ruby_system.clk_domain, clk_divider=3
)
mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
options, bootmem, ruby_system, system)
options, bootmem, ruby_system, system
)
dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
if rom_dir_cntrl_node is not None:
dir_cntrl_nodes.append(rom_dir_cntrl_node)
@@ -178,44 +197,47 @@ def create_system(options, full_system, system, dma_ports, bootmem,
dir_cntrl.requestToMemory = MessageBuffer()
dir_cntrl.responseFromMemory = MessageBuffer()
for i, dma_port in enumerate(dma_ports):
# Create the Ruby objects associated with the dma controller
dma_seq = DMASequencer(version = i, ruby_system = ruby_system,
in_ports = dma_port)
dma_seq = DMASequencer(
version=i, ruby_system=ruby_system, in_ports=dma_port
)
dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq,
transitions_per_cycle = options.ports,
ruby_system = ruby_system)
dma_cntrl = DMA_Controller(
version=i,
dma_sequencer=dma_seq,
transitions_per_cycle=options.ports,
ruby_system=ruby_system,
)
exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
dma_cntrl_nodes.append(dma_cntrl)
# Connect the dma controller to the network
dma_cntrl.mandatoryQueue = MessageBuffer()
dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
dma_cntrl.responseFromDir.in_port = ruby_system.network.out_port
dma_cntrl.requestToDir = MessageBuffer()
dma_cntrl.requestToDir.out_port = ruby_system.network.in_port
all_cntrls = l1_cntrl_nodes + \
l2_cntrl_nodes + \
dir_cntrl_nodes + \
dma_cntrl_nodes
all_cntrls = (
l1_cntrl_nodes + l2_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes
)
# Create the io controller and the sequencer
if full_system:
io_seq = DMASequencer(version = len(dma_ports),
ruby_system = ruby_system)
io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
ruby_system._io_port = io_seq
io_controller = DMA_Controller(version = len(dma_ports),
dma_sequencer = io_seq,
ruby_system = ruby_system)
io_controller = DMA_Controller(
version=len(dma_ports),
dma_sequencer=io_seq,
ruby_system=ruby_system,
)
ruby_system.io_controller = io_controller
# Connect the dma controller to the network
io_controller.mandatoryQueue = MessageBuffer()
io_controller.responseFromDir = MessageBuffer(ordered = True)
io_controller.responseFromDir = MessageBuffer(ordered=True)
io_controller.responseFromDir.in_port = ruby_system.network.out_port
io_controller.requestToDir = MessageBuffer()
io_controller.requestToDir.out_port = ruby_system.network.in_port

View File

@@ -35,15 +35,19 @@ from .Ruby import send_evicts
#
# Declare caches used by the protocol
#
class L1Cache(RubyCache): pass
class L1Cache(RubyCache):
pass
def define_options(parser):
return
def create_system(options, full_system, system, dma_ports, bootmem,
ruby_system, cpus):
if buildEnv['PROTOCOL'] != 'MI_example':
def create_system(
options, full_system, system, dma_ports, bootmem, ruby_system, cpus
):
if buildEnv["PROTOCOL"] != "MI_example":
panic("This script requires the MI_example protocol to be built.")
cpu_sequencers = []
@@ -68,22 +72,30 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# Only one cache exists for this protocol, so by default use the L1D
# config parameters.
#
cache = L1Cache(size = options.l1d_size,
assoc = options.l1d_assoc,
start_index_bit = block_size_bits)
cache = L1Cache(
size=options.l1d_size,
assoc=options.l1d_assoc,
start_index_bit=block_size_bits,
)
clk_domain = cpus[i].clk_domain
# Only one unified L1 cache exists. Can cache instructions and data.
l1_cntrl = L1Cache_Controller(version=i, cacheMemory=cache,
send_evictions=send_evicts(options),
transitions_per_cycle=options.ports,
clk_domain=clk_domain,
ruby_system=ruby_system)
l1_cntrl = L1Cache_Controller(
version=i,
cacheMemory=cache,
send_evictions=send_evicts(options),
transitions_per_cycle=options.ports,
clk_domain=clk_domain,
ruby_system=ruby_system,
)
cpu_seq = RubySequencer(version=i, dcache=cache,
clk_domain=clk_domain, ruby_system=ruby_system)
cpu_seq = RubySequencer(
version=i,
dcache=cache,
clk_domain=clk_domain,
ruby_system=ruby_system,
)
l1_cntrl.sequencer = cpu_seq
exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)
@@ -94,59 +106,60 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# Connect the L1 controllers and the network
l1_cntrl.mandatoryQueue = MessageBuffer()
l1_cntrl.requestFromCache = MessageBuffer(ordered = True)
l1_cntrl.requestFromCache = MessageBuffer(ordered=True)
l1_cntrl.requestFromCache.out_port = ruby_system.network.in_port
l1_cntrl.responseFromCache = MessageBuffer(ordered = True)
l1_cntrl.responseFromCache = MessageBuffer(ordered=True)
l1_cntrl.responseFromCache.out_port = ruby_system.network.in_port
l1_cntrl.forwardToCache = MessageBuffer(ordered = True)
l1_cntrl.forwardToCache = MessageBuffer(ordered=True)
l1_cntrl.forwardToCache.in_port = ruby_system.network.out_port
l1_cntrl.responseToCache = MessageBuffer(ordered = True)
l1_cntrl.responseToCache = MessageBuffer(ordered=True)
l1_cntrl.responseToCache.in_port = ruby_system.network.out_port
phys_mem_size = sum([r.size() for r in system.mem_ranges])
assert(phys_mem_size % options.num_dirs == 0)
assert phys_mem_size % options.num_dirs == 0
mem_module_size = phys_mem_size / options.num_dirs
# Run each of the ruby memory controllers at a ratio of the frequency of
# the ruby system.
# clk_divider value is a fix to pass regression.
ruby_system.memctrl_clk_domain = DerivedClockDomain(
clk_domain=ruby_system.clk_domain,
clk_divider=3)
clk_domain=ruby_system.clk_domain, clk_divider=3
)
mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
options, bootmem, ruby_system, system)
options, bootmem, ruby_system, system
)
dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
if rom_dir_cntrl_node is not None:
dir_cntrl_nodes.append(rom_dir_cntrl_node)
for dir_cntrl in dir_cntrl_nodes:
# Connect the directory controllers and the network
dir_cntrl.requestToDir = MessageBuffer(ordered = True)
dir_cntrl.requestToDir = MessageBuffer(ordered=True)
dir_cntrl.requestToDir.in_port = ruby_system.network.out_port
dir_cntrl.dmaRequestToDir = MessageBuffer(ordered = True)
dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
dir_cntrl.dmaRequestToDir.in_port = ruby_system.network.out_port
dir_cntrl.responseFromDir = MessageBuffer()
dir_cntrl.responseFromDir.out_port = ruby_system.network.in_port
dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered = True)
dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
dir_cntrl.dmaResponseFromDir.out_port = ruby_system.network.in_port
dir_cntrl.forwardFromDir = MessageBuffer()
dir_cntrl.forwardFromDir.out_port = ruby_system.network.in_port
dir_cntrl.requestToMemory = MessageBuffer()
dir_cntrl.responseFromMemory = MessageBuffer()
for i, dma_port in enumerate(dma_ports):
#
# Create the Ruby objects associated with the dma controller
#
dma_seq = DMASequencer(version = i,
ruby_system = ruby_system)
dma_seq = DMASequencer(version=i, ruby_system=ruby_system)
dma_cntrl = DMA_Controller(version = i,
dma_sequencer = dma_seq,
transitions_per_cycle = options.ports,
ruby_system = ruby_system)
dma_cntrl = DMA_Controller(
version=i,
dma_sequencer=dma_seq,
transitions_per_cycle=options.ports,
ruby_system=ruby_system,
)
exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
exec("ruby_system.dma_cntrl%d.dma_sequencer.in_ports = dma_port" % i)
@@ -156,7 +169,7 @@ def create_system(options, full_system, system, dma_ports, bootmem,
dma_cntrl.mandatoryQueue = MessageBuffer()
dma_cntrl.requestToDir = MessageBuffer()
dma_cntrl.requestToDir.out_port = ruby_system.network.in_port
dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
dma_cntrl.responseFromDir.in_port = ruby_system.network.out_port
all_cntrls = l1_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes
@@ -165,16 +178,18 @@ def create_system(options, full_system, system, dma_ports, bootmem,
if full_system:
io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
ruby_system._io_port = io_seq
io_controller = DMA_Controller(version = len(dma_ports),
dma_sequencer = io_seq,
ruby_system = ruby_system)
io_controller = DMA_Controller(
version=len(dma_ports),
dma_sequencer=io_seq,
ruby_system=ruby_system,
)
ruby_system.io_controller = io_controller
# Connect the dma controller to the network
io_controller.mandatoryQueue = MessageBuffer()
io_controller.requestToDir = MessageBuffer()
io_controller.requestToDir.out_port = ruby_system.network.in_port
io_controller.responseFromDir = MessageBuffer(ordered = True)
io_controller.responseFromDir = MessageBuffer(ordered=True)
io_controller.responseFromDir.in_port = ruby_system.network.out_port
all_cntrls = all_cntrls + [io_controller]

View File

@@ -36,13 +36,15 @@ from .Ruby import create_topology
from .Ruby import send_evicts
from common import FileSystemConfig
addToPath('../')
addToPath("../")
from topologies.Cluster import Cluster
from topologies.Crossbar import Crossbar
class CntrlBase:
_seqs = 0
@classmethod
def seqCount(cls):
# Use SeqCount not class since we need global count
@@ -50,6 +52,7 @@ class CntrlBase:
return CntrlBase._seqs - 1
_cntrls = 0
@classmethod
def cntrlCount(cls):
# Use CntlCount not class since we need global count
@@ -57,34 +60,41 @@ class CntrlBase:
return CntrlBase._cntrls - 1
_version = 0
@classmethod
def versionCount(cls):
cls._version += 1 # Use count for this particular type
cls._version += 1 # Use count for this particular type
return cls._version - 1
class L1DCache(RubyCache):
resourceStalls = False
def create(self, options):
self.size = MemorySize(options.l1d_size)
self.assoc = options.l1d_assoc
self.replacement_policy = TreePLRURP()
class L1ICache(RubyCache):
resourceStalls = False
def create(self, options):
self.size = MemorySize(options.l1i_size)
self.assoc = options.l1i_assoc
self.replacement_policy = TreePLRURP()
class L2Cache(RubyCache):
resourceStalls = False
def create(self, options):
self.size = MemorySize(options.l2_size)
self.assoc = options.l2_assoc
self.replacement_policy = TreePLRURP()
class CPCntrl(CorePair_Controller, CntrlBase):
class CPCntrl(CorePair_Controller, CntrlBase):
def create(self, options, ruby_system, system):
self.version = self.versionCount()
@@ -122,6 +132,7 @@ class CPCntrl(CorePair_Controller, CntrlBase):
if options.recycle_latency:
self.recycle_latency = options.recycle_latency
class L3Cache(RubyCache):
assoc = 8
dataArrayBanks = 256
@@ -139,21 +150,30 @@ class L3Cache(RubyCache):
self.resourceStalls = options.no_resource_stalls
self.replacement_policy = TreePLRURP()
class L3Cntrl(L3Cache_Controller, CntrlBase):
def create(self, options, ruby_system, system):
self.version = self.versionCount()
self.L3cache = L3Cache()
self.L3cache.create(options, ruby_system, system)
self.l3_response_latency = max(self.L3cache.dataAccessLatency,
self.L3cache.tagAccessLatency)
self.l3_response_latency = max(
self.L3cache.dataAccessLatency, self.L3cache.tagAccessLatency
)
self.ruby_system = ruby_system
if options.recycle_latency:
self.recycle_latency = options.recycle_latency
def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
req_to_l3, probe_to_l3, resp_to_l3):
def connectWireBuffers(
self,
req_to_dir,
resp_to_dir,
l3_unblock_to_dir,
req_to_l3,
probe_to_l3,
resp_to_l3,
):
self.reqToDir = req_to_dir
self.respToDir = resp_to_dir
self.l3UnblockToDir = l3_unblock_to_dir
@@ -161,6 +181,7 @@ class L3Cntrl(L3Cache_Controller, CntrlBase):
self.probeToL3 = probe_to_l3
self.respToL3 = resp_to_l3
class DirCntrl(Directory_Controller, CntrlBase):
def create(self, options, dir_ranges, ruby_system, system):
self.version = self.versionCount()
@@ -173,8 +194,10 @@ class DirCntrl(Directory_Controller, CntrlBase):
self.L3CacheMemory = L3Cache()
self.L3CacheMemory.create(options, ruby_system, system)
self.l3_hit_latency = max(self.L3CacheMemory.dataAccessLatency,
self.L3CacheMemory.tagAccessLatency)
self.l3_hit_latency = max(
self.L3CacheMemory.dataAccessLatency,
self.L3CacheMemory.tagAccessLatency,
)
self.number_of_TBEs = options.num_tbes
@@ -185,8 +208,15 @@ class DirCntrl(Directory_Controller, CntrlBase):
self.CPUonly = True
def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
req_to_l3, probe_to_l3, resp_to_l3):
def connectWireBuffers(
self,
req_to_dir,
resp_to_dir,
l3_unblock_to_dir,
req_to_l3,
probe_to_l3,
resp_to_l3,
):
self.reqToDir = req_to_dir
self.respToDir = resp_to_dir
self.l3UnblockToDir = l3_unblock_to_dir
@@ -194,19 +224,23 @@ class DirCntrl(Directory_Controller, CntrlBase):
self.probeToL3 = probe_to_l3
self.respToL3 = resp_to_l3
def define_options(parser):
parser.add_argument("--num-subcaches", type=int, default=4)
parser.add_argument("--l3-data-latency", type=int, default=20)
parser.add_argument("--l3-tag-latency", type=int, default=15)
parser.add_argument("--cpu-to-dir-latency", type=int, default=15)
parser.add_argument("--no-resource-stalls", action="store_false",
default=True)
parser.add_argument(
"--no-resource-stalls", action="store_false", default=True
)
parser.add_argument("--num-tbes", type=int, default=256)
parser.add_argument("--l2-latency", type=int, default=50) # load to use
parser.add_argument("--l2-latency", type=int, default=50) # load to use
def create_system(options, full_system, system, dma_devices, bootmem,
ruby_system):
if buildEnv['PROTOCOL'] != 'MOESI_AMD_Base':
def create_system(
options, full_system, system, dma_devices, bootmem, ruby_system
):
if buildEnv["PROTOCOL"] != "MOESI_AMD_Base":
panic("This script requires the MOESI_AMD_Base protocol.")
cpu_sequencers = []
@@ -230,7 +264,7 @@ def create_system(options, full_system, system, dma_devices, bootmem,
# This is the base crossbar that connects the L3s, Dirs, and cpu
# Cluster
mainCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s
mainCluster = Cluster(extBW=512, intBW=512) # 1 TB/s
if options.numa_high_bit:
numa_bit = options.numa_high_bit
@@ -245,18 +279,20 @@ def create_system(options, full_system, system, dma_devices, bootmem,
for i in range(options.num_dirs):
dir_ranges = []
for r in system.mem_ranges:
addr_range = m5.objects.AddrRange(r.start, size = r.size(),
intlvHighBit = numa_bit,
intlvBits = dir_bits,
intlvMatch = i)
addr_range = m5.objects.AddrRange(
r.start,
size=r.size(),
intlvHighBit=numa_bit,
intlvBits=dir_bits,
intlvMatch=i,
)
dir_ranges.append(addr_range)
dir_cntrl = DirCntrl(TCC_select_num_bits = 0)
dir_cntrl = DirCntrl(TCC_select_num_bits=0)
dir_cntrl.create(options, dir_ranges, ruby_system, system)
# Connect the Directory controller to the ruby network
dir_cntrl.requestFromCores = MessageBuffer(ordered = True)
dir_cntrl.requestFromCores = MessageBuffer(ordered=True)
dir_cntrl.requestFromCores.in_port = ruby_system.network.out_port
dir_cntrl.responseFromCores = MessageBuffer()
@@ -271,8 +307,8 @@ def create_system(options, full_system, system, dma_devices, bootmem,
dir_cntrl.responseToCore = MessageBuffer()
dir_cntrl.responseToCore.out_port = ruby_system.network.in_port
dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
dir_cntrl.L3triggerQueue = MessageBuffer(ordered=True)
dir_cntrl.requestToMemory = MessageBuffer()
dir_cntrl.responseFromMemory = MessageBuffer()
@@ -286,10 +322,10 @@ def create_system(options, full_system, system, dma_devices, bootmem,
# level config files, such as the ruby_random_tester, will get confused if
# the number of cpus does not equal the number of sequencers. Thus make
# sure that an even number of cpus is specified.
assert((options.num_cpus % 2) == 0)
assert (options.num_cpus % 2) == 0
# For an odd number of CPUs, still create the right number of controllers
cpuCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s
cpuCluster = Cluster(extBW=512, intBW=512) # 1 TB/s
for i in range((options.num_cpus + 1) // 2):
cp_cntrl = CPCntrl()
@@ -318,64 +354,75 @@ def create_system(options, full_system, system, dma_devices, bootmem,
cp_cntrl.responseToCore.in_port = ruby_system.network.out_port
cp_cntrl.mandatoryQueue = MessageBuffer()
cp_cntrl.triggerQueue = MessageBuffer(ordered = True)
cp_cntrl.triggerQueue = MessageBuffer(ordered=True)
cpuCluster.add(cp_cntrl)
# Register CPUs and caches for each CorePair and directory (SE mode only)
if not full_system:
for i in range((options.num_cpus + 1) // 2):
FileSystemConfig.register_cpu(physical_package_id = 0,
core_siblings =
range(options.num_cpus),
core_id = i*2,
thread_siblings = [])
FileSystemConfig.register_cpu(
physical_package_id=0,
core_siblings=range(options.num_cpus),
core_id=i * 2,
thread_siblings=[],
)
FileSystemConfig.register_cpu(physical_package_id = 0,
core_siblings =
range(options.num_cpus),
core_id = i*2+1,
thread_siblings = [])
FileSystemConfig.register_cpu(
physical_package_id=0,
core_siblings=range(options.num_cpus),
core_id=i * 2 + 1,
thread_siblings=[],
)
FileSystemConfig.register_cache(level = 0,
idu_type = 'Instruction',
size = options.l1i_size,
line_size = options.cacheline_size,
assoc = options.l1i_assoc,
cpus = [i*2, i*2+1])
FileSystemConfig.register_cache(
level=0,
idu_type="Instruction",
size=options.l1i_size,
line_size=options.cacheline_size,
assoc=options.l1i_assoc,
cpus=[i * 2, i * 2 + 1],
)
FileSystemConfig.register_cache(level = 0,
idu_type = 'Data',
size = options.l1d_size,
line_size = options.cacheline_size,
assoc = options.l1d_assoc,
cpus = [i*2])
FileSystemConfig.register_cache(
level=0,
idu_type="Data",
size=options.l1d_size,
line_size=options.cacheline_size,
assoc=options.l1d_assoc,
cpus=[i * 2],
)
FileSystemConfig.register_cache(level = 0,
idu_type = 'Data',
size = options.l1d_size,
line_size = options.cacheline_size,
assoc = options.l1d_assoc,
cpus = [i*2+1])
FileSystemConfig.register_cache(
level=0,
idu_type="Data",
size=options.l1d_size,
line_size=options.cacheline_size,
assoc=options.l1d_assoc,
cpus=[i * 2 + 1],
)
FileSystemConfig.register_cache(level = 1,
idu_type = 'Unified',
size = options.l2_size,
line_size = options.cacheline_size,
assoc = options.l2_assoc,
cpus = [i*2, i*2+1])
FileSystemConfig.register_cache(
level=1,
idu_type="Unified",
size=options.l2_size,
line_size=options.cacheline_size,
assoc=options.l2_assoc,
cpus=[i * 2, i * 2 + 1],
)
for i in range(options.num_dirs):
FileSystemConfig.register_cache(level = 2,
idu_type = 'Unified',
size = options.l3_size,
line_size = options.cacheline_size,
assoc = options.l3_assoc,
cpus = [n for n in
range(options.num_cpus)])
FileSystemConfig.register_cache(
level=2,
idu_type="Unified",
size=options.l3_size,
line_size=options.cacheline_size,
assoc=options.l3_assoc,
cpus=[n for n in range(options.num_cpus)],
)
# Assuming no DMA devices
assert(len(dma_devices) == 0)
assert len(dma_devices) == 0
# Add cpu/gpu clusters to main cluster
mainCluster.add(cpuCluster)

View File

@@ -51,18 +51,24 @@ class L1Cache(RubyCache):
dataAccessLatency = 1
tagAccessLatency = 1
class L2Cache(RubyCache):
dataAccessLatency = 20
tagAccessLatency = 20
def define_options(parser):
return
def create_system(options, full_system, system, dma_ports, bootmem,
ruby_system, cpus):
if buildEnv['PROTOCOL'] != 'MOESI_CMP_directory':
panic("This script requires the MOESI_CMP_directory protocol to be built.")
def create_system(
options, full_system, system, dma_ports, bootmem, ruby_system, cpus
):
if buildEnv["PROTOCOL"] != "MOESI_CMP_directory":
panic(
"This script requires the MOESI_CMP_directory protocol to be built."
)
cpu_sequencers = []
@@ -85,27 +91,37 @@ def create_system(options, full_system, system, dma_ports, bootmem,
#
# First create the Ruby objects associated with this cpu
#
l1i_cache = L1Cache(size = options.l1i_size,
assoc = options.l1i_assoc,
start_index_bit = block_size_bits,
is_icache = True)
l1d_cache = L1Cache(size = options.l1d_size,
assoc = options.l1d_assoc,
start_index_bit = block_size_bits,
is_icache = False)
l1i_cache = L1Cache(
size=options.l1i_size,
assoc=options.l1i_assoc,
start_index_bit=block_size_bits,
is_icache=True,
)
l1d_cache = L1Cache(
size=options.l1d_size,
assoc=options.l1d_assoc,
start_index_bit=block_size_bits,
is_icache=False,
)
clk_domain = cpus[i].clk_domain
l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache,
L1Dcache=l1d_cache,
send_evictions=send_evicts(options),
transitions_per_cycle=options.ports,
clk_domain=clk_domain,
ruby_system=ruby_system)
l1_cntrl = L1Cache_Controller(
version=i,
L1Icache=l1i_cache,
L1Dcache=l1d_cache,
send_evictions=send_evicts(options),
transitions_per_cycle=options.ports,
clk_domain=clk_domain,
ruby_system=ruby_system,
)
cpu_seq = RubySequencer(version=i,
dcache=l1d_cache, clk_domain=clk_domain,
ruby_system=ruby_system)
cpu_seq = RubySequencer(
version=i,
dcache=l1d_cache,
clk_domain=clk_domain,
ruby_system=ruby_system,
)
l1_cntrl.sequencer = cpu_seq
exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)
@@ -124,22 +140,25 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l1_cntrl.requestToL1Cache.in_port = ruby_system.network.out_port
l1_cntrl.responseToL1Cache = MessageBuffer()
l1_cntrl.responseToL1Cache.in_port = ruby_system.network.out_port
l1_cntrl.triggerQueue = MessageBuffer(ordered = True)
l1_cntrl.triggerQueue = MessageBuffer(ordered=True)
# Create the L2s interleaved addr ranges
l2_addr_ranges = []
l2_bits = int(math.log(options.num_l2caches, 2))
numa_bit = block_size_bits + l2_bits - 1
sysranges = [] + system.mem_ranges
if bootmem: sysranges.append(bootmem.range)
if bootmem:
sysranges.append(bootmem.range)
for i in range(options.num_l2caches):
ranges = []
for r in sysranges:
addr_range = AddrRange(r.start, size = r.size(),
intlvHighBit = numa_bit,
intlvBits = l2_bits,
intlvMatch = i)
addr_range = AddrRange(
r.start,
size=r.size(),
intlvHighBit=numa_bit,
intlvBits=l2_bits,
intlvMatch=i,
)
ranges.append(addr_range)
l2_addr_ranges.append(ranges)
@@ -147,22 +166,28 @@ def create_system(options, full_system, system, dma_ports, bootmem,
#
# First create the Ruby objects associated with this cpu
#
l2_cache = L2Cache(size = options.l2_size,
assoc = options.l2_assoc,
start_index_bit = block_size_bits + l2_bits)
l2_cache = L2Cache(
size=options.l2_size,
assoc=options.l2_assoc,
start_index_bit=block_size_bits + l2_bits,
)
l2_cntrl = L2Cache_Controller(version = i,
L2cache = l2_cache,
transitions_per_cycle = options.ports,
ruby_system = ruby_system,
addr_ranges = l2_addr_ranges[i])
l2_cntrl = L2Cache_Controller(
version=i,
L2cache=l2_cache,
transitions_per_cycle=options.ports,
ruby_system=ruby_system,
addr_ranges=l2_addr_ranges[i],
)
exec("ruby_system.l2_cntrl%d = l2_cntrl" % i)
l2_cntrl_nodes.append(l2_cntrl)
# Connect the L2 controllers and the network
l2_cntrl.GlobalRequestFromL2Cache = MessageBuffer()
l2_cntrl.GlobalRequestFromL2Cache.out_port = ruby_system.network.in_port
l2_cntrl.GlobalRequestFromL2Cache.out_port = (
ruby_system.network.in_port
)
l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
l2_cntrl.L1RequestFromL2Cache.out_port = ruby_system.network.in_port
l2_cntrl.responseFromL2Cache = MessageBuffer()
@@ -174,18 +199,18 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l2_cntrl.L1RequestToL2Cache.in_port = ruby_system.network.out_port
l2_cntrl.responseToL2Cache = MessageBuffer()
l2_cntrl.responseToL2Cache.in_port = ruby_system.network.out_port
l2_cntrl.triggerQueue = MessageBuffer(ordered = True)
l2_cntrl.triggerQueue = MessageBuffer(ordered=True)
# Run each of the ruby memory controllers at a ratio of the frequency of
# the ruby system.
# clk_divider value is a fix to pass regression.
ruby_system.memctrl_clk_domain = DerivedClockDomain(
clk_domain=ruby_system.clk_domain,
clk_divider=3)
clk_domain=ruby_system.clk_domain, clk_divider=3
)
mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
options, bootmem, ruby_system, system)
options, bootmem, ruby_system, system
)
dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
if rom_dir_cntrl_node is not None:
dir_cntrl_nodes.append(rom_dir_cntrl_node)
@@ -201,21 +226,22 @@ def create_system(options, full_system, system, dma_ports, bootmem,
dir_cntrl.forwardFromDir.out_port = ruby_system.network.in_port
dir_cntrl.requestToMemory = MessageBuffer()
dir_cntrl.responseFromMemory = MessageBuffer()
dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
for i, dma_port in enumerate(dma_ports):
#
# Create the Ruby objects associated with the dma controller
#
dma_seq = DMASequencer(version = i,
ruby_system = ruby_system,
in_ports = dma_port)
dma_seq = DMASequencer(
version=i, ruby_system=ruby_system, in_ports=dma_port
)
dma_cntrl = DMA_Controller(version = i,
dma_sequencer = dma_seq,
transitions_per_cycle = options.ports,
ruby_system = ruby_system)
dma_cntrl = DMA_Controller(
version=i,
dma_sequencer=dma_seq,
transitions_per_cycle=options.ports,
ruby_system=ruby_system,
)
exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
dma_cntrl_nodes.append(dma_cntrl)
@@ -228,21 +254,21 @@ def create_system(options, full_system, system, dma_ports, bootmem,
dma_cntrl.reqToDir.out_port = ruby_system.network.in_port
dma_cntrl.respToDir = MessageBuffer()
dma_cntrl.respToDir.out_port = ruby_system.network.in_port
dma_cntrl.triggerQueue = MessageBuffer(ordered = True)
dma_cntrl.triggerQueue = MessageBuffer(ordered=True)
all_cntrls = l1_cntrl_nodes + \
l2_cntrl_nodes + \
dir_cntrl_nodes + \
dma_cntrl_nodes
all_cntrls = (
l1_cntrl_nodes + l2_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes
)
# Create the io controller and the sequencer
if full_system:
io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
ruby_system._io_port = io_seq
io_controller = DMA_Controller(version = len(dma_ports),
dma_sequencer = io_seq,
ruby_system = ruby_system)
io_controller = DMA_Controller(
version=len(dma_ports),
dma_sequencer=io_seq,
ruby_system=ruby_system,
)
ruby_system.io_controller = io_controller
# Connect the dma controller to the network
@@ -253,7 +279,7 @@ def create_system(options, full_system, system, dma_ports, bootmem,
io_controller.reqToDir.out_port = ruby_system.network.in_port
io_controller.respToDir = MessageBuffer()
io_controller.respToDir.out_port = ruby_system.network.in_port
io_controller.triggerQueue = MessageBuffer(ordered = True)
io_controller.triggerQueue = MessageBuffer(ordered=True)
all_cntrls = all_cntrls + [io_controller]

View File

@@ -35,27 +35,44 @@ from .Ruby import send_evicts
#
# Declare caches used by the protocol
#
class L1Cache(RubyCache): pass
class L2Cache(RubyCache): pass
class L1Cache(RubyCache):
pass
class L2Cache(RubyCache):
pass
def define_options(parser):
parser.add_argument(
"--l1-retries", type=int, default=1,
help="Token_CMP: # of l1 retries before going persistent")
"--l1-retries",
type=int,
default=1,
help="Token_CMP: # of l1 retries before going persistent",
)
parser.add_argument(
"--timeout-latency", type=int, default=300,
help="Token_CMP: cycles until issuing again");
"--timeout-latency",
type=int,
default=300,
help="Token_CMP: cycles until issuing again",
)
parser.add_argument(
"--disable-dyn-timeouts", action="store_true",
help="Token_CMP: disable dyanimc timeouts, use fixed latency instead")
"--disable-dyn-timeouts",
action="store_true",
help="Token_CMP: disable dyanimc timeouts, use fixed latency instead",
)
parser.add_argument(
"--allow-atomic-migration", action="store_true",
help="allow migratory sharing for atomic only accessed blocks")
"--allow-atomic-migration",
action="store_true",
help="allow migratory sharing for atomic only accessed blocks",
)
def create_system(options, full_system, system, dma_ports, bootmem,
ruby_system, cpus):
if buildEnv['PROTOCOL'] != 'MOESI_CMP_token':
def create_system(
options, full_system, system, dma_ports, bootmem, ruby_system, cpus
):
if buildEnv["PROTOCOL"] != "MOESI_CMP_token":
panic("This script requires the MOESI_CMP_token protocol to be built.")
#
@@ -86,34 +103,41 @@ def create_system(options, full_system, system, dma_ports, bootmem,
#
# First create the Ruby objects associated with this cpu
#
l1i_cache = L1Cache(size = options.l1i_size,
assoc = options.l1i_assoc,
start_index_bit = block_size_bits)
l1d_cache = L1Cache(size = options.l1d_size,
assoc = options.l1d_assoc,
start_index_bit = block_size_bits)
l1i_cache = L1Cache(
size=options.l1i_size,
assoc=options.l1i_assoc,
start_index_bit=block_size_bits,
)
l1d_cache = L1Cache(
size=options.l1d_size,
assoc=options.l1d_assoc,
start_index_bit=block_size_bits,
)
clk_domain = cpus[i].clk_domain
l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache,
L1Dcache=l1d_cache,
l2_select_num_bits=l2_bits,
N_tokens=n_tokens,
retry_threshold=options.l1_retries,
fixed_timeout_latency=\
options.timeout_latency,
dynamic_timeout_enabled=\
not options.disable_dyn_timeouts,
no_mig_atomic=not \
options.allow_atomic_migration,
send_evictions=send_evicts(options),
transitions_per_cycle=options.ports,
clk_domain=clk_domain,
ruby_system=ruby_system)
l1_cntrl = L1Cache_Controller(
version=i,
L1Icache=l1i_cache,
L1Dcache=l1d_cache,
l2_select_num_bits=l2_bits,
N_tokens=n_tokens,
retry_threshold=options.l1_retries,
fixed_timeout_latency=options.timeout_latency,
dynamic_timeout_enabled=not options.disable_dyn_timeouts,
no_mig_atomic=not options.allow_atomic_migration,
send_evictions=send_evicts(options),
transitions_per_cycle=options.ports,
clk_domain=clk_domain,
ruby_system=ruby_system,
)
cpu_seq = RubySequencer(version=i,
dcache=l1d_cache, clk_domain=clk_domain,
ruby_system=ruby_system)
cpu_seq = RubySequencer(
version=i,
dcache=l1d_cache,
clk_domain=clk_domain,
ruby_system=ruby_system,
)
l1_cntrl.sequencer = cpu_seq
exec("ruby_system.l1_cntrl%d = l1_cntrl" % i)
@@ -127,7 +151,7 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l1_cntrl.requestFromL1Cache.out_port = ruby_system.network.in_port
l1_cntrl.responseFromL1Cache = MessageBuffer()
l1_cntrl.responseFromL1Cache.out_port = ruby_system.network.in_port
l1_cntrl.persistentFromL1Cache = MessageBuffer(ordered = True)
l1_cntrl.persistentFromL1Cache = MessageBuffer(ordered=True)
l1_cntrl.persistentFromL1Cache.out_port = ruby_system.network.in_port
l1_cntrl.mandatoryQueue = MessageBuffer()
@@ -135,32 +159,37 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l1_cntrl.requestToL1Cache.in_port = ruby_system.network.out_port
l1_cntrl.responseToL1Cache = MessageBuffer()
l1_cntrl.responseToL1Cache.in_port = ruby_system.network.out_port
l1_cntrl.persistentToL1Cache = MessageBuffer(ordered = True)
l1_cntrl.persistentToL1Cache = MessageBuffer(ordered=True)
l1_cntrl.persistentToL1Cache.in_port = ruby_system.network.out_port
l2_index_start = block_size_bits + l2_bits
for i in range(options.num_l2caches):
#
# First create the Ruby objects associated with this cpu
#
l2_cache = L2Cache(size = options.l2_size,
assoc = options.l2_assoc,
start_index_bit = l2_index_start)
l2_cache = L2Cache(
size=options.l2_size,
assoc=options.l2_assoc,
start_index_bit=l2_index_start,
)
l2_cntrl = L2Cache_Controller(version = i,
L2cache = l2_cache,
N_tokens = n_tokens,
transitions_per_cycle = options.ports,
ruby_system = ruby_system)
l2_cntrl = L2Cache_Controller(
version=i,
L2cache=l2_cache,
N_tokens=n_tokens,
transitions_per_cycle=options.ports,
ruby_system=ruby_system,
)
exec("ruby_system.l2_cntrl%d = l2_cntrl" % i)
l2_cntrl_nodes.append(l2_cntrl)
# Connect the L2 controllers and the network
l2_cntrl.GlobalRequestFromL2Cache = MessageBuffer()
l2_cntrl.GlobalRequestFromL2Cache.out_port = ruby_system.network.in_port
l2_cntrl.GlobalRequestFromL2Cache.out_port = (
ruby_system.network.in_port
)
l2_cntrl.L1RequestFromL2Cache = MessageBuffer()
l2_cntrl.L1RequestFromL2Cache.out_port = ruby_system.network.in_port
l2_cntrl.responseFromL2Cache = MessageBuffer()
@@ -172,19 +201,19 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l2_cntrl.L1RequestToL2Cache.in_port = ruby_system.network.out_port
l2_cntrl.responseToL2Cache = MessageBuffer()
l2_cntrl.responseToL2Cache.in_port = ruby_system.network.out_port
l2_cntrl.persistentToL2Cache = MessageBuffer(ordered = True)
l2_cntrl.persistentToL2Cache = MessageBuffer(ordered=True)
l2_cntrl.persistentToL2Cache.in_port = ruby_system.network.out_port
# Run each of the ruby memory controllers at a ratio of the frequency of
# the ruby system
# clk_divider value is a fix to pass regression.
ruby_system.memctrl_clk_domain = DerivedClockDomain(
clk_domain=ruby_system.clk_domain,
clk_divider=3)
clk_domain=ruby_system.clk_domain, clk_divider=3
)
mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
options, bootmem, ruby_system, system)
options, bootmem, ruby_system, system
)
dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
if rom_dir_cntrl_node is not None:
dir_cntrl_nodes.append(rom_dir_cntrl_node)
@@ -195,63 +224,65 @@ def create_system(options, full_system, system, dma_ports, bootmem,
dir_cntrl.requestToDir.in_port = ruby_system.network.out_port
dir_cntrl.responseToDir = MessageBuffer()
dir_cntrl.responseToDir.in_port = ruby_system.network.out_port
dir_cntrl.persistentToDir = MessageBuffer(ordered = True)
dir_cntrl.persistentToDir = MessageBuffer(ordered=True)
dir_cntrl.persistentToDir.in_port = ruby_system.network.out_port
dir_cntrl.dmaRequestToDir = MessageBuffer(ordered = True)
dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
dir_cntrl.dmaRequestToDir.in_port = ruby_system.network.out_port
dir_cntrl.requestFromDir = MessageBuffer()
dir_cntrl.requestFromDir.out_port = ruby_system.network.in_port
dir_cntrl.responseFromDir = MessageBuffer()
dir_cntrl.responseFromDir.out_port = ruby_system.network.in_port
dir_cntrl.persistentFromDir = MessageBuffer(ordered = True)
dir_cntrl.persistentFromDir = MessageBuffer(ordered=True)
dir_cntrl.persistentFromDir.out_port = ruby_system.network.in_port
dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered = True)
dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
dir_cntrl.dmaResponseFromDir.out_port = ruby_system.network.in_port
dir_cntrl.requestToMemory = MessageBuffer()
dir_cntrl.responseFromMemory = MessageBuffer()
for i, dma_port in enumerate(dma_ports):
#
# Create the Ruby objects associated with the dma controller
#
dma_seq = DMASequencer(version = i,
ruby_system = ruby_system,
in_ports = dma_port)
dma_seq = DMASequencer(
version=i, ruby_system=ruby_system, in_ports=dma_port
)
dma_cntrl = DMA_Controller(version = i,
dma_sequencer = dma_seq,
transitions_per_cycle = options.ports,
ruby_system = ruby_system)
dma_cntrl = DMA_Controller(
version=i,
dma_sequencer=dma_seq,
transitions_per_cycle=options.ports,
ruby_system=ruby_system,
)
exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
dma_cntrl_nodes.append(dma_cntrl)
# Connect the dma controller to the network
dma_cntrl.mandatoryQueue = MessageBuffer()
dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
dma_cntrl.responseFromDir.in_port = ruby_system.network.out_port
dma_cntrl.reqToDirectory = MessageBuffer()
dma_cntrl.reqToDirectory.out_port = ruby_system.network.in_port
all_cntrls = l1_cntrl_nodes + \
l2_cntrl_nodes + \
dir_cntrl_nodes + \
dma_cntrl_nodes
all_cntrls = (
l1_cntrl_nodes + l2_cntrl_nodes + dir_cntrl_nodes + dma_cntrl_nodes
)
# Create the io controller and the sequencer
if full_system:
io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
ruby_system._io_port = io_seq
io_controller = DMA_Controller(version = len(dma_ports),
dma_sequencer = io_seq,
ruby_system = ruby_system)
io_controller = DMA_Controller(
version=len(dma_ports),
dma_sequencer=io_seq,
ruby_system=ruby_system,
)
ruby_system.io_controller = io_controller
# Connect the dma controller to the network
io_controller.mandatoryQueue = MessageBuffer()
io_controller.responseFromDir = MessageBuffer(ordered = True)
io_controller.responseFromDir = MessageBuffer(ordered=True)
io_controller.responseFromDir.in_port = ruby_system.network.out_port
io_controller.reqToDirectory = MessageBuffer()
io_controller.reqToDirectory.out_port = ruby_system.network.in_port

View File

@@ -36,25 +36,42 @@ from common import FileSystemConfig
#
# Declare caches used by the protocol
#
class L1Cache(RubyCache): pass
class L2Cache(RubyCache): pass
class L1Cache(RubyCache):
pass
class L2Cache(RubyCache):
pass
#
# Probe filter is a cache
#
class ProbeFilter(RubyCache): pass
class ProbeFilter(RubyCache):
pass
def define_options(parser):
parser.add_argument("--allow-atomic-migration", action="store_true",
help="allow migratory sharing for atomic only accessed blocks")
parser.add_argument("--pf-on", action="store_true",
help="Hammer: enable Probe Filter")
parser.add_argument("--dir-on", action="store_true",
help="Hammer: enable Full-bit Directory")
parser.add_argument(
"--allow-atomic-migration",
action="store_true",
help="allow migratory sharing for atomic only accessed blocks",
)
parser.add_argument(
"--pf-on", action="store_true", help="Hammer: enable Probe Filter"
)
parser.add_argument(
"--dir-on",
action="store_true",
help="Hammer: enable Full-bit Directory",
)
def create_system(options, full_system, system, dma_ports, bootmem,
ruby_system, cpus):
if buildEnv['PROTOCOL'] != 'MOESI_hammer':
def create_system(
options, full_system, system, dma_ports, bootmem, ruby_system, cpus
):
if buildEnv["PROTOCOL"] != "MOESI_hammer":
panic("This script requires the MOESI_hammer protocol to be built.")
cpu_sequencers = []
@@ -77,31 +94,43 @@ def create_system(options, full_system, system, dma_ports, bootmem,
#
# First create the Ruby objects associated with this cpu
#
l1i_cache = L1Cache(size = options.l1i_size,
assoc = options.l1i_assoc,
start_index_bit = block_size_bits,
is_icache = True)
l1d_cache = L1Cache(size = options.l1d_size,
assoc = options.l1d_assoc,
start_index_bit = block_size_bits)
l2_cache = L2Cache(size = options.l2_size,
assoc = options.l2_assoc,
start_index_bit = block_size_bits)
l1i_cache = L1Cache(
size=options.l1i_size,
assoc=options.l1i_assoc,
start_index_bit=block_size_bits,
is_icache=True,
)
l1d_cache = L1Cache(
size=options.l1d_size,
assoc=options.l1d_assoc,
start_index_bit=block_size_bits,
)
l2_cache = L2Cache(
size=options.l2_size,
assoc=options.l2_assoc,
start_index_bit=block_size_bits,
)
clk_domain = cpus[i].clk_domain
l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache,
L1Dcache=l1d_cache, L2cache=l2_cache,
no_mig_atomic=not \
options.allow_atomic_migration,
send_evictions=send_evicts(options),
transitions_per_cycle=options.ports,
clk_domain=clk_domain,
ruby_system=ruby_system)
l1_cntrl = L1Cache_Controller(
version=i,
L1Icache=l1i_cache,
L1Dcache=l1d_cache,
L2cache=l2_cache,
no_mig_atomic=not options.allow_atomic_migration,
send_evictions=send_evicts(options),
transitions_per_cycle=options.ports,
clk_domain=clk_domain,
ruby_system=ruby_system,
)
cpu_seq = RubySequencer(version=i,
dcache=l1d_cache,clk_domain=clk_domain,
ruby_system=ruby_system)
cpu_seq = RubySequencer(
version=i,
dcache=l1d_cache,
clk_domain=clk_domain,
ruby_system=ruby_system,
)
l1_cntrl.sequencer = cpu_seq
if options.recycle_latency:
@@ -131,7 +160,6 @@ def create_system(options, full_system, system, dma_ports, bootmem,
l1_cntrl.responseToCache = MessageBuffer()
l1_cntrl.responseToCache.in_port = ruby_system.network.out_port
#
# determine size and index bits for probe filter
# By default, the probe filter size is configured to be twice the
@@ -145,7 +173,7 @@ def create_system(options, full_system, system, dma_ports, bootmem,
if options.pf_on or options.dir_on:
# if numa high bit explicitly set, make sure it does not overlap
# with the probe filter index
assert(options.numa_high_bit - dir_bits > pf_bits)
assert options.numa_high_bit - dir_bits > pf_bits
# set the probe filter start bit to just above the block offset
pf_start_bit = block_size_bits
@@ -159,17 +187,17 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# the ruby system
# clk_divider value is a fix to pass regression.
ruby_system.memctrl_clk_domain = DerivedClockDomain(
clk_domain=ruby_system.clk_domain,
clk_divider=3)
clk_domain=ruby_system.clk_domain, clk_divider=3
)
mem_dir_cntrl_nodes, rom_dir_cntrl_node = create_directories(
options, bootmem, ruby_system, system)
options, bootmem, ruby_system, system
)
dir_cntrl_nodes = mem_dir_cntrl_nodes[:]
if rom_dir_cntrl_node is not None:
dir_cntrl_nodes.append(rom_dir_cntrl_node)
for dir_cntrl in dir_cntrl_nodes:
pf = ProbeFilter(size = pf_size, assoc = 4,
start_index_bit = pf_start_bit)
pf = ProbeFilter(size=pf_size, assoc=4, start_index_bit=pf_start_bit)
dir_cntrl.probeFilter = pf
dir_cntrl.probe_filter_enabled = options.pf_on
@@ -183,10 +211,10 @@ def create_system(options, full_system, system, dma_ports, bootmem,
dir_cntrl.forwardFromDir.out_port = ruby_system.network.in_port
dir_cntrl.responseFromDir = MessageBuffer()
dir_cntrl.responseFromDir.out_port = ruby_system.network.in_port
dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered = True)
dir_cntrl.dmaResponseFromDir = MessageBuffer(ordered=True)
dir_cntrl.dmaResponseFromDir.out_port = ruby_system.network.in_port
dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
dir_cntrl.unblockToDir = MessageBuffer()
dir_cntrl.unblockToDir.in_port = ruby_system.network.out_port
@@ -194,24 +222,25 @@ def create_system(options, full_system, system, dma_ports, bootmem,
dir_cntrl.responseToDir.in_port = ruby_system.network.out_port
dir_cntrl.requestToDir = MessageBuffer()
dir_cntrl.requestToDir.in_port = ruby_system.network.out_port
dir_cntrl.dmaRequestToDir = MessageBuffer(ordered = True)
dir_cntrl.dmaRequestToDir = MessageBuffer(ordered=True)
dir_cntrl.dmaRequestToDir.in_port = ruby_system.network.out_port
dir_cntrl.requestToMemory = MessageBuffer()
dir_cntrl.responseFromMemory = MessageBuffer()
for i, dma_port in enumerate(dma_ports):
#
# Create the Ruby objects associated with the dma controller
#
dma_seq = DMASequencer(version = i,
ruby_system = ruby_system,
in_ports = dma_port)
dma_seq = DMASequencer(
version=i, ruby_system=ruby_system, in_ports=dma_port
)
dma_cntrl = DMA_Controller(version = i,
dma_sequencer = dma_seq,
transitions_per_cycle = options.ports,
ruby_system = ruby_system)
dma_cntrl = DMA_Controller(
version=i,
dma_sequencer=dma_seq,
transitions_per_cycle=options.ports,
ruby_system=ruby_system,
)
exec("ruby_system.dma_cntrl%d = dma_cntrl" % i)
dma_cntrl_nodes.append(dma_cntrl)
@@ -220,7 +249,7 @@ def create_system(options, full_system, system, dma_ports, bootmem,
dma_cntrl.recycle_latency = options.recycle_latency
# Connect the dma controller to the network
dma_cntrl.responseFromDir = MessageBuffer(ordered = True)
dma_cntrl.responseFromDir = MessageBuffer(ordered=True)
dma_cntrl.responseFromDir.in_port = ruby_system.network.out_port
dma_cntrl.requestToDir = MessageBuffer()
dma_cntrl.requestToDir.out_port = ruby_system.network.in_port
@@ -232,13 +261,15 @@ def create_system(options, full_system, system, dma_ports, bootmem,
if full_system:
io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system)
ruby_system._io_port = io_seq
io_controller = DMA_Controller(version = len(dma_ports),
dma_sequencer = io_seq,
ruby_system = ruby_system)
io_controller = DMA_Controller(
version=len(dma_ports),
dma_sequencer=io_seq,
ruby_system=ruby_system,
)
ruby_system.io_controller = io_controller
# Connect the dma controller to the network
io_controller.responseFromDir = MessageBuffer(ordered = True)
io_controller.responseFromDir = MessageBuffer(ordered=True)
io_controller.responseFromDir.in_port = ruby_system.network.out_port
io_controller.requestToDir = MessageBuffer()
io_controller.requestToDir.out_port = ruby_system.network.in_port
@@ -248,30 +279,38 @@ def create_system(options, full_system, system, dma_ports, bootmem,
# Register configuration with filesystem
else:
for i in range(options.num_cpus):
FileSystemConfig.register_cpu(physical_package_id = 0,
core_siblings = [],
core_id = i,
thread_siblings = [])
FileSystemConfig.register_cpu(
physical_package_id=0,
core_siblings=[],
core_id=i,
thread_siblings=[],
)
FileSystemConfig.register_cache(level = 1,
idu_type = 'Instruction',
size = options.l1i_size,
line_size = options.cacheline_size,
assoc = options.l1i_assoc,
cpus = [i])
FileSystemConfig.register_cache(level = 1,
idu_type = 'Data',
size = options.l1d_size,
line_size = options.cacheline_size,
assoc = options.l1d_assoc,
cpus = [i])
FileSystemConfig.register_cache(
level=1,
idu_type="Instruction",
size=options.l1i_size,
line_size=options.cacheline_size,
assoc=options.l1i_assoc,
cpus=[i],
)
FileSystemConfig.register_cache(
level=1,
idu_type="Data",
size=options.l1d_size,
line_size=options.cacheline_size,
assoc=options.l1d_assoc,
cpus=[i],
)
FileSystemConfig.register_cache(level = 2,
idu_type = 'Unified',
size = options.l2_size,
line_size = options.cacheline_size,
assoc = options.l2_assoc,
cpus = [i])
FileSystemConfig.register_cache(
level=2,
idu_type="Unified",
size=options.l2_size,
line_size=options.cacheline_size,
assoc=options.l2_assoc,
cpus=[i],
)
ruby_system.network.number_of_virtual_networks = 6
topology = create_topology(all_cntrls, options)

View File

@@ -43,7 +43,7 @@ from m5.objects import *
from m5.defines import buildEnv
from m5.util import addToPath, fatal
addToPath('../')
addToPath("../")
from common import ObjectList
from common import MemConfig
@@ -52,57 +52,82 @@ from common import FileSystemConfig
from topologies import *
from network import Network
def define_options(parser):
# By default, ruby uses the simple timing cpu
parser.set_defaults(cpu_type="TimingSimpleCPU")
parser.add_argument(
"--ruby-clock", action="store", type=str,
default='2GHz',
help="Clock for blocks running at Ruby system's speed")
"--ruby-clock",
action="store",
type=str,
default="2GHz",
help="Clock for blocks running at Ruby system's speed",
)
parser.add_argument(
"--access-backing-store", action="store_true", default=False,
help="Should ruby maintain a second copy of memory")
"--access-backing-store",
action="store_true",
default=False,
help="Should ruby maintain a second copy of memory",
)
# Options related to cache structure
parser.add_argument(
"--ports", action="store", type=int, default=4,
"--ports",
action="store",
type=int,
default=4,
help="used of transitions per cycle which is a proxy \
for the number of ports.")
for the number of ports.",
)
# network options are in network/Network.py
# ruby mapping options
parser.add_argument(
"--numa-high-bit", type=int, default=0,
"--numa-high-bit",
type=int,
default=0,
help="high order address bit to use for numa mapping. "
"0 = highest bit, not specified = lowest bit")
"0 = highest bit, not specified = lowest bit",
)
parser.add_argument(
"--interleaving-bits", type=int, default=0,
help="number of bits to specify interleaving " \
"in directory, memory controllers and caches. "
"0 = not specified")
"--interleaving-bits",
type=int,
default=0,
help="number of bits to specify interleaving "
"in directory, memory controllers and caches. "
"0 = not specified",
)
parser.add_argument(
"--xor-low-bit", type=int, default=20,
help="hashing bit for channel selection" \
"see MemConfig for explanation of the default"\
"parameter. If set to 0, xor_high_bit is also"\
"set to 0.")
"--xor-low-bit",
type=int,
default=20,
help="hashing bit for channel selection"
"see MemConfig for explanation of the default"
"parameter. If set to 0, xor_high_bit is also"
"set to 0.",
)
parser.add_argument(
"--recycle-latency", type=int, default=10,
help="Recycle latency for ruby controller input buffers")
"--recycle-latency",
type=int,
default=10,
help="Recycle latency for ruby controller input buffers",
)
protocol = buildEnv['PROTOCOL']
protocol = buildEnv["PROTOCOL"]
exec("from . import %s" % protocol)
eval("%s.define_options(parser)" % protocol)
Network.define_options(parser)
def setup_memory_controllers(system, ruby, dir_cntrls, options):
if (options.numa_high_bit):
block_size_bits = options.numa_high_bit + 1 - \
int(math.log(options.num_dirs, 2))
if options.numa_high_bit:
block_size_bits = (
options.numa_high_bit + 1 - int(math.log(options.num_dirs, 2))
)
ruby.block_size_bytes = 2 ** (block_size_bits)
else:
ruby.block_size_bytes = options.cacheline_size
@@ -135,16 +160,21 @@ def setup_memory_controllers(system, ruby, dir_cntrls, options):
dir_ranges = []
for r in system.mem_ranges:
mem_type = ObjectList.mem_list.get(options.mem_type)
dram_intf = MemConfig.create_mem_intf(mem_type, r, index,
dram_intf = MemConfig.create_mem_intf(
mem_type,
r,
index,
int(math.log(options.num_dirs, 2)),
intlv_size, options.xor_low_bit)
intlv_size,
options.xor_low_bit,
)
if issubclass(mem_type, DRAMInterface):
mem_ctrl = m5.objects.MemCtrl(dram = dram_intf)
mem_ctrl = m5.objects.MemCtrl(dram=dram_intf)
else:
mem_ctrl = dram_intf
if options.access_backing_store:
dram_intf.kvm_map=False
dram_intf.kvm_map = False
mem_ctrls.append(mem_ctrl)
dir_ranges.append(dram_intf.range)
@@ -156,8 +186,9 @@ def setup_memory_controllers(system, ruby, dir_cntrls, options):
# Enable low-power DRAM states if option is set
if issubclass(mem_type, DRAMInterface):
mem_ctrl.dram.enable_dram_powerdown = \
options.enable_dram_powerdown
mem_ctrl.dram.enable_dram_powerdown = (
options.enable_dram_powerdown
)
index += 1
dir_cntrl.addr_ranges = dir_ranges
@@ -178,8 +209,16 @@ def create_topology(controllers, options):
topology = eval("Topo.%s(controllers)" % options.topology)
return topology
def create_system(options, full_system, system, piobus = None, dma_ports = [],
bootmem=None, cpus=None):
def create_system(
options,
full_system,
system,
piobus=None,
dma_ports=[],
bootmem=None,
cpus=None,
):
system.ruby = RubySystem()
ruby = system.ruby
@@ -188,40 +227,46 @@ def create_system(options, full_system, system, piobus = None, dma_ports = [],
FileSystemConfig.config_filesystem(system, options)
# Create the network object
(network, IntLinkClass, ExtLinkClass, RouterClass, InterfaceClass) = \
Network.create_network(options, ruby)
(
network,
IntLinkClass,
ExtLinkClass,
RouterClass,
InterfaceClass,
) = Network.create_network(options, ruby)
ruby.network = network
if cpus is None:
cpus = system.cpu
protocol = buildEnv['PROTOCOL']
protocol = buildEnv["PROTOCOL"]
exec("from . import %s" % protocol)
try:
(cpu_sequencers, dir_cntrls, topology) = \
eval("%s.create_system(options, full_system, system, dma_ports,\
(cpu_sequencers, dir_cntrls, topology) = eval(
"%s.create_system(options, full_system, system, dma_ports,\
bootmem, ruby, cpus)"
% protocol)
% protocol
)
except:
print("Error: could not create sytem for ruby protocol %s" % protocol)
raise
# Create the network topology
topology.makeTopology(options, network, IntLinkClass, ExtLinkClass,
RouterClass)
topology.makeTopology(
options, network, IntLinkClass, ExtLinkClass, RouterClass
)
# Register the topology elements with faux filesystem (SE mode only)
if not full_system:
topology.registerTopology(options)
# Initialize network based on topology
Network.init_network(options, network, InterfaceClass)
# Create a port proxy for connecting the system port. This is
# independent of the protocol and kept in the protocol-agnostic
# part (i.e. here).
sys_port_proxy = RubyPortProxy(ruby_system = ruby)
sys_port_proxy = RubyPortProxy(ruby_system=ruby)
if piobus is not None:
sys_port_proxy.pio_request_port = piobus.cpu_side_ports
@@ -246,8 +291,10 @@ def create_system(options, full_system, system, piobus = None, dma_ports = [],
# Create a backing copy of physical memory in case required
if options.access_backing_store:
ruby.access_backing_store = True
ruby.phys_mem = SimpleMemory(range=system.mem_ranges[0],
in_addr_map=False)
ruby.phys_mem = SimpleMemory(
range=system.mem_ranges[0], in_addr_map=False
)
def create_directories(options, bootmem, ruby_system, system):
dir_cntrl_nodes = []
@@ -271,12 +318,15 @@ def create_directories(options, bootmem, ruby_system, system):
return (dir_cntrl_nodes, None)
def send_evicts(options):
# currently, 2 scenarios warrant forwarding evictions to the CPU:
# 1. The O3 model must keep the LSQ coherent with the caches
# 2. The x86 mwait instruction is built on top of coherence invalidations
# 3. The local exclusive monitor in ARM systems
if options.cpu_type == "DerivO3CPU" or \
buildEnv['TARGET_ISA'] in ('x86', 'arm'):
if options.cpu_type == "DerivO3CPU" or buildEnv["TARGET_ISA"] in (
"x86",
"arm",
):
return True
return False