This commit contains the rest of the base 2 vs base 10 cache/memory size clarifications. It also changes the warning message to use warn(). With these changes, the warning message should now no longer show up during a fresh compilation of gem5. Change-Id: Ia63f841bdf045b76473437f41548fab27dc19631
523 lines
16 KiB
Python
523 lines
16 KiB
Python
# Copyright (c) 2016-2017, 2019, 2021-2023 Arm Limited
|
|
# All rights reserved.
|
|
#
|
|
# The license below extends only to copyright in the software and shall
|
|
# not be construed as granting a license to any other intellectual
|
|
# property including but not limited to intellectual property relating
|
|
# to a hardware implementation of the functionality of the software
|
|
# licensed hereunder. You may use the software subject to the license
|
|
# terms below provided that you ensure that this notice is replicated
|
|
# unmodified and in its entirety in all distributions of the software,
|
|
# modified or unmodified, in source code or in binary form.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are
|
|
# met: redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer;
|
|
# redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution;
|
|
# neither the name of the copyright holders nor the names of its
|
|
# contributors may be used to endorse or promote products derived from
|
|
# this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
# System components used by the bigLITTLE.py configuration script
|
|
|
|
import m5
|
|
from m5.objects import *
|
|
|
|
m5.util.addToPath("../../")
|
|
from common import ObjectList
|
|
from common.Caches import *
|
|
|
|
have_kvm = "ArmV8KvmCPU" in ObjectList.cpu_list.get_names()
|
|
have_fastmodel = "FastModelCortexA76" in ObjectList.cpu_list.get_names()
|
|
|
|
|
|
class L1I(L1_ICache):
|
|
tag_latency = 1
|
|
data_latency = 1
|
|
response_latency = 1
|
|
mshrs = 4
|
|
tgts_per_mshr = 8
|
|
size = "48KiB"
|
|
assoc = 3
|
|
|
|
|
|
class L1D(L1_DCache):
|
|
tag_latency = 2
|
|
data_latency = 2
|
|
response_latency = 1
|
|
mshrs = 16
|
|
tgts_per_mshr = 16
|
|
size = "32KiB"
|
|
assoc = 2
|
|
write_buffers = 16
|
|
|
|
|
|
class L2(L2Cache):
|
|
tag_latency = 12
|
|
data_latency = 12
|
|
response_latency = 5
|
|
mshrs = 32
|
|
tgts_per_mshr = 8
|
|
size = "1MiB"
|
|
assoc = 16
|
|
write_buffers = 8
|
|
clusivity = "mostly_excl"
|
|
|
|
|
|
class L3(Cache):
|
|
size = "16MiB"
|
|
assoc = 16
|
|
tag_latency = 20
|
|
data_latency = 20
|
|
response_latency = 20
|
|
mshrs = 20
|
|
tgts_per_mshr = 12
|
|
clusivity = "mostly_excl"
|
|
|
|
|
|
class MemBus(SystemXBar):
|
|
badaddr_responder = BadAddr(warn_access="warn")
|
|
default = Self.badaddr_responder.pio
|
|
|
|
|
|
class ArmCpuCluster(CpuCluster):
|
|
def __init__(
|
|
self,
|
|
system,
|
|
num_cpus,
|
|
cpu_clock,
|
|
cpu_voltage,
|
|
cpu_type,
|
|
l1i_type,
|
|
l1d_type,
|
|
l2_type,
|
|
tarmac_gen=False,
|
|
tarmac_dest=None,
|
|
):
|
|
super().__init__()
|
|
self._cpu_type = cpu_type
|
|
self._l1i_type = l1i_type
|
|
self._l1d_type = l1d_type
|
|
self._l2_type = l2_type
|
|
|
|
assert num_cpus > 0
|
|
|
|
self.voltage_domain = VoltageDomain(voltage=cpu_voltage)
|
|
self.clk_domain = SrcClockDomain(
|
|
clock=cpu_clock, voltage_domain=self.voltage_domain
|
|
)
|
|
|
|
self.generate_cpus(cpu_type, num_cpus)
|
|
|
|
for cpu in self.cpus:
|
|
if tarmac_gen:
|
|
cpu.tracer = TarmacTracer()
|
|
if tarmac_dest is not None:
|
|
cpu.tracer.outfile = tarmac_dest
|
|
|
|
system.addCpuCluster(self)
|
|
|
|
def addL1(self):
|
|
for cpu in self.cpus:
|
|
l1i = None if self._l1i_type is None else self._l1i_type()
|
|
l1d = None if self._l1d_type is None else self._l1d_type()
|
|
cpu.addPrivateSplitL1Caches(l1i, l1d)
|
|
|
|
def addL2(self, clk_domain):
|
|
if self._l2_type is None:
|
|
return
|
|
self.toL2Bus = L2XBar(width=64, clk_domain=clk_domain)
|
|
self.l2 = self._l2_type()
|
|
for cpu in self.cpus:
|
|
cpu.connectCachedPorts(self.toL2Bus.cpu_side_ports)
|
|
self.toL2Bus.mem_side_ports = self.l2.cpu_side
|
|
|
|
def addPMUs(
|
|
self,
|
|
ints,
|
|
events=[],
|
|
exit_sim_on_control=False,
|
|
exit_sim_on_interrupt=False,
|
|
):
|
|
"""
|
|
Instantiates 1 ArmPMU per PE. The method is accepting a list of
|
|
interrupt numbers (ints) used by the PMU and a list of events to
|
|
register in it.
|
|
|
|
:param ints: List of interrupt numbers. The code will iterate over
|
|
the cpu list in order and will assign to every cpu in the cluster
|
|
a PMU with the matching interrupt.
|
|
:type ints: List[int]
|
|
:param events: Additional events to be measured by the PMUs
|
|
:type events: List[Union[ProbeEvent, SoftwareIncrement]]
|
|
:param exit_sim_on_control: If true, exit the sim loop when the PMU is
|
|
enabled, disabled, or reset.
|
|
:type exit_on_control: bool
|
|
:param exit_sim_on_interrupt: If true, exit the sim loop when the PMU
|
|
triggers an interrupt.
|
|
:type exit_on_control: bool
|
|
|
|
"""
|
|
assert len(ints) == len(self.cpus)
|
|
for cpu, pint in zip(self.cpus, ints):
|
|
int_cls = ArmPPI if pint < 32 else ArmSPI
|
|
for isa in cpu.isa:
|
|
isa.pmu = ArmPMU(interrupt=int_cls(num=pint))
|
|
isa.pmu.exitOnPMUControl = exit_sim_on_control
|
|
isa.pmu.exitOnPMUInterrupt = exit_sim_on_interrupt
|
|
isa.pmu.addArchEvents(
|
|
cpu=cpu,
|
|
itb=cpu.mmu.itb,
|
|
dtb=cpu.mmu.dtb,
|
|
icache=getattr(cpu, "icache", None),
|
|
dcache=getattr(cpu, "dcache", None),
|
|
l2cache=getattr(self, "l2", None),
|
|
)
|
|
for ev in events:
|
|
isa.pmu.addEvent(ev)
|
|
|
|
def connectMemSide(self, bus):
|
|
try:
|
|
self.l2.mem_side = bus.cpu_side_ports
|
|
except AttributeError:
|
|
for cpu in self.cpus:
|
|
cpu.connectCachedPorts(bus.cpu_side_ports)
|
|
|
|
|
|
class AtomicCluster(ArmCpuCluster):
|
|
def __init__(
|
|
self,
|
|
system,
|
|
num_cpus,
|
|
cpu_clock,
|
|
cpu_voltage="1.0V",
|
|
tarmac_gen=False,
|
|
tarmac_dest=None,
|
|
):
|
|
super().__init__(
|
|
system,
|
|
num_cpus,
|
|
cpu_clock,
|
|
cpu_voltage,
|
|
cpu_type=ObjectList.cpu_list.get("AtomicSimpleCPU"),
|
|
l1i_type=None,
|
|
l1d_type=None,
|
|
l2_type=None,
|
|
tarmac_gen=tarmac_gen,
|
|
tarmac_dest=tarmac_dest,
|
|
)
|
|
|
|
def addL1(self):
|
|
pass
|
|
|
|
|
|
class KvmCluster(ArmCpuCluster):
|
|
def __init__(
|
|
self,
|
|
system,
|
|
num_cpus,
|
|
cpu_clock,
|
|
cpu_voltage="1.0V",
|
|
tarmac_gen=False,
|
|
tarmac_dest=None,
|
|
):
|
|
super().__init__(
|
|
system,
|
|
num_cpus,
|
|
cpu_clock,
|
|
cpu_voltage,
|
|
cpu_type=ObjectList.cpu_list.get("ArmV8KvmCPU"),
|
|
l1i_type=None,
|
|
l1d_type=None,
|
|
l2_type=None,
|
|
tarmac_gen=tarmac_gen,
|
|
tarmac_dest=tarmac_dest,
|
|
)
|
|
|
|
def addL1(self):
|
|
pass
|
|
|
|
|
|
class FastmodelCluster(CpuCluster):
|
|
def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
|
|
super().__init__()
|
|
|
|
# Setup GIC
|
|
gic = system.realview.gic
|
|
gic.sc_gic.cpu_affinities = ",".join(
|
|
["0.0.%d.0" % i for i in range(num_cpus)]
|
|
)
|
|
|
|
# Parse the base address of redistributor.
|
|
redist_base = gic.get_redist_bases()[0]
|
|
redist_frame_size = 0x40000 if gic.sc_gic.has_gicv4_1 else 0x20000
|
|
gic.sc_gic.reg_base_per_redistributor = ",".join(
|
|
[
|
|
"0.0.%d.0=%#x" % (i, redist_base + redist_frame_size * i)
|
|
for i in range(num_cpus)
|
|
]
|
|
)
|
|
|
|
gic_a2t = AmbaToTlmBridge64(amba=gic.amba_m)
|
|
gic_t2g = TlmToGem5Bridge64(
|
|
tlm=gic_a2t.tlm, gem5=system.iobus.cpu_side_ports
|
|
)
|
|
gic_g2t = Gem5ToTlmBridge64(gem5=system.membus.mem_side_ports)
|
|
gic_g2t.addr_ranges = gic.get_addr_ranges()
|
|
gic_t2a = AmbaFromTlmBridge64(tlm=gic_g2t.tlm)
|
|
gic.amba_s = gic_t2a.amba
|
|
|
|
system.gic_hub = SubSystem()
|
|
system.gic_hub.gic_a2t = gic_a2t
|
|
system.gic_hub.gic_t2g = gic_t2g
|
|
system.gic_hub.gic_g2t = gic_g2t
|
|
system.gic_hub.gic_t2a = gic_t2a
|
|
|
|
self.voltage_domain = VoltageDomain(voltage=cpu_voltage)
|
|
self.clk_domain = SrcClockDomain(
|
|
clock=cpu_clock, voltage_domain=self.voltage_domain
|
|
)
|
|
|
|
# Setup CPU
|
|
assert num_cpus <= 4
|
|
CpuClasses = [
|
|
FastModelCortexA76x1,
|
|
FastModelCortexA76x2,
|
|
FastModelCortexA76x3,
|
|
FastModelCortexA76x4,
|
|
]
|
|
CpuClass = CpuClasses[num_cpus - 1]
|
|
|
|
cpu = CpuClass(
|
|
GICDISABLE=False, BROADCASTATOMIC=False, BROADCASTOUTER=False
|
|
)
|
|
for core in cpu.cores:
|
|
core.semihosting_enable = False
|
|
core.RVBARADDR = 0x10
|
|
core.redistributor = gic.redistributor
|
|
core.createThreads()
|
|
core.createInterruptController()
|
|
self.cpus = [cpu]
|
|
|
|
self.cpu_hub = SubSystem()
|
|
a2t = AmbaToTlmBridge64(amba=cpu.amba)
|
|
t2g = TlmToGem5Bridge64(tlm=a2t.tlm, gem5=system.membus.cpu_side_ports)
|
|
self.cpu_hub.a2t = a2t
|
|
self.cpu_hub.t2g = t2g
|
|
|
|
system.addCpuCluster(self)
|
|
|
|
def require_caches(self):
|
|
return False
|
|
|
|
def memory_mode(self):
|
|
return "atomic_noncaching"
|
|
|
|
def addL1(self):
|
|
pass
|
|
|
|
def addL2(self, clk_domain):
|
|
pass
|
|
|
|
def connectMemSide(self, bus):
|
|
pass
|
|
|
|
|
|
class ClusterSystem:
|
|
"""
|
|
Base class providing cpu clusters generation/handling methods to
|
|
SE/FS systems
|
|
"""
|
|
|
|
def __init__(self, **kwargs):
|
|
self._clusters = []
|
|
|
|
def numCpuClusters(self):
|
|
return len(self._clusters)
|
|
|
|
def addCpuCluster(self, cpu_cluster):
|
|
self._clusters.append(cpu_cluster)
|
|
|
|
def addCaches(self, need_caches, last_cache_level):
|
|
if not need_caches:
|
|
# connect each cluster to the memory hierarchy
|
|
for cluster in self._clusters:
|
|
cluster.connectMemSide(self.membus)
|
|
return
|
|
|
|
cluster_mem_bus = self.membus
|
|
assert last_cache_level >= 1 and last_cache_level <= 3
|
|
for cluster in self._clusters:
|
|
cluster.addL1()
|
|
if last_cache_level > 1:
|
|
for cluster in self._clusters:
|
|
cluster.addL2(cluster.clk_domain)
|
|
if last_cache_level > 2:
|
|
max_clock_cluster = max(
|
|
self._clusters, key=lambda c: c.clk_domain.clock[0]
|
|
)
|
|
self.l3 = L3(clk_domain=max_clock_cluster.clk_domain)
|
|
self.toL3Bus = L2XBar(width=64)
|
|
self.toL3Bus.mem_side_ports = self.l3.cpu_side
|
|
self.l3.mem_side = self.membus.cpu_side_ports
|
|
cluster_mem_bus = self.toL3Bus
|
|
|
|
# connect each cluster to the memory hierarchy
|
|
for cluster in self._clusters:
|
|
cluster.connectMemSide(cluster_mem_bus)
|
|
|
|
|
|
class SimpleSeSystem(System, ClusterSystem):
|
|
"""
|
|
Example system class for syscall emulation mode
|
|
"""
|
|
|
|
# Use a fixed cache line size of 64 bytes
|
|
cache_line_size = 64
|
|
|
|
def __init__(self, **kwargs):
|
|
System.__init__(self, **kwargs)
|
|
ClusterSystem.__init__(self, **kwargs)
|
|
# Create a voltage and clock domain for system components
|
|
self.voltage_domain = VoltageDomain(voltage="3.3V")
|
|
self.clk_domain = SrcClockDomain(
|
|
clock="1GHz", voltage_domain=self.voltage_domain
|
|
)
|
|
|
|
# Create the off-chip memory bus.
|
|
self.membus = SystemXBar()
|
|
|
|
def connect(self):
|
|
self.system_port = self.membus.cpu_side_ports
|
|
|
|
|
|
class BaseSimpleSystem(ArmSystem, ClusterSystem):
|
|
cache_line_size = 64
|
|
|
|
def __init__(self, mem_size, platform, **kwargs):
|
|
ArmSystem.__init__(self, **kwargs)
|
|
ClusterSystem.__init__(self, **kwargs)
|
|
|
|
self.voltage_domain = VoltageDomain(voltage="1.0V")
|
|
self.clk_domain = SrcClockDomain(
|
|
clock="1GHz", voltage_domain=Parent.voltage_domain
|
|
)
|
|
|
|
if platform is None:
|
|
self.realview = VExpress_GEM5_V1()
|
|
else:
|
|
self.realview = platform
|
|
|
|
if hasattr(self.realview.gic, "cpu_addr"):
|
|
self.gic_cpu_addr = self.realview.gic.cpu_addr
|
|
|
|
self.terminal = Terminal()
|
|
self.vncserver = VncServer()
|
|
|
|
self.iobus = IOXBar()
|
|
# Device DMA -> MEM
|
|
self.mem_ranges = self.getMemRanges(int(Addr(mem_size)))
|
|
|
|
def getMemRanges(self, mem_size):
|
|
"""
|
|
Define system memory ranges. This depends on the physical
|
|
memory map provided by the realview platform and by the memory
|
|
size provided by the user (mem_size argument).
|
|
The method is iterating over all platform ranges until they cover
|
|
the entire user's memory requirements.
|
|
"""
|
|
mem_ranges = []
|
|
for mem_range in self.realview._mem_regions:
|
|
size_in_range = min(mem_size, mem_range.size())
|
|
|
|
mem_ranges.append(
|
|
AddrRange(start=mem_range.start, size=size_in_range)
|
|
)
|
|
|
|
mem_size -= size_in_range
|
|
if mem_size == 0:
|
|
return mem_ranges
|
|
|
|
raise ValueError("memory size too big for platform capabilities")
|
|
|
|
|
|
class SimpleSystem(BaseSimpleSystem):
|
|
"""
|
|
Meant to be used with the classic memory model
|
|
"""
|
|
|
|
def __init__(self, caches, mem_size, platform=None, **kwargs):
|
|
super().__init__(mem_size, platform, **kwargs)
|
|
|
|
self.membus = MemBus()
|
|
# CPUs->PIO
|
|
self.iobridge = Bridge(delay="50ns")
|
|
|
|
self._caches = caches
|
|
if self._caches:
|
|
self.iocache = IOCache(addr_ranges=self.mem_ranges)
|
|
else:
|
|
self.dmabridge = Bridge(delay="50ns", ranges=self.mem_ranges)
|
|
|
|
def connect(self):
|
|
self.iobridge.mem_side_port = self.iobus.cpu_side_ports
|
|
self.iobridge.cpu_side_port = self.membus.mem_side_ports
|
|
|
|
if self._caches:
|
|
self.iocache.mem_side = self.membus.cpu_side_ports
|
|
self.iocache.cpu_side = self.iobus.mem_side_ports
|
|
else:
|
|
self.dmabridge.mem_side_port = self.membus.cpu_side_ports
|
|
self.dmabridge.cpu_side_port = self.iobus.mem_side_ports
|
|
|
|
if hasattr(self.realview.gic, "cpu_addr"):
|
|
self.gic_cpu_addr = self.realview.gic.cpu_addr
|
|
self.realview.attachOnChipIO(self.membus, self.iobridge)
|
|
self.realview.attachIO(self.iobus)
|
|
self.system_port = self.membus.cpu_side_ports
|
|
|
|
def attach_pci(self, dev):
|
|
self.realview.attachPciDevice(dev, self.iobus)
|
|
|
|
|
|
class ArmRubySystem(BaseSimpleSystem):
|
|
"""
|
|
Meant to be used with ruby
|
|
"""
|
|
|
|
def __init__(self, mem_size, platform=None, **kwargs):
|
|
super().__init__(mem_size, platform, **kwargs)
|
|
self._dma_ports = []
|
|
self._mem_ports = []
|
|
|
|
def connect(self):
|
|
self.realview.attachOnChipIO(
|
|
self.iobus, dma_ports=self._dma_ports, mem_ports=self._mem_ports
|
|
)
|
|
|
|
self.realview.attachIO(self.iobus, dma_ports=self._dma_ports)
|
|
|
|
for cluster in self._clusters:
|
|
for i, cpu in enumerate(cluster.cpus):
|
|
self.ruby._cpu_ports[i].connectCpuPorts(cpu)
|
|
|
|
def attach_pci(self, dev):
|
|
self.realview.attachPciDevice(
|
|
dev, self.iobus, dma_ports=self._dma_ports
|
|
)
|