configs: Add an example elastic trace generation script (#415)
Current [TraceCPU documentation](https://www.gem5.org/documentation/general_docs/cpu_models/TraceCPU) still references the deprecated **se.py/fs.py** scripts for elastic trace generation (script paths are also outdated). With this PR we provide a simpler Arm based elastic trace generation script that can be used out of the box by a user or that can be extended as needed.
This commit is contained in:
58
configs/common/cores/arm/O3_ARM_Etrace.py
Normal file
58
configs/common/cores/arm/O3_ARM_Etrace.py
Normal file
@@ -0,0 +1,58 @@
|
||||
# Copyright (c) 2012, 2017-2018, 2023 Arm Limited
|
||||
# All rights reserved.
|
||||
#
|
||||
# The license below extends only to copyright in the software and shall
|
||||
# not be construed as granting a license to any other intellectual
|
||||
# property including but not limited to intellectual property relating
|
||||
# to a hardware implementation of the functionality of the software
|
||||
# licensed hereunder. You may use the software subject to the license
|
||||
# terms below provided that you ensure that this notice is replicated
|
||||
# unmodified and in its entirety in all distributions of the software,
|
||||
# modified or unmodified, in source code or in binary form.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from m5.objects import *
|
||||
from .O3_ARM_v7a import O3_ARM_v7a_3
|
||||
|
||||
# O3_ARM_v7a_3 adapted to generate elastic traces
|
||||
class O3_ARM_v7a_3_Etrace(O3_ARM_v7a_3):
|
||||
# Make the number of entries in the ROB, LQ and SQ very
|
||||
# large so that there are no stalls due to resource
|
||||
# limitation as such stalls will get captured in the trace
|
||||
# as compute delay. For replay, ROB, LQ and SQ sizes are
|
||||
# modelled in the Trace CPU.
|
||||
numROBEntries = 512
|
||||
LQEntries = 128
|
||||
SQEntries = 128
|
||||
|
||||
def attach_probe_listener(self, inst_trace_file, data_trace_file):
|
||||
# Attach the elastic trace probe listener. Set the protobuf trace
|
||||
# file names. Set the dependency window size equal to the cpu it
|
||||
# is attached to.
|
||||
self.traceListener = m5.objects.ElasticTrace(
|
||||
instFetchTraceFile=inst_trace_file,
|
||||
dataDepTraceFile=data_trace_file,
|
||||
depWindowSize=3 * self.numROBEntries,
|
||||
)
|
||||
@@ -338,56 +338,15 @@ class FastmodelCluster(CpuCluster):
|
||||
pass
|
||||
|
||||
|
||||
class BaseSimpleSystem(ArmSystem):
|
||||
cache_line_size = 64
|
||||
|
||||
def __init__(self, mem_size, platform, **kwargs):
|
||||
super(BaseSimpleSystem, self).__init__(**kwargs)
|
||||
|
||||
self.voltage_domain = VoltageDomain(voltage="1.0V")
|
||||
self.clk_domain = SrcClockDomain(
|
||||
clock="1GHz", voltage_domain=Parent.voltage_domain
|
||||
)
|
||||
|
||||
if platform is None:
|
||||
self.realview = VExpress_GEM5_V1()
|
||||
else:
|
||||
self.realview = platform
|
||||
|
||||
if hasattr(self.realview.gic, "cpu_addr"):
|
||||
self.gic_cpu_addr = self.realview.gic.cpu_addr
|
||||
|
||||
self.terminal = Terminal()
|
||||
self.vncserver = VncServer()
|
||||
|
||||
self.iobus = IOXBar()
|
||||
# Device DMA -> MEM
|
||||
self.mem_ranges = self.getMemRanges(int(Addr(mem_size)))
|
||||
class ClusterSystem:
|
||||
"""
|
||||
Base class providing cpu clusters generation/handling methods to
|
||||
SE/FS systems
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
self._clusters = []
|
||||
|
||||
def getMemRanges(self, mem_size):
|
||||
"""
|
||||
Define system memory ranges. This depends on the physical
|
||||
memory map provided by the realview platform and by the memory
|
||||
size provided by the user (mem_size argument).
|
||||
The method is iterating over all platform ranges until they cover
|
||||
the entire user's memory requirements.
|
||||
"""
|
||||
mem_ranges = []
|
||||
for mem_range in self.realview._mem_regions:
|
||||
size_in_range = min(mem_size, mem_range.size())
|
||||
|
||||
mem_ranges.append(
|
||||
AddrRange(start=mem_range.start, size=size_in_range)
|
||||
)
|
||||
|
||||
mem_size -= size_in_range
|
||||
if mem_size == 0:
|
||||
return mem_ranges
|
||||
|
||||
raise ValueError("memory size too big for platform capabilities")
|
||||
|
||||
def numCpuClusters(self):
|
||||
return len(self._clusters)
|
||||
|
||||
@@ -423,6 +382,80 @@ class BaseSimpleSystem(ArmSystem):
|
||||
cluster.connectMemSide(cluster_mem_bus)
|
||||
|
||||
|
||||
class SimpleSeSystem(System, ClusterSystem):
|
||||
"""
|
||||
Example system class for syscall emulation mode
|
||||
"""
|
||||
|
||||
# Use a fixed cache line size of 64 bytes
|
||||
cache_line_size = 64
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
System.__init__(self, **kwargs)
|
||||
ClusterSystem.__init__(self, **kwargs)
|
||||
# Create a voltage and clock domain for system components
|
||||
self.voltage_domain = VoltageDomain(voltage="3.3V")
|
||||
self.clk_domain = SrcClockDomain(
|
||||
clock="1GHz", voltage_domain=self.voltage_domain
|
||||
)
|
||||
|
||||
# Create the off-chip memory bus.
|
||||
self.membus = SystemXBar()
|
||||
|
||||
def connect(self):
|
||||
self.system_port = self.membus.cpu_side_ports
|
||||
|
||||
|
||||
class BaseSimpleSystem(ArmSystem, ClusterSystem):
|
||||
cache_line_size = 64
|
||||
|
||||
def __init__(self, mem_size, platform, **kwargs):
|
||||
ArmSystem.__init__(self, **kwargs)
|
||||
ClusterSystem.__init__(self, **kwargs)
|
||||
|
||||
self.voltage_domain = VoltageDomain(voltage="1.0V")
|
||||
self.clk_domain = SrcClockDomain(
|
||||
clock="1GHz", voltage_domain=Parent.voltage_domain
|
||||
)
|
||||
|
||||
if platform is None:
|
||||
self.realview = VExpress_GEM5_V1()
|
||||
else:
|
||||
self.realview = platform
|
||||
|
||||
if hasattr(self.realview.gic, "cpu_addr"):
|
||||
self.gic_cpu_addr = self.realview.gic.cpu_addr
|
||||
|
||||
self.terminal = Terminal()
|
||||
self.vncserver = VncServer()
|
||||
|
||||
self.iobus = IOXBar()
|
||||
# Device DMA -> MEM
|
||||
self.mem_ranges = self.getMemRanges(int(Addr(mem_size)))
|
||||
|
||||
def getMemRanges(self, mem_size):
|
||||
"""
|
||||
Define system memory ranges. This depends on the physical
|
||||
memory map provided by the realview platform and by the memory
|
||||
size provided by the user (mem_size argument).
|
||||
The method is iterating over all platform ranges until they cover
|
||||
the entire user's memory requirements.
|
||||
"""
|
||||
mem_ranges = []
|
||||
for mem_range in self.realview._mem_regions:
|
||||
size_in_range = min(mem_size, mem_range.size())
|
||||
|
||||
mem_ranges.append(
|
||||
AddrRange(start=mem_range.start, size=size_in_range)
|
||||
)
|
||||
|
||||
mem_size -= size_in_range
|
||||
if mem_size == 0:
|
||||
return mem_ranges
|
||||
|
||||
raise ValueError("memory size too big for platform capabilities")
|
||||
|
||||
|
||||
class SimpleSystem(BaseSimpleSystem):
|
||||
"""
|
||||
Meant to be used with the classic memory model
|
||||
|
||||
191
configs/example/arm/etrace_se.py
Normal file
191
configs/example/arm/etrace_se.py
Normal file
@@ -0,0 +1,191 @@
|
||||
# Copyright (c) 2016-2017, 2022-2023 Arm Limited
|
||||
# All rights reserved.
|
||||
#
|
||||
# The license below extends only to copyright in the software and shall
|
||||
# not be construed as granting a license to any other intellectual
|
||||
# property including but not limited to intellectual property relating
|
||||
# to a hardware implementation of the functionality of the software
|
||||
# licensed hereunder. You may use the software subject to the license
|
||||
# terms below provided that you ensure that this notice is replicated
|
||||
# unmodified and in its entirety in all distributions of the software,
|
||||
# modified or unmodified, in source code or in binary form.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
import os
|
||||
import m5
|
||||
from m5.util import addToPath
|
||||
from m5.objects import *
|
||||
import argparse
|
||||
import shlex
|
||||
|
||||
m5.util.addToPath("../..")
|
||||
|
||||
from common import ObjectList
|
||||
|
||||
import devices
|
||||
|
||||
|
||||
def get_processes(cmd):
|
||||
"""Interprets commands to run and returns a list of processes"""
|
||||
|
||||
cwd = os.getcwd()
|
||||
multiprocesses = []
|
||||
for idx, c in enumerate(cmd):
|
||||
argv = shlex.split(c)
|
||||
|
||||
process = Process(pid=100 + idx, cwd=cwd, cmd=argv, executable=argv[0])
|
||||
process.gid = os.getgid()
|
||||
|
||||
print("info: %d. command and arguments: %s" % (idx + 1, process.cmd))
|
||||
multiprocesses.append(process)
|
||||
|
||||
return multiprocesses
|
||||
|
||||
|
||||
def create(args):
|
||||
"""Create and configure the system object."""
|
||||
|
||||
system = devices.SimpleSeSystem(
|
||||
mem_mode="timing",
|
||||
)
|
||||
|
||||
# Add CPUs to the system. A cluster of CPUs typically have
|
||||
# private L1 caches and a shared L2 cache.
|
||||
system.cpu_cluster = devices.ArmCpuCluster(
|
||||
system,
|
||||
args.num_cores,
|
||||
args.cpu_freq,
|
||||
"1.2V",
|
||||
ObjectList.cpu_list.get("O3_ARM_v7a_3_Etrace"),
|
||||
devices.L1I,
|
||||
devices.L1D,
|
||||
devices.L2,
|
||||
)
|
||||
|
||||
# Attach the elastic trace probe listener to every CPU in the cluster
|
||||
for cpu in system.cpu_cluster:
|
||||
cpu.attach_probe_listener(args.inst_trace_file, args.data_trace_file)
|
||||
|
||||
# As elastic trace generation is enabled, make sure the memory system is
|
||||
# minimal so that compute delays do not include memory access latencies.
|
||||
# Configure the compulsory L1 caches for the O3CPU, do not configure
|
||||
# any more caches.
|
||||
system.addCaches(True, last_cache_level=1)
|
||||
|
||||
# For elastic trace, over-riding Simple Memory latency to 1ns."
|
||||
system.memory = SimpleMemory(
|
||||
range=AddrRange(start=0, size=args.mem_size),
|
||||
latency="1ns",
|
||||
port=system.membus.mem_side_ports,
|
||||
)
|
||||
|
||||
# Parse the command line and get a list of Processes instances
|
||||
# that we can pass to gem5.
|
||||
processes = get_processes(args.commands_to_run)
|
||||
if len(processes) != args.num_cores:
|
||||
print(
|
||||
"Error: Cannot map %d command(s) onto %d CPU(s)"
|
||||
% (len(processes), args.num_cores)
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
system.workload = SEWorkload.init_compatible(processes[0].executable)
|
||||
|
||||
# Assign one workload to each CPU
|
||||
for cpu, workload in zip(system.cpu_cluster.cpus, processes):
|
||||
cpu.workload = workload
|
||||
|
||||
return system
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(epilog=__doc__)
|
||||
|
||||
parser.add_argument(
|
||||
"commands_to_run",
|
||||
metavar="command(s)",
|
||||
nargs="+",
|
||||
help="Command(s) to run",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--inst-trace-file",
|
||||
action="store",
|
||||
type=str,
|
||||
help="""Instruction fetch trace file input to
|
||||
Elastic Trace probe in a capture simulation and
|
||||
Trace CPU in a replay simulation""",
|
||||
default="fetchtrace.proto.gz",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--data-trace-file",
|
||||
action="store",
|
||||
type=str,
|
||||
help="""Data dependency trace file input to
|
||||
Elastic Trace probe in a capture simulation and
|
||||
Trace CPU in a replay simulation""",
|
||||
default="deptrace.proto.gz",
|
||||
)
|
||||
parser.add_argument("--cpu-freq", type=str, default="4GHz")
|
||||
parser.add_argument(
|
||||
"--num-cores", type=int, default=1, help="Number of CPU cores"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--mem-size",
|
||||
action="store",
|
||||
type=str,
|
||||
default="2GB",
|
||||
help="Specify the physical memory size",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Create a single root node for gem5's object hierarchy. There can
|
||||
# only exist one root node in the simulator at any given
|
||||
# time. Tell gem5 that we want to use syscall emulation mode
|
||||
# instead of full system mode.
|
||||
root = Root(full_system=False)
|
||||
|
||||
# Populate the root node with a system. A system corresponds to a
|
||||
# single node with shared memory.
|
||||
root.system = create(args)
|
||||
|
||||
# Instantiate the C++ object hierarchy. After this point,
|
||||
# SimObjects can't be instantiated anymore.
|
||||
m5.instantiate()
|
||||
|
||||
# Start the simulator. This gives control to the C++ world and
|
||||
# starts the simulator. The returned event tells the simulation
|
||||
# script why the simulator exited.
|
||||
event = m5.simulate()
|
||||
|
||||
# Print the reason for the simulation exit. Some exit codes are
|
||||
# requests for service (e.g., checkpoints) from the simulation
|
||||
# script. We'll just ignore them here and exit.
|
||||
print(f"{event.getCause()} ({event.getCode()}) @ {m5.curTick()}")
|
||||
|
||||
|
||||
if __name__ == "__m5_main__":
|
||||
main()
|
||||
@@ -64,72 +64,6 @@ cpu_types = {
|
||||
}
|
||||
|
||||
|
||||
class SimpleSeSystem(System):
|
||||
"""
|
||||
Example system class for syscall emulation mode
|
||||
"""
|
||||
|
||||
# Use a fixed cache line size of 64 bytes
|
||||
cache_line_size = 64
|
||||
|
||||
def __init__(self, args, **kwargs):
|
||||
super(SimpleSeSystem, self).__init__(**kwargs)
|
||||
|
||||
# Setup book keeping to be able to use CpuClusters from the
|
||||
# devices module.
|
||||
self._clusters = []
|
||||
self._num_cpus = 0
|
||||
|
||||
# Create a voltage and clock domain for system components
|
||||
self.voltage_domain = VoltageDomain(voltage="3.3V")
|
||||
self.clk_domain = SrcClockDomain(
|
||||
clock="1GHz", voltage_domain=self.voltage_domain
|
||||
)
|
||||
|
||||
# Create the off-chip memory bus.
|
||||
self.membus = SystemXBar()
|
||||
|
||||
# Wire up the system port that gem5 uses to load the kernel
|
||||
# and to perform debug accesses.
|
||||
self.system_port = self.membus.cpu_side_ports
|
||||
|
||||
# Add CPUs to the system. A cluster of CPUs typically have
|
||||
# private L1 caches and a shared L2 cache.
|
||||
self.cpu_cluster = devices.ArmCpuCluster(
|
||||
self,
|
||||
args.num_cores,
|
||||
args.cpu_freq,
|
||||
"1.2V",
|
||||
*cpu_types[args.cpu],
|
||||
tarmac_gen=args.tarmac_gen,
|
||||
tarmac_dest=args.tarmac_dest,
|
||||
)
|
||||
|
||||
# Create a cache hierarchy (unless we are simulating a
|
||||
# functional CPU in atomic memory mode) for the CPU cluster
|
||||
# and connect it to the shared memory bus.
|
||||
if self.cpu_cluster.memory_mode() == "timing":
|
||||
self.cpu_cluster.addL1()
|
||||
self.cpu_cluster.addL2(self.cpu_cluster.clk_domain)
|
||||
self.cpu_cluster.connectMemSide(self.membus)
|
||||
|
||||
# Tell gem5 about the memory mode used by the CPUs we are
|
||||
# simulating.
|
||||
self.mem_mode = self.cpu_cluster.memory_mode()
|
||||
|
||||
def numCpuClusters(self):
|
||||
return len(self._clusters)
|
||||
|
||||
def addCpuCluster(self, cpu_cluster):
|
||||
assert cpu_cluster not in self._clusters
|
||||
assert len(cpu_cluster) > 0
|
||||
self._clusters.append(cpu_cluster)
|
||||
self._num_cpus += len(cpu_cluster)
|
||||
|
||||
def numCpus(self):
|
||||
return self._num_cpus
|
||||
|
||||
|
||||
def get_processes(cmd):
|
||||
"""Interprets commands to run and returns a list of processes"""
|
||||
|
||||
@@ -150,7 +84,31 @@ def get_processes(cmd):
|
||||
def create(args):
|
||||
"""Create and configure the system object."""
|
||||
|
||||
system = SimpleSeSystem(args)
|
||||
cpu_class = cpu_types[args.cpu][0]
|
||||
mem_mode = cpu_class.memory_mode()
|
||||
# Only simulate caches when using a timing CPU (e.g., the HPI model)
|
||||
want_caches = True if mem_mode == "timing" else False
|
||||
|
||||
system = devices.SimpleSeSystem(
|
||||
mem_mode=mem_mode,
|
||||
)
|
||||
|
||||
# Add CPUs to the system. A cluster of CPUs typically have
|
||||
# private L1 caches and a shared L2 cache.
|
||||
system.cpu_cluster = devices.ArmCpuCluster(
|
||||
system,
|
||||
args.num_cores,
|
||||
args.cpu_freq,
|
||||
"1.2V",
|
||||
*cpu_types[args.cpu],
|
||||
tarmac_gen=args.tarmac_gen,
|
||||
tarmac_dest=args.tarmac_dest,
|
||||
)
|
||||
|
||||
# Create a cache hierarchy for the cluster. We are assuming that
|
||||
# clusters have core-private L1 caches and an L2 that's shared
|
||||
# within the cluster.
|
||||
system.addCaches(want_caches, last_cache_level=2)
|
||||
|
||||
# Tell components about the expected physical memory ranges. This
|
||||
# is, for example, used by the MemConfig helper to determine where
|
||||
@@ -160,6 +118,9 @@ def create(args):
|
||||
# Configure the off-chip memory system.
|
||||
MemConfig.config_mem(args, system)
|
||||
|
||||
# Wire up the system's memory system
|
||||
system.connect()
|
||||
|
||||
# Parse the command line and get a list of Processes instances
|
||||
# that we can pass to gem5.
|
||||
processes = get_processes(args.commands_to_run)
|
||||
|
||||
Reference in New Issue
Block a user