From 3b8c974456a8fe3a85687c56c31175b394e9dbbb Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Mon, 18 Sep 2023 09:50:32 +0100 Subject: [PATCH 1/5] configs: Refactor BaseSimpleSystem in devices.py We define a new parent (ClusterSystem) to model a system with one or more cpu clusters within it. The idea is to make this new base class reusable by SE systems/scripts as well (like starter_se.py) Change-Id: I1398d773813db565f6ad5ce62cb4c022cb12a55a Signed-off-by: Giacomo Travaglini Reviewed-by: Richard Cooper --- configs/example/arm/devices.py | 103 ++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 47 deletions(-) diff --git a/configs/example/arm/devices.py b/configs/example/arm/devices.py index 6c6474ca2b..03d0a84799 100644 --- a/configs/example/arm/devices.py +++ b/configs/example/arm/devices.py @@ -338,56 +338,15 @@ class FastmodelCluster(CpuCluster): pass -class BaseSimpleSystem(ArmSystem): - cache_line_size = 64 - - def __init__(self, mem_size, platform, **kwargs): - super(BaseSimpleSystem, self).__init__(**kwargs) - - self.voltage_domain = VoltageDomain(voltage="1.0V") - self.clk_domain = SrcClockDomain( - clock="1GHz", voltage_domain=Parent.voltage_domain - ) - - if platform is None: - self.realview = VExpress_GEM5_V1() - else: - self.realview = platform - - if hasattr(self.realview.gic, "cpu_addr"): - self.gic_cpu_addr = self.realview.gic.cpu_addr - - self.terminal = Terminal() - self.vncserver = VncServer() - - self.iobus = IOXBar() - # Device DMA -> MEM - self.mem_ranges = self.getMemRanges(int(Addr(mem_size))) +class ClusterSystem: + """ + Base class providing cpu clusters generation/handling methods to + SE/FS systems + """ + def __init__(self, **kwargs): self._clusters = [] - def getMemRanges(self, mem_size): - """ - Define system memory ranges. This depends on the physical - memory map provided by the realview platform and by the memory - size provided by the user (mem_size argument). - The method is iterating over all platform ranges until they cover - the entire user's memory requirements. - """ - mem_ranges = [] - for mem_range in self.realview._mem_regions: - size_in_range = min(mem_size, mem_range.size()) - - mem_ranges.append( - AddrRange(start=mem_range.start, size=size_in_range) - ) - - mem_size -= size_in_range - if mem_size == 0: - return mem_ranges - - raise ValueError("memory size too big for platform capabilities") - def numCpuClusters(self): return len(self._clusters) @@ -423,6 +382,56 @@ class BaseSimpleSystem(ArmSystem): cluster.connectMemSide(cluster_mem_bus) +class BaseSimpleSystem(ArmSystem, ClusterSystem): + cache_line_size = 64 + + def __init__(self, mem_size, platform, **kwargs): + ArmSystem.__init__(self, **kwargs) + ClusterSystem.__init__(self, **kwargs) + + self.voltage_domain = VoltageDomain(voltage="1.0V") + self.clk_domain = SrcClockDomain( + clock="1GHz", voltage_domain=Parent.voltage_domain + ) + + if platform is None: + self.realview = VExpress_GEM5_V1() + else: + self.realview = platform + + if hasattr(self.realview.gic, "cpu_addr"): + self.gic_cpu_addr = self.realview.gic.cpu_addr + + self.terminal = Terminal() + self.vncserver = VncServer() + + self.iobus = IOXBar() + # Device DMA -> MEM + self.mem_ranges = self.getMemRanges(int(Addr(mem_size))) + + def getMemRanges(self, mem_size): + """ + Define system memory ranges. This depends on the physical + memory map provided by the realview platform and by the memory + size provided by the user (mem_size argument). + The method is iterating over all platform ranges until they cover + the entire user's memory requirements. + """ + mem_ranges = [] + for mem_range in self.realview._mem_regions: + size_in_range = min(mem_size, mem_range.size()) + + mem_ranges.append( + AddrRange(start=mem_range.start, size=size_in_range) + ) + + mem_size -= size_in_range + if mem_size == 0: + return mem_ranges + + raise ValueError("memory size too big for platform capabilities") + + class SimpleSystem(BaseSimpleSystem): """ Meant to be used with the classic memory model From 7395b94c40a1b291e29f96b375448f799b969791 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Mon, 18 Sep 2023 09:51:47 +0100 Subject: [PATCH 2/5] configs: Add a SimpleSeSystem class to devices.py Change-Id: I9d120fbaf0c61c5a053163ec1e5f4f93c583df52 Signed-off-by: Giacomo Travaglini Reviewed-by: Richard Cooper --- configs/example/arm/devices.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/configs/example/arm/devices.py b/configs/example/arm/devices.py index 03d0a84799..7ceb3cd3bf 100644 --- a/configs/example/arm/devices.py +++ b/configs/example/arm/devices.py @@ -382,6 +382,30 @@ class ClusterSystem: cluster.connectMemSide(cluster_mem_bus) +class SimpleSeSystem(System, ClusterSystem): + """ + Example system class for syscall emulation mode + """ + + # Use a fixed cache line size of 64 bytes + cache_line_size = 64 + + def __init__(self, **kwargs): + System.__init__(self, **kwargs) + ClusterSystem.__init__(self, **kwargs) + # Create a voltage and clock domain for system components + self.voltage_domain = VoltageDomain(voltage="3.3V") + self.clk_domain = SrcClockDomain( + clock="1GHz", voltage_domain=self.voltage_domain + ) + + # Create the off-chip memory bus. + self.membus = SystemXBar() + + def connect(self): + self.system_port = self.membus.cpu_side_ports + + class BaseSimpleSystem(ArmSystem, ClusterSystem): cache_line_size = 64 From e35e2966c04f971326b76a630bef5c321cabcb57 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Mon, 18 Sep 2023 09:52:05 +0100 Subject: [PATCH 3/5] configs: Use devices.SimpleSeSystem in starter_se.py Change-Id: I742e280e7a2a4047ac4bb3d783a28ee97f461480 Signed-off-by: Giacomo Travaglini Reviewed-by: Richard Cooper --- configs/example/arm/starter_se.py | 95 +++++++++---------------------- 1 file changed, 28 insertions(+), 67 deletions(-) diff --git a/configs/example/arm/starter_se.py b/configs/example/arm/starter_se.py index f21f399675..9834487155 100644 --- a/configs/example/arm/starter_se.py +++ b/configs/example/arm/starter_se.py @@ -64,72 +64,6 @@ cpu_types = { } -class SimpleSeSystem(System): - """ - Example system class for syscall emulation mode - """ - - # Use a fixed cache line size of 64 bytes - cache_line_size = 64 - - def __init__(self, args, **kwargs): - super(SimpleSeSystem, self).__init__(**kwargs) - - # Setup book keeping to be able to use CpuClusters from the - # devices module. - self._clusters = [] - self._num_cpus = 0 - - # Create a voltage and clock domain for system components - self.voltage_domain = VoltageDomain(voltage="3.3V") - self.clk_domain = SrcClockDomain( - clock="1GHz", voltage_domain=self.voltage_domain - ) - - # Create the off-chip memory bus. - self.membus = SystemXBar() - - # Wire up the system port that gem5 uses to load the kernel - # and to perform debug accesses. - self.system_port = self.membus.cpu_side_ports - - # Add CPUs to the system. A cluster of CPUs typically have - # private L1 caches and a shared L2 cache. - self.cpu_cluster = devices.ArmCpuCluster( - self, - args.num_cores, - args.cpu_freq, - "1.2V", - *cpu_types[args.cpu], - tarmac_gen=args.tarmac_gen, - tarmac_dest=args.tarmac_dest, - ) - - # Create a cache hierarchy (unless we are simulating a - # functional CPU in atomic memory mode) for the CPU cluster - # and connect it to the shared memory bus. - if self.cpu_cluster.memory_mode() == "timing": - self.cpu_cluster.addL1() - self.cpu_cluster.addL2(self.cpu_cluster.clk_domain) - self.cpu_cluster.connectMemSide(self.membus) - - # Tell gem5 about the memory mode used by the CPUs we are - # simulating. - self.mem_mode = self.cpu_cluster.memory_mode() - - def numCpuClusters(self): - return len(self._clusters) - - def addCpuCluster(self, cpu_cluster): - assert cpu_cluster not in self._clusters - assert len(cpu_cluster) > 0 - self._clusters.append(cpu_cluster) - self._num_cpus += len(cpu_cluster) - - def numCpus(self): - return self._num_cpus - - def get_processes(cmd): """Interprets commands to run and returns a list of processes""" @@ -150,7 +84,31 @@ def get_processes(cmd): def create(args): """Create and configure the system object.""" - system = SimpleSeSystem(args) + cpu_class = cpu_types[args.cpu][0] + mem_mode = cpu_class.memory_mode() + # Only simulate caches when using a timing CPU (e.g., the HPI model) + want_caches = True if mem_mode == "timing" else False + + system = devices.SimpleSeSystem( + mem_mode=mem_mode, + ) + + # Add CPUs to the system. A cluster of CPUs typically have + # private L1 caches and a shared L2 cache. + system.cpu_cluster = devices.ArmCpuCluster( + system, + args.num_cores, + args.cpu_freq, + "1.2V", + *cpu_types[args.cpu], + tarmac_gen=args.tarmac_gen, + tarmac_dest=args.tarmac_dest, + ) + + # Create a cache hierarchy for the cluster. We are assuming that + # clusters have core-private L1 caches and an L2 that's shared + # within the cluster. + system.addCaches(want_caches, last_cache_level=2) # Tell components about the expected physical memory ranges. This # is, for example, used by the MemConfig helper to determine where @@ -160,6 +118,9 @@ def create(args): # Configure the off-chip memory system. MemConfig.config_mem(args, system) + # Wire up the system's memory system + system.connect() + # Parse the command line and get a list of Processes instances # that we can pass to gem5. processes = get_processes(args.commands_to_run) From 1a5dee0f0f9839f6c47f99d77a9fab823fd94fe4 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Wed, 13 Sep 2023 20:24:02 +0100 Subject: [PATCH 4/5] configs: Add an elastic-trace-generating CPU According to the original paper [1] the elastic trace generation process requires a cpu with a big number of entries in the ROB, LQ and SQ, so that there are no stalls due to resource limitation. At the moment these numbers are copy pasted from the CpuConfig.config_etrace method [2]. [1]: https://ieeexplore.ieee.org/document/7818336 [2]: https://github.com/gem5/gem5/blob/stable/\ configs/common/CpuConfig.py#L40 Change-Id: I00fde49e5420e420a4eddb7b49de4b74360348c9 Signed-off-by: Giacomo Travaglini Reviewed-by: Richard Cooper --- configs/common/cores/arm/O3_ARM_Etrace.py | 58 +++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 configs/common/cores/arm/O3_ARM_Etrace.py diff --git a/configs/common/cores/arm/O3_ARM_Etrace.py b/configs/common/cores/arm/O3_ARM_Etrace.py new file mode 100644 index 0000000000..20870a0b7a --- /dev/null +++ b/configs/common/cores/arm/O3_ARM_Etrace.py @@ -0,0 +1,58 @@ +# Copyright (c) 2012, 2017-2018, 2023 Arm Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from m5.objects import * +from .O3_ARM_v7a import O3_ARM_v7a_3 + +# O3_ARM_v7a_3 adapted to generate elastic traces +class O3_ARM_v7a_3_Etrace(O3_ARM_v7a_3): + # Make the number of entries in the ROB, LQ and SQ very + # large so that there are no stalls due to resource + # limitation as such stalls will get captured in the trace + # as compute delay. For replay, ROB, LQ and SQ sizes are + # modelled in the Trace CPU. + numROBEntries = 512 + LQEntries = 128 + SQEntries = 128 + + def attach_probe_listener(self, inst_trace_file, data_trace_file): + # Attach the elastic trace probe listener. Set the protobuf trace + # file names. Set the dependency window size equal to the cpu it + # is attached to. + self.traceListener = m5.objects.ElasticTrace( + instFetchTraceFile=inst_trace_file, + dataDepTraceFile=data_trace_file, + depWindowSize=3 * self.numROBEntries, + ) From 4c4615523f1367bef4ea143072f202c5c0b48b81 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Wed, 13 Sep 2023 20:27:58 +0100 Subject: [PATCH 5/5] configs: Add an example elastic-trace-generating script The new script will automatically use the newly defined O3_ARM_v7a_3_Etrace CPU to run a simple SE simulation while generating elastic trace files. The script is based on starter_se.py, but contains the following limitations: 1) No L2 cache as it might affect computational delay calculations 2) Supporting SimpleMemory only with minimal memory latency There restrictions were imported by the existing elastic trace generation logic in the common library (collected by grepping elastic_trace_en) [1][2][3] Example usage: build/ARM/gem5.opt configs/example/arm/etrace_se.py \ --inst-trace-file [INSTRUCTION TRACE] \ --data-trace-file [DATA TRACE] \ [WORKLOAD] [1]: https://github.com/gem5/gem5/blob/stable/\ configs/common/MemConfig.py#L191 [2]: https://github.com/gem5/gem5/blob/stable/\ configs/common/MemConfig.py#L232 [3]: https://github.com/gem5/gem5/blob/stable/\ configs/common/CacheConfig.py#L130 Change-Id: I021fc84fa101113c5c2f0737d50a930bb4750f76 Signed-off-by: Giacomo Travaglini Reviewed-by: Richard Cooper --- configs/example/arm/etrace_se.py | 191 +++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 configs/example/arm/etrace_se.py diff --git a/configs/example/arm/etrace_se.py b/configs/example/arm/etrace_se.py new file mode 100644 index 0000000000..8fa971ff84 --- /dev/null +++ b/configs/example/arm/etrace_se.py @@ -0,0 +1,191 @@ +# Copyright (c) 2016-2017, 2022-2023 Arm Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +import os +import m5 +from m5.util import addToPath +from m5.objects import * +import argparse +import shlex + +m5.util.addToPath("../..") + +from common import ObjectList + +import devices + + +def get_processes(cmd): + """Interprets commands to run and returns a list of processes""" + + cwd = os.getcwd() + multiprocesses = [] + for idx, c in enumerate(cmd): + argv = shlex.split(c) + + process = Process(pid=100 + idx, cwd=cwd, cmd=argv, executable=argv[0]) + process.gid = os.getgid() + + print("info: %d. command and arguments: %s" % (idx + 1, process.cmd)) + multiprocesses.append(process) + + return multiprocesses + + +def create(args): + """Create and configure the system object.""" + + system = devices.SimpleSeSystem( + mem_mode="timing", + ) + + # Add CPUs to the system. A cluster of CPUs typically have + # private L1 caches and a shared L2 cache. + system.cpu_cluster = devices.ArmCpuCluster( + system, + args.num_cores, + args.cpu_freq, + "1.2V", + ObjectList.cpu_list.get("O3_ARM_v7a_3_Etrace"), + devices.L1I, + devices.L1D, + devices.L2, + ) + + # Attach the elastic trace probe listener to every CPU in the cluster + for cpu in system.cpu_cluster: + cpu.attach_probe_listener(args.inst_trace_file, args.data_trace_file) + + # As elastic trace generation is enabled, make sure the memory system is + # minimal so that compute delays do not include memory access latencies. + # Configure the compulsory L1 caches for the O3CPU, do not configure + # any more caches. + system.addCaches(True, last_cache_level=1) + + # For elastic trace, over-riding Simple Memory latency to 1ns." + system.memory = SimpleMemory( + range=AddrRange(start=0, size=args.mem_size), + latency="1ns", + port=system.membus.mem_side_ports, + ) + + # Parse the command line and get a list of Processes instances + # that we can pass to gem5. + processes = get_processes(args.commands_to_run) + if len(processes) != args.num_cores: + print( + "Error: Cannot map %d command(s) onto %d CPU(s)" + % (len(processes), args.num_cores) + ) + sys.exit(1) + + system.workload = SEWorkload.init_compatible(processes[0].executable) + + # Assign one workload to each CPU + for cpu, workload in zip(system.cpu_cluster.cpus, processes): + cpu.workload = workload + + return system + + +def main(): + parser = argparse.ArgumentParser(epilog=__doc__) + + parser.add_argument( + "commands_to_run", + metavar="command(s)", + nargs="+", + help="Command(s) to run", + ) + parser.add_argument( + "--inst-trace-file", + action="store", + type=str, + help="""Instruction fetch trace file input to + Elastic Trace probe in a capture simulation and + Trace CPU in a replay simulation""", + default="fetchtrace.proto.gz", + ) + parser.add_argument( + "--data-trace-file", + action="store", + type=str, + help="""Data dependency trace file input to + Elastic Trace probe in a capture simulation and + Trace CPU in a replay simulation""", + default="deptrace.proto.gz", + ) + parser.add_argument("--cpu-freq", type=str, default="4GHz") + parser.add_argument( + "--num-cores", type=int, default=1, help="Number of CPU cores" + ) + parser.add_argument( + "--mem-size", + action="store", + type=str, + default="2GB", + help="Specify the physical memory size", + ) + + args = parser.parse_args() + + # Create a single root node for gem5's object hierarchy. There can + # only exist one root node in the simulator at any given + # time. Tell gem5 that we want to use syscall emulation mode + # instead of full system mode. + root = Root(full_system=False) + + # Populate the root node with a system. A system corresponds to a + # single node with shared memory. + root.system = create(args) + + # Instantiate the C++ object hierarchy. After this point, + # SimObjects can't be instantiated anymore. + m5.instantiate() + + # Start the simulator. This gives control to the C++ world and + # starts the simulator. The returned event tells the simulation + # script why the simulator exited. + event = m5.simulate() + + # Print the reason for the simulation exit. Some exit codes are + # requests for service (e.g., checkpoints) from the simulation + # script. We'll just ignore them here and exit. + print(f"{event.getCause()} ({event.getCode()}) @ {m5.curTick()}") + + +if __name__ == "__m5_main__": + main()