diff --git a/configs/common/cores/arm/O3_ARM_Etrace.py b/configs/common/cores/arm/O3_ARM_Etrace.py
new file mode 100644
index 0000000000..20870a0b7a
--- /dev/null
+++ b/configs/common/cores/arm/O3_ARM_Etrace.py
@@ -0,0 +1,58 @@
+# Copyright (c) 2012, 2017-2018, 2023 Arm Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects import *
+from .O3_ARM_v7a import O3_ARM_v7a_3
+
+# O3_ARM_v7a_3 adapted to generate elastic traces
+class O3_ARM_v7a_3_Etrace(O3_ARM_v7a_3):
+    # Make the number of entries in the ROB, LQ and SQ very
+    # large so that there are no stalls due to resource
+    # limitation as such stalls will get captured in the trace
+    # as compute delay. For replay, ROB, LQ and SQ sizes are
+    # modelled in the Trace CPU.
+    numROBEntries = 512
+    LQEntries = 128
+    SQEntries = 128
+
+    def attach_probe_listener(self, inst_trace_file, data_trace_file):
+        # Attach the elastic trace probe listener. Set the protobuf trace
+        # file names. Set the dependency window size equal to the cpu it
+        # is attached to.
+        self.traceListener = m5.objects.ElasticTrace(
+            instFetchTraceFile=inst_trace_file,
+            dataDepTraceFile=data_trace_file,
+            depWindowSize=3 * self.numROBEntries,
+        )
diff --git a/configs/example/arm/devices.py b/configs/example/arm/devices.py
index 6c6474ca2b..7ceb3cd3bf 100644
--- a/configs/example/arm/devices.py
+++ b/configs/example/arm/devices.py
@@ -338,56 +338,15 @@ class FastmodelCluster(CpuCluster):
     pass
 
 
-class BaseSimpleSystem(ArmSystem):
-    cache_line_size = 64
-
-    def __init__(self, mem_size, platform, **kwargs):
-        super(BaseSimpleSystem, self).__init__(**kwargs)
-
-        self.voltage_domain = VoltageDomain(voltage="1.0V")
-        self.clk_domain = SrcClockDomain(
-            clock="1GHz", voltage_domain=Parent.voltage_domain
-        )
-
-        if platform is None:
-            self.realview = VExpress_GEM5_V1()
-        else:
-            self.realview = platform
-
-        if hasattr(self.realview.gic, "cpu_addr"):
-            self.gic_cpu_addr = self.realview.gic.cpu_addr
-
-        self.terminal = Terminal()
-        self.vncserver = VncServer()
-
-        self.iobus = IOXBar()
-        # Device DMA -> MEM
-        self.mem_ranges = self.getMemRanges(int(Addr(mem_size)))
+class ClusterSystem:
+    """
+    Base class providing cpu clusters generation/handling methods to
+    SE/FS systems
+    """
 
+    def __init__(self, **kwargs):
         self._clusters = []
 
-    def getMemRanges(self, mem_size):
-        """
-        Define system memory ranges. This depends on the physical
-        memory map provided by the realview platform and by the memory
-        size provided by the user (mem_size argument).
-        The method is iterating over all platform ranges until they cover
-        the entire user's memory requirements.
-        """
-        mem_ranges = []
-        for mem_range in self.realview._mem_regions:
-            size_in_range = min(mem_size, mem_range.size())
-
-            mem_ranges.append(
-                AddrRange(start=mem_range.start, size=size_in_range)
-            )
-
-            mem_size -= size_in_range
-            if mem_size == 0:
-                return mem_ranges
-
-        raise ValueError("memory size too big for platform capabilities")
-
     def numCpuClusters(self):
         return len(self._clusters)
 
@@ -423,6 +382,80 @@
         cluster.connectMemSide(cluster_mem_bus)
 
 
+class SimpleSeSystem(System, ClusterSystem):
+    """
+    Example system class for syscall emulation mode
+    """
+
+    # Use a fixed cache line size of 64 bytes
+    cache_line_size = 64
+
+    def __init__(self, **kwargs):
+        System.__init__(self, **kwargs)
+        ClusterSystem.__init__(self, **kwargs)
+        # Create a voltage and clock domain for system components
+        self.voltage_domain = VoltageDomain(voltage="3.3V")
+        self.clk_domain = SrcClockDomain(
+            clock="1GHz", voltage_domain=self.voltage_domain
+        )
+
+        # Create the off-chip memory bus.
+        self.membus = SystemXBar()
+
+    def connect(self):
+        self.system_port = self.membus.cpu_side_ports
+
+
+class BaseSimpleSystem(ArmSystem, ClusterSystem):
+    cache_line_size = 64
+
+    def __init__(self, mem_size, platform, **kwargs):
+        ArmSystem.__init__(self, **kwargs)
+        ClusterSystem.__init__(self, **kwargs)
+
+        self.voltage_domain = VoltageDomain(voltage="1.0V")
+        self.clk_domain = SrcClockDomain(
+            clock="1GHz", voltage_domain=Parent.voltage_domain
+        )
+
+        if platform is None:
+            self.realview = VExpress_GEM5_V1()
+        else:
+            self.realview = platform
+
+        if hasattr(self.realview.gic, "cpu_addr"):
+            self.gic_cpu_addr = self.realview.gic.cpu_addr
+
+        self.terminal = Terminal()
+        self.vncserver = VncServer()
+
+        self.iobus = IOXBar()
+        # Device DMA -> MEM
+        self.mem_ranges = self.getMemRanges(int(Addr(mem_size)))
+
+    def getMemRanges(self, mem_size):
+        """
+        Define system memory ranges. This depends on the physical
+        memory map provided by the realview platform and by the memory
+        size provided by the user (mem_size argument).
+        The method is iterating over all platform ranges until they cover
+        the entire user's memory requirements.
+        """
+        mem_ranges = []
+        for mem_range in self.realview._mem_regions:
+            size_in_range = min(mem_size, mem_range.size())
+
+            mem_ranges.append(
+                AddrRange(start=mem_range.start, size=size_in_range)
+            )
+
+            mem_size -= size_in_range
+            if mem_size == 0:
+                return mem_ranges
+
+        raise ValueError("memory size too big for platform capabilities")
+
+
 class SimpleSystem(BaseSimpleSystem):
     """
     Meant to be used with the classic memory model
diff --git a/configs/example/arm/etrace_se.py b/configs/example/arm/etrace_se.py
new file mode 100644
index 0000000000..8fa971ff84
--- /dev/null
+++ b/configs/example/arm/etrace_se.py
@@ -0,0 +1,195 @@
+# Copyright (c) 2016-2017, 2022-2023 Arm Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import os
+import sys
+import m5
+from m5.util import addToPath
+from m5.objects import *
+import argparse
+import shlex
+
+m5.util.addToPath("../..")
+
+from common import ObjectList
+
+import devices
+
+
+def get_processes(cmd):
+    """Interprets commands to run and returns a list of processes"""
+
+    cwd = os.getcwd()
+    multiprocesses = []
+    for idx, c in enumerate(cmd):
+        argv = shlex.split(c)
+
+        process = Process(pid=100 + idx, cwd=cwd, cmd=argv, executable=argv[0])
+        process.gid = os.getgid()
+
+        print("info: %d. command and arguments: %s" % (idx + 1, process.cmd))
+        multiprocesses.append(process)
+
+    return multiprocesses
+
+
+def create(args):
+    """Create and configure the system object."""
+
+    system = devices.SimpleSeSystem(
+        mem_mode="timing",
+    )
+
+    # Add CPUs to the system. A cluster of CPUs typically have
+    # private L1 caches and a shared L2 cache.
+    system.cpu_cluster = devices.ArmCpuCluster(
+        system,
+        args.num_cores,
+        args.cpu_freq,
+        "1.2V",
+        ObjectList.cpu_list.get("O3_ARM_v7a_3_Etrace"),
+        devices.L1I,
+        devices.L1D,
+        devices.L2,
+    )
+
+    # Attach the elastic trace probe listener to every CPU in the cluster
+    for cpu in system.cpu_cluster:
+        cpu.attach_probe_listener(args.inst_trace_file, args.data_trace_file)
+
+    # As elastic trace generation is enabled, make sure the memory system is
+    # minimal so that compute delays do not include memory access latencies.
+    # Configure the compulsory L1 caches for the O3CPU, do not configure
+    # any more caches.
+    system.addCaches(True, last_cache_level=1)
+
+    # For elastic trace, over-riding Simple Memory latency to 1ns.
+    system.memory = SimpleMemory(
+        range=AddrRange(start=0, size=args.mem_size),
+        latency="1ns",
+        port=system.membus.mem_side_ports,
+    )
+
+    # Wire up the system's memory system
+    system.connect()
+
+    # Parse the command line and get a list of Processes instances
+    # that we can pass to gem5.
+    processes = get_processes(args.commands_to_run)
+    if len(processes) != args.num_cores:
+        print(
+            "Error: Cannot map %d command(s) onto %d CPU(s)"
+            % (len(processes), args.num_cores)
+        )
+        sys.exit(1)
+
+    system.workload = SEWorkload.init_compatible(processes[0].executable)
+
+    # Assign one workload to each CPU
+    for cpu, workload in zip(system.cpu_cluster.cpus, processes):
+        cpu.workload = workload
+
+    return system
+
+
+def main():
+    parser = argparse.ArgumentParser(epilog=__doc__)
+
+    parser.add_argument(
+        "commands_to_run",
+        metavar="command(s)",
+        nargs="+",
+        help="Command(s) to run",
+    )
+    parser.add_argument(
+        "--inst-trace-file",
+        action="store",
+        type=str,
+        help="""Instruction fetch trace file input to
+        Elastic Trace probe in a capture simulation and
+        Trace CPU in a replay simulation""",
+        default="fetchtrace.proto.gz",
+    )
+    parser.add_argument(
+        "--data-trace-file",
+        action="store",
+        type=str,
+        help="""Data dependency trace file input to
+        Elastic Trace probe in a capture simulation and
+        Trace CPU in a replay simulation""",
+        default="deptrace.proto.gz",
+    )
+    parser.add_argument("--cpu-freq", type=str, default="4GHz")
+    parser.add_argument(
+        "--num-cores", type=int, default=1, help="Number of CPU cores"
+    )
+    parser.add_argument(
+        "--mem-size",
+        action="store",
+        type=str,
+        default="2GB",
+        help="Specify the physical memory size",
+    )
+
+    args = parser.parse_args()
+
+    # Create a single root node for gem5's object hierarchy. There can
+    # only exist one root node in the simulator at any given
+    # time. Tell gem5 that we want to use syscall emulation mode
+    # instead of full system mode.
+    root = Root(full_system=False)
+
+    # Populate the root node with a system. A system corresponds to a
+    # single node with shared memory.
+    root.system = create(args)
+
+    # Instantiate the C++ object hierarchy. After this point,
+    # SimObjects can't be instantiated anymore.
+    m5.instantiate()
+
+    # Start the simulator. This gives control to the C++ world and
+    # starts the simulator. The returned event tells the simulation
+    # script why the simulator exited.
+    event = m5.simulate()
+
+    # Print the reason for the simulation exit. Some exit codes are
+    # requests for service (e.g., checkpoints) from the simulation
+    # script. We'll just ignore them here and exit.
+    print(f"{event.getCause()} ({event.getCode()}) @ {m5.curTick()}")
+
+
+if __name__ == "__m5_main__":
+    main()
diff --git a/configs/example/arm/starter_se.py b/configs/example/arm/starter_se.py
index f21f399675..9834487155 100644
--- a/configs/example/arm/starter_se.py
+++ b/configs/example/arm/starter_se.py
@@ -64,72 +64,6 @@ cpu_types = {
 }
 
 
-class SimpleSeSystem(System):
-    """
-    Example system class for syscall emulation mode
-    """
-
-    # Use a fixed cache line size of 64 bytes
-    cache_line_size = 64
-
-    def __init__(self, args, **kwargs):
-        super(SimpleSeSystem, self).__init__(**kwargs)
-
-        # Setup book keeping to be able to use CpuClusters from the
-        # devices module.
-        self._clusters = []
-        self._num_cpus = 0
-
-        # Create a voltage and clock domain for system components
-        self.voltage_domain = VoltageDomain(voltage="3.3V")
-        self.clk_domain = SrcClockDomain(
-            clock="1GHz", voltage_domain=self.voltage_domain
-        )
-
-        # Create the off-chip memory bus.
-        self.membus = SystemXBar()
-
-        # Wire up the system port that gem5 uses to load the kernel
-        # and to perform debug accesses.
-        self.system_port = self.membus.cpu_side_ports
-
-        # Add CPUs to the system. A cluster of CPUs typically have
-        # private L1 caches and a shared L2 cache.
-        self.cpu_cluster = devices.ArmCpuCluster(
-            self,
-            args.num_cores,
-            args.cpu_freq,
-            "1.2V",
-            *cpu_types[args.cpu],
-            tarmac_gen=args.tarmac_gen,
-            tarmac_dest=args.tarmac_dest,
-        )
-
-        # Create a cache hierarchy (unless we are simulating a
-        # functional CPU in atomic memory mode) for the CPU cluster
-        # and connect it to the shared memory bus.
-        if self.cpu_cluster.memory_mode() == "timing":
-            self.cpu_cluster.addL1()
-            self.cpu_cluster.addL2(self.cpu_cluster.clk_domain)
-            self.cpu_cluster.connectMemSide(self.membus)
-
-        # Tell gem5 about the memory mode used by the CPUs we are
-        # simulating.
-        self.mem_mode = self.cpu_cluster.memory_mode()
-
-    def numCpuClusters(self):
-        return len(self._clusters)
-
-    def addCpuCluster(self, cpu_cluster):
-        assert cpu_cluster not in self._clusters
-        assert len(cpu_cluster) > 0
-        self._clusters.append(cpu_cluster)
-        self._num_cpus += len(cpu_cluster)
-
-    def numCpus(self):
-        return self._num_cpus
-
-
 def get_processes(cmd):
     """Interprets commands to run and returns a list of processes"""
 
@@ -150,7 +84,31 @@ def get_processes(cmd):
 def create(args):
     """Create and configure the system object."""
 
-    system = SimpleSeSystem(args)
+    cpu_class = cpu_types[args.cpu][0]
+    mem_mode = cpu_class.memory_mode()
+    # Only simulate caches when using a timing CPU (e.g., the HPI model)
+    want_caches = True if mem_mode == "timing" else False
+
+    system = devices.SimpleSeSystem(
+        mem_mode=mem_mode,
+    )
+
+    # Add CPUs to the system. A cluster of CPUs typically have
+    # private L1 caches and a shared L2 cache.
+    system.cpu_cluster = devices.ArmCpuCluster(
+        system,
+        args.num_cores,
+        args.cpu_freq,
+        "1.2V",
+        *cpu_types[args.cpu],
+        tarmac_gen=args.tarmac_gen,
+        tarmac_dest=args.tarmac_dest,
+    )
+
+    # Create a cache hierarchy for the cluster. We are assuming that
+    # clusters have core-private L1 caches and an L2 that's shared
+    # within the cluster.
+    system.addCaches(want_caches, last_cache_level=2)
 
     # Tell components about the expected physical memory ranges. This
     # is, for example, used by the MemConfig helper to determine where
@@ -160,6 +118,9 @@ def create(args):
     # Configure the off-chip memory system.
     MemConfig.config_mem(args, system)
 
+    # Wire up the system's memory system
+    system.connect()
+
     # Parse the command line and get a list of Processes instances
     # that we can pass to gem5.
     processes = get_processes(args.commands_to_run)