diff --git a/configs/example/etrace_replay.py b/configs/example/etrace_replay.py index ddbf01acf5..184934fab9 100644 --- a/configs/example/etrace_replay.py +++ b/configs/example/etrace_replay.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015 ARM Limited +# Copyright (c) 2015, 2023 Arm Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -43,10 +43,42 @@ addToPath("../") from common import Options from common import Simulation -from common import CacheConfig from common import MemConfig from common.Caches import * + +def config_cache(args, system): + """ + Configure the cache hierarchy. Only two configurations are natively + supported as an example: L1(I/D) only or L1 + L2. + """ + from common.CacheConfig import _get_cache_opts + + system.l1i = L1_ICache(**_get_cache_opts("l1i", args)) + system.l1d = L1_DCache(**_get_cache_opts("l1d", args)) + + system.cpu.dcache_port = system.l1d.cpu_side + system.cpu.icache_port = system.l1i.cpu_side + + if args.l2cache: + # Provide a clock for the L2 and the L1-to-L2 bus here as they + # are not connected using addTwoLevelCacheHierarchy. Use the + # same clock as the CPUs. + system.l2 = L2Cache( + clk_domain=system.cpu_clk_domain, **_get_cache_opts("l2", args) + ) + + system.tol2bus = L2XBar(clk_domain=system.cpu_clk_domain) + system.l2.cpu_side = system.tol2bus.mem_side_ports + system.l2.mem_side = system.membus.cpu_side_ports + + system.l1i.mem_side = system.tol2bus.cpu_side_ports + system.l1d.mem_side = system.tol2bus.cpu_side_ports + else: + system.l1i.mem_side = system.membus.cpu_side_ports + system.l1d.mem_side = system.membus.cpu_side_ports + + parser = argparse.ArgumentParser() Options.addCommonOptions(parser) @@ -59,29 +91,18 @@ if "--ruby" in sys.argv: args = parser.parse_args() -numThreads = 1 - -if args.cpu_type != "TraceCPU": - fatal( - "This is a script for elastic trace replay simulation, use " - "--cpu-type=TraceCPU\n" - ) - if args.num_cpus > 1: fatal("This script does not support multi-processor trace replay.\n") -# In this case FutureClass will be None as there is not fast forwarding or -# switching -(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args) -CPUClass.numThreads = numThreads - system = System( - cpu=CPUClass(cpu_id=0), - mem_mode=test_mem_mode, + mem_mode=TraceCPU.memory_mode(), mem_ranges=[AddrRange(args.mem_size)], cache_line_size=args.cacheline_size, ) +# Generate the TraceCPU +system.cpu = TraceCPU() + # Create a top-level voltage domain system.voltage_domain = VoltageDomain(voltage=args.sys_voltage) @@ -105,11 +126,6 @@ system.cpu_clk_domain = SrcClockDomain( for cpu in system.cpu: cpu.clk_domain = system.cpu_clk_domain -# BaseCPU no longer has default values for the BaseCPU.isa -# createThreads() is needed to fill in the cpu.isa -for cpu in system.cpu: - cpu.createThreads() - # Assign input trace files to the Trace CPU system.cpu.instTraceFile = args.inst_trace_file system.cpu.dataTraceFile = args.data_trace_file @@ -118,8 +134,11 @@ system.cpu.dataTraceFile = args.data_trace_file MemClass = Simulation.setMemClass(args) system.membus = SystemXBar() system.system_port = system.membus.cpu_side_ports -CacheConfig.config_cache(args, system) + +# Configure the classic cache hierarchy +config_cache(args, system) + MemConfig.config_mem(args, system) root = Root(full_system=False, system=system) -Simulation.run(args, root, system, FutureClass) +Simulation.run(args, root, system, None) diff --git a/src/cpu/trace/TraceCPU.py b/src/cpu/trace/TraceCPU.py index 1be16518d7..5e82fd9f9f 100644 --- a/src/cpu/trace/TraceCPU.py +++ b/src/cpu/trace/TraceCPU.py @@ -1,4 +1,4 @@ -# Copyright (c) 2013 - 2016 ARM Limited +# Copyright (c) 2013 - 2016, 2023 Arm Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -34,10 +34,11 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from m5.params import * -from m5.objects.BaseCPU import BaseCPU +from m5.proxy import * +from m5.objects.ClockedObject import ClockedObject -class TraceCPU(BaseCPU): +class TraceCPU(ClockedObject): """Trace CPU model which replays traces generated in a prior simulation using DerivO3CPU or its derived classes. It interfaces with L1 caches. """ @@ -54,13 +55,10 @@ class TraceCPU(BaseCPU): def require_caches(cls): return True - def addPMU(self, pmu=None): - pass - - @classmethod - def support_take_over(cls): - return True + system = Param.System(Parent.any, "system object") + icache_port = RequestPort("Instruction Port") + dcache_port = RequestPort("Data Port") instTraceFile = Param.String("", "Instruction trace file") dataTraceFile = Param.String("", "Data dependency trace file") sizeStoreBuffer = Param.Unsigned( diff --git a/src/cpu/trace/trace_cpu.cc b/src/cpu/trace/trace_cpu.cc index 7399cf6199..d5c6b4fb3c 100644 --- a/src/cpu/trace/trace_cpu.cc +++ b/src/cpu/trace/trace_cpu.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 - 2016 ARM Limited + * Copyright (c) 2013 - 2016, 2023 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -39,6 +39,7 @@ #include "base/compiler.hh" #include "sim/sim_exit.hh" +#include "sim/system.hh" namespace gem5 { @@ -47,7 +48,8 @@ namespace gem5 int TraceCPU::numTraceCPUs = 0; TraceCPU::TraceCPU(const TraceCPUParams ¶ms) - : BaseCPU(params), + : ClockedObject(params), + cacheLineSize(params.system->cacheLineSize()), icachePort(this), dcachePort(this), instRequestorID(params.system->getRequestorId(this, "inst")), @@ -93,14 +95,6 @@ TraceCPU::updateNumOps(uint64_t rob_num) } } -void -TraceCPU::takeOverFrom(BaseCPU *oldCPU) -{ - // Unbind the ports of the old CPU and bind the ports of the TraceCPU. - getInstPort().takeOverFrom(&oldCPU->getInstPort()); - getDataPort().takeOverFrom(&oldCPU->getDataPort()); -} - void TraceCPU::init() { @@ -109,7 +103,7 @@ TraceCPU::init() DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n", dataTraceFile); - BaseCPU::init(); + ClockedObject::init(); // Get the send tick of the first instruction read request Tick first_icache_tick = icacheGen.init(); @@ -176,7 +170,7 @@ TraceCPU::schedDcacheNext() DPRINTF(TraceCPUData, "DcacheGen event.\n"); // Update stat for numCycles - baseStats.numCycles = clockEdge() / clockPeriod(); + traceStats.numCycles = clockEdge() / clockPeriod(); dcacheGen.execute(); if (dcacheGen.isExecComplete()) { @@ -216,7 +210,7 @@ TraceCPU::checkAndSchedExitEvent() ADD_STAT(cpi, statistics::units::Rate< statistics::units::Cycle, statistics::units::Count>::get(), "Cycles per micro-op used as a proxy for CPI", - trace->baseStats.numCycles / numOps) + trace->traceStats.numCycles / numOps) { cpi.precision(6); } @@ -591,7 +585,7 @@ TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr) // stat counting this is useful to keep a check on how frequently this // happens. If required the code could be revised to mimick splitting such // a request into two. - unsigned blk_size = owner.cacheLineSize(); + unsigned blk_size = owner.cacheLineSize; Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1)); if (!(blk_offset + node_ptr->size <= blk_size)) { node_ptr->size = blk_size - blk_offset; @@ -1152,6 +1146,20 @@ TraceCPU::schedDcacheNextEvent(Tick when) } +Port & +TraceCPU::getPort(const std::string &if_name, PortID idx) +{ + // Get the right port based on name. This applies to all the + // subclasses of the base CPU and relies on their implementation + // of getDataPort and getInstPort. + if (if_name == "dcache_port") + return getDataPort(); + else if (if_name == "icache_port") + return getInstPort(); + else + return ClockedObject::getPort(if_name, idx); +} + bool TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt) { diff --git a/src/cpu/trace/trace_cpu.hh b/src/cpu/trace/trace_cpu.hh index 87f820fe6d..d54b3c4227 100644 --- a/src/cpu/trace/trace_cpu.hh +++ b/src/cpu/trace/trace_cpu.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 - 2016 ARM Limited + * Copyright (c) 2013 - 2016, 2023 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -45,13 +45,16 @@ #include #include "base/statistics.hh" -#include "cpu/base.hh" #include "debug/TraceCPUData.hh" #include "debug/TraceCPUInst.hh" +#include "mem/packet.hh" +#include "mem/port.hh" +#include "mem/request.hh" #include "params/TraceCPU.hh" #include "proto/inst_dep_record.pb.h" #include "proto/packet.pb.h" #include "proto/protoio.hh" +#include "sim/clocked_object.hh" #include "sim/sim_events.hh" namespace gem5 @@ -66,8 +69,7 @@ namespace gem5 * simulation compared to the detailed cpu model and good correlation when the * same trace is used for playback on different memory sub-systems. * - * The TraceCPU inherits from BaseCPU so some virtual methods need to be - * defined. It has two port subclasses inherited from RequestPort for + * The TraceCPU has two port subclasses inherited from RequestPort for * instruction and data ports. It issues the memory requests deducing the * timing from the trace and without performing real execution of micro-ops. As * soon as the last dependency for an instruction is complete, its @@ -139,7 +141,7 @@ namespace gem5 * exit. */ -class TraceCPU : public BaseCPU +class TraceCPU : public ClockedObject { public: @@ -147,15 +149,6 @@ class TraceCPU : public BaseCPU void init(); - /** - * This is a pure virtual function in BaseCPU. As we don't know how many - * insts are in the trace but only know how how many micro-ops are we - * cannot count this stat. - * - * @return 0 - */ - Counter totalInsts() const { return 0; } - /** * Return totalOps as the number of committed micro-ops plus the * speculatively issued loads that are modelled in the TraceCPU replay. @@ -170,17 +163,6 @@ class TraceCPU : public BaseCPU */ void updateNumOps(uint64_t rob_num); - /* Pure virtual function in BaseCPU. Do nothing. */ - void wakeup(ThreadID tid=0) { return; } - - /* - * When resuming from checkpoint in FS mode, the TraceCPU takes over from - * the old cpu. This function overrides the takeOverFrom() function in the - * BaseCPU. It unbinds the ports of the old CPU and binds the ports of the - * TraceCPU. - */ - void takeOverFrom(BaseCPU *oldCPU); - /** * When instruction cache port receives a retry, schedule event * icacheNextEvent. @@ -303,6 +285,9 @@ class TraceCPU : public BaseCPU TraceCPU* owner; }; + /** Cache the cache line size that we get from the system */ + const unsigned int cacheLineSize; + /** Port to connect to L1 instruction cache. */ IcachePort icachePort; @@ -1112,6 +1097,8 @@ class TraceCPU : public BaseCPU /** Stat for number of simulated micro-ops. */ statistics::Scalar numOps; + /** Number of CPU cycles simulated */ + statistics::Scalar numCycles; /** Stat for the CPI. This is really cycles per * micro-op and not inst. */ statistics::Formula cpi; @@ -1125,6 +1112,18 @@ class TraceCPU : public BaseCPU /** Used to get a reference to the dcache port. */ Port &getDataPort() { return dcachePort; } + /** + * Get a port on this CPU. All CPUs have a data and + * instruction port, and this method uses getDataPort and + * getInstPort of the subclasses to resolve the two ports. + * + * @param if_name the port name + * @param idx ignored index + * + * @return a reference to the port with the given name + */ + Port &getPort(const std::string &if_name, + PortID idx=InvalidPortID) override; }; } // namespace gem5