cpu, configs: Fix TraceCPU after multi-ISA addition (#302)

This PR fixes #301
This commit is contained in:
Bobby R. Bruce
2023-09-12 17:26:27 -07:00
committed by GitHub
4 changed files with 96 additions and 72 deletions

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2015 ARM Limited
# Copyright (c) 2015, 2023 Arm Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -43,10 +43,42 @@ addToPath("../")
from common import Options
from common import Simulation
from common import CacheConfig
from common import MemConfig
from common.Caches import *
def config_cache(args, system):
    """
    Build the classic cache hierarchy for the trace replay system.

    Two layouts are supported as examples: L1 instruction and data caches
    only, or the same L1 caches plus an L2 (selected with args.l2cache).
    """
    from common.CacheConfig import _get_cache_opts

    def cache_opts(level):
        # Per-level cache parameters derived from the command line.
        return _get_cache_opts(level, args)

    # The L1 caches sit directly behind the CPU's two ports.
    system.l1i = L1_ICache(**cache_opts("l1i"))
    system.l1d = L1_DCache(**cache_opts("l1d"))
    system.cpu.dcache_port = system.l1d.cpu_side
    system.cpu.icache_port = system.l1i.cpu_side

    if args.l2cache:
        # The L2 and the L1-to-L2 bus are not created through
        # addTwoLevelCacheHierarchy, so they get their clock here.
        # Both run on the CPU clock domain.
        cpu_clock = system.cpu_clk_domain
        system.l2 = L2Cache(clk_domain=cpu_clock, **cache_opts("l2"))
        system.tol2bus = L2XBar(clk_domain=cpu_clock)

        system.l2.cpu_side = system.tol2bus.mem_side_ports
        system.l2.mem_side = system.membus.cpu_side_ports

        l1_downstream = system.tol2bus
    else:
        l1_downstream = system.membus

    # Bind the instruction cache before the data cache so the port order
    # on the downstream bus stays l1i, then l1d.
    for l1_cache in (system.l1i, system.l1d):
        l1_cache.mem_side = l1_downstream.cpu_side_ports
parser = argparse.ArgumentParser()
Options.addCommonOptions(parser)
@@ -59,29 +91,18 @@ if "--ruby" in sys.argv:
args = parser.parse_args()
numThreads = 1
if args.cpu_type != "TraceCPU":
fatal(
"This is a script for elastic trace replay simulation, use "
"--cpu-type=TraceCPU\n"
)
if args.num_cpus > 1:
fatal("This script does not support multi-processor trace replay.\n")
# In this case FutureClass will be None as there is no fast forwarding or
# switching
(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
CPUClass.numThreads = numThreads
system = System(
cpu=CPUClass(cpu_id=0),
mem_mode=test_mem_mode,
mem_mode=TraceCPU.memory_mode(),
mem_ranges=[AddrRange(args.mem_size)],
cache_line_size=args.cacheline_size,
)
# Generate the TraceCPU
system.cpu = TraceCPU()
# Create a top-level voltage domain
system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
@@ -105,11 +126,6 @@ system.cpu_clk_domain = SrcClockDomain(
for cpu in system.cpu:
cpu.clk_domain = system.cpu_clk_domain
# BaseCPU no longer has default values for the BaseCPU.isa
# createThreads() is needed to fill in the cpu.isa
for cpu in system.cpu:
cpu.createThreads()
# Assign input trace files to the Trace CPU
system.cpu.instTraceFile = args.inst_trace_file
system.cpu.dataTraceFile = args.data_trace_file
@@ -118,8 +134,11 @@ system.cpu.dataTraceFile = args.data_trace_file
MemClass = Simulation.setMemClass(args)
system.membus = SystemXBar()
system.system_port = system.membus.cpu_side_ports
CacheConfig.config_cache(args, system)
# Configure the classic cache hierarchy
config_cache(args, system)
MemConfig.config_mem(args, system)
root = Root(full_system=False, system=system)
Simulation.run(args, root, system, FutureClass)
Simulation.run(args, root, system, None)

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2013 - 2016 ARM Limited
# Copyright (c) 2013 - 2016, 2023 Arm Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -34,10 +34,11 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.params import *
from m5.objects.BaseCPU import BaseCPU
from m5.proxy import *
from m5.objects.ClockedObject import ClockedObject
class TraceCPU(BaseCPU):
class TraceCPU(ClockedObject):
"""Trace CPU model which replays traces generated in a prior simulation
using DerivO3CPU or its derived classes. It interfaces with L1 caches.
"""
@@ -54,13 +55,10 @@ class TraceCPU(BaseCPU):
def require_caches(cls):
return True
def addPMU(self, pmu=None):
pass
@classmethod
def support_take_over(cls):
return True
system = Param.System(Parent.any, "system object")
icache_port = RequestPort("Instruction Port")
dcache_port = RequestPort("Data Port")
instTraceFile = Param.String("", "Instruction trace file")
dataTraceFile = Param.String("", "Data dependency trace file")
sizeStoreBuffer = Param.Unsigned(

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2013 - 2016 ARM Limited
* Copyright (c) 2013 - 2016, 2023 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -39,6 +39,7 @@
#include "base/compiler.hh"
#include "sim/sim_exit.hh"
#include "sim/system.hh"
namespace gem5
{
@@ -47,7 +48,8 @@ namespace gem5
int TraceCPU::numTraceCPUs = 0;
TraceCPU::TraceCPU(const TraceCPUParams &params)
: BaseCPU(params),
: ClockedObject(params),
cacheLineSize(params.system->cacheLineSize()),
icachePort(this),
dcachePort(this),
instRequestorID(params.system->getRequestorId(this, "inst")),
@@ -93,14 +95,6 @@ TraceCPU::updateNumOps(uint64_t rob_num)
}
}
/**
 * Take over the instruction and data port connections of another CPU.
 *
 * Calls takeOverFrom() on this CPU's instruction port and then on its
 * data port, passing the corresponding port of the old CPU each time.
 *
 * @param oldCPU the CPU whose instruction and data ports are taken over
 */
void
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
{
// Unbind the ports of the old CPU and bind the ports of the TraceCPU.
getInstPort().takeOverFrom(&oldCPU->getInstPort());
getDataPort().takeOverFrom(&oldCPU->getDataPort());
}
void
TraceCPU::init()
{
@@ -109,7 +103,7 @@ TraceCPU::init()
DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
dataTraceFile);
BaseCPU::init();
ClockedObject::init();
// Get the send tick of the first instruction read request
Tick first_icache_tick = icacheGen.init();
@@ -176,7 +170,7 @@ TraceCPU::schedDcacheNext()
DPRINTF(TraceCPUData, "DcacheGen event.\n");
// Update stat for numCycles
baseStats.numCycles = clockEdge() / clockPeriod();
traceStats.numCycles = clockEdge() / clockPeriod();
dcacheGen.execute();
if (dcacheGen.isExecComplete()) {
@@ -216,7 +210,7 @@ TraceCPU::checkAndSchedExitEvent()
ADD_STAT(cpi, statistics::units::Rate<
statistics::units::Cycle, statistics::units::Count>::get(),
"Cycles per micro-op used as a proxy for CPI",
trace->baseStats.numCycles / numOps)
trace->traceStats.numCycles / numOps)
{
cpi.precision(6);
}
@@ -591,7 +585,7 @@ TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
// stat counting this is useful to keep a check on how frequently this
// happens. If required the code could be revised to mimic splitting such
// a request into two.
unsigned blk_size = owner.cacheLineSize();
unsigned blk_size = owner.cacheLineSize;
Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
if (!(blk_offset + node_ptr->size <= blk_size)) {
node_ptr->size = blk_size - blk_offset;
@@ -1152,6 +1146,20 @@ TraceCPU::schedDcacheNextEvent(Tick when)
}
/**
 * Resolve a port of the TraceCPU by name.
 *
 * @param if_name the port name
 * @param idx index forwarded to ClockedObject::getPort() for any name
 *            other than the two cache ports
 *
 * @return a reference to the port with the given name
 */
Port &
TraceCPU::getPort(const std::string &if_name, PortID idx)
{
    // The two cache-facing ports are resolved here through getDataPort()
    // and getInstPort().
    if (if_name == "dcache_port") {
        return getDataPort();
    }

    if (if_name == "icache_port") {
        return getInstPort();
    }

    // Any other name is left to the ClockedObject base class.
    return ClockedObject::getPort(if_name, idx);
}
bool
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2013 - 2016 ARM Limited
* Copyright (c) 2013 - 2016, 2023 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -45,13 +45,16 @@
#include <unordered_map>
#include "base/statistics.hh"
#include "cpu/base.hh"
#include "debug/TraceCPUData.hh"
#include "debug/TraceCPUInst.hh"
#include "mem/packet.hh"
#include "mem/port.hh"
#include "mem/request.hh"
#include "params/TraceCPU.hh"
#include "proto/inst_dep_record.pb.h"
#include "proto/packet.pb.h"
#include "proto/protoio.hh"
#include "sim/clocked_object.hh"
#include "sim/sim_events.hh"
namespace gem5
@@ -66,8 +69,7 @@ namespace gem5
* simulation compared to the detailed cpu model and good correlation when the
* same trace is used for playback on different memory sub-systems.
*
* The TraceCPU inherits from BaseCPU so some virtual methods need to be
* defined. It has two port subclasses inherited from RequestPort for
* The TraceCPU has two port subclasses inherited from RequestPort for
* instruction and data ports. It issues the memory requests deducing the
* timing from the trace and without performing real execution of micro-ops. As
* soon as the last dependency for an instruction is complete, its
@@ -139,7 +141,7 @@ namespace gem5
* exit.
*/
class TraceCPU : public BaseCPU
class TraceCPU : public ClockedObject
{
public:
@@ -147,15 +149,6 @@ class TraceCPU : public BaseCPU
void init();
/**
* This is a pure virtual function in BaseCPU. As we don't know how many
* insts are in the trace but only know how many micro-ops there are, we
* cannot count this stat.
*
* @return 0
*/
Counter totalInsts() const { return 0; }
/**
* Return totalOps as the number of committed micro-ops plus the
* speculatively issued loads that are modelled in the TraceCPU replay.
@@ -170,17 +163,6 @@ class TraceCPU : public BaseCPU
*/
void updateNumOps(uint64_t rob_num);
/* Pure virtual function in BaseCPU. Do nothing. */
void wakeup(ThreadID tid=0) { return; }
/*
* When resuming from checkpoint in FS mode, the TraceCPU takes over from
* the old cpu. This function overrides the takeOverFrom() function in the
* BaseCPU. It unbinds the ports of the old CPU and binds the ports of the
* TraceCPU.
*/
void takeOverFrom(BaseCPU *oldCPU);
/**
* When instruction cache port receives a retry, schedule event
* icacheNextEvent.
@@ -303,6 +285,9 @@ class TraceCPU : public BaseCPU
TraceCPU* owner;
};
/** Cache the cache line size that we get from the system */
const unsigned int cacheLineSize;
/** Port to connect to L1 instruction cache. */
IcachePort icachePort;
@@ -1112,6 +1097,8 @@ class TraceCPU : public BaseCPU
/** Stat for number of simulated micro-ops. */
statistics::Scalar numOps;
/** Number of CPU cycles simulated */
statistics::Scalar numCycles;
/** Stat for the CPI. This is really cycles per
* micro-op and not inst. */
statistics::Formula cpi;
@@ -1125,6 +1112,18 @@ class TraceCPU : public BaseCPU
/** Used to get a reference to the dcache port. */
Port &getDataPort() { return dcachePort; }
/**
* Get a port on this CPU. All CPUs have a data and
* instruction port, and this method uses getDataPort and
* getInstPort of the subclasses to resolve the two ports.
*
* @param if_name the port name
* @param idx ignored index
*
* @return a reference to the port with the given name
*/
Port &getPort(const std::string &if_name,
PortID idx=InvalidPortID) override;
};
} // namespace gem5