cpu, configs: Fix TraceCPU after multi-ISA addition (#302)
This PR fixes #301
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2015 ARM Limited
|
||||
# Copyright (c) 2015, 2023 Arm Limited
|
||||
# All rights reserved.
|
||||
#
|
||||
# The license below extends only to copyright in the software and shall
|
||||
@@ -43,10 +43,42 @@ addToPath("../")
|
||||
|
||||
from common import Options
|
||||
from common import Simulation
|
||||
from common import CacheConfig
|
||||
from common import MemConfig
|
||||
from common.Caches import *
|
||||
|
||||
|
||||
def config_cache(args, system):
    """
    Build the classic cache hierarchy in front of the trace CPU.

    Two layouts are provided as examples: private L1 instruction and data
    caches attached directly to the memory bus, or the same L1 caches
    behind a shared L2 (selected with args.l2cache).
    """
    from common.CacheConfig import _get_cache_opts

    # Instantiate the L1 caches, instruction side first.
    l1i_opts = _get_cache_opts("l1i", args)
    system.l1i = L1_ICache(**l1i_opts)
    l1d_opts = _get_cache_opts("l1d", args)
    system.l1d = L1_DCache(**l1d_opts)

    # Hook the CPU ports up to their respective L1 cache.
    system.cpu.dcache_port = system.l1d.cpu_side
    system.cpu.icache_port = system.l1i.cpu_side

    if not args.l2cache:
        # L1-only layout: both L1 caches talk straight to the memory bus.
        system.l1i.mem_side = system.membus.cpu_side_ports
        system.l1d.mem_side = system.membus.cpu_side_ports
        return

    # The L2 and the L1-to-L2 crossbar are created by hand here rather
    # than through addTwoLevelCacheHierarchy, so they must be given a
    # clock explicitly. They share the clock domain used by the CPUs.
    system.l2 = L2Cache(
        clk_domain=system.cpu_clk_domain, **_get_cache_opts("l2", args)
    )
    system.tol2bus = L2XBar(clk_domain=system.cpu_clk_domain)

    # L2 sits between the L1-to-L2 crossbar and the memory bus.
    system.l2.cpu_side = system.tol2bus.mem_side_ports
    system.l2.mem_side = system.membus.cpu_side_ports

    # Both L1 caches feed the L1-to-L2 crossbar.
    system.l1i.mem_side = system.tol2bus.cpu_side_ports
    system.l1d.mem_side = system.tol2bus.cpu_side_ports
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
Options.addCommonOptions(parser)
|
||||
|
||||
@@ -59,29 +91,18 @@ if "--ruby" in sys.argv:
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
numThreads = 1
|
||||
|
||||
if args.cpu_type != "TraceCPU":
|
||||
fatal(
|
||||
"This is a script for elastic trace replay simulation, use "
|
||||
"--cpu-type=TraceCPU\n"
|
||||
)
|
||||
|
||||
if args.num_cpus > 1:
|
||||
fatal("This script does not support multi-processor trace replay.\n")
|
||||
|
||||
# In this case FutureClass will be None as there is not fast forwarding or
|
||||
# switching
|
||||
(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
|
||||
CPUClass.numThreads = numThreads
|
||||
|
||||
system = System(
|
||||
cpu=CPUClass(cpu_id=0),
|
||||
mem_mode=test_mem_mode,
|
||||
mem_mode=TraceCPU.memory_mode(),
|
||||
mem_ranges=[AddrRange(args.mem_size)],
|
||||
cache_line_size=args.cacheline_size,
|
||||
)
|
||||
|
||||
# Generate the TraceCPU
|
||||
system.cpu = TraceCPU()
|
||||
|
||||
# Create a top-level voltage domain
|
||||
system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
|
||||
|
||||
@@ -105,11 +126,6 @@ system.cpu_clk_domain = SrcClockDomain(
|
||||
for cpu in system.cpu:
|
||||
cpu.clk_domain = system.cpu_clk_domain
|
||||
|
||||
# BaseCPU no longer has default values for the BaseCPU.isa
|
||||
# createThreads() is needed to fill in the cpu.isa
|
||||
for cpu in system.cpu:
|
||||
cpu.createThreads()
|
||||
|
||||
# Assign input trace files to the Trace CPU
|
||||
system.cpu.instTraceFile = args.inst_trace_file
|
||||
system.cpu.dataTraceFile = args.data_trace_file
|
||||
@@ -118,8 +134,11 @@ system.cpu.dataTraceFile = args.data_trace_file
|
||||
MemClass = Simulation.setMemClass(args)
|
||||
system.membus = SystemXBar()
|
||||
system.system_port = system.membus.cpu_side_ports
|
||||
CacheConfig.config_cache(args, system)
|
||||
|
||||
# Configure the classic cache hierarchy
|
||||
config_cache(args, system)
|
||||
|
||||
MemConfig.config_mem(args, system)
|
||||
|
||||
root = Root(full_system=False, system=system)
|
||||
Simulation.run(args, root, system, FutureClass)
|
||||
Simulation.run(args, root, system, None)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2013 - 2016 ARM Limited
|
||||
# Copyright (c) 2013 - 2016, 2023 Arm Limited
|
||||
# All rights reserved.
|
||||
#
|
||||
# The license below extends only to copyright in the software and shall
|
||||
@@ -34,10 +34,11 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from m5.params import *
|
||||
from m5.objects.BaseCPU import BaseCPU
|
||||
from m5.proxy import *
|
||||
from m5.objects.ClockedObject import ClockedObject
|
||||
|
||||
|
||||
class TraceCPU(BaseCPU):
|
||||
class TraceCPU(ClockedObject):
|
||||
"""Trace CPU model which replays traces generated in a prior simulation
|
||||
using DerivO3CPU or its derived classes. It interfaces with L1 caches.
|
||||
"""
|
||||
@@ -54,13 +55,10 @@ class TraceCPU(BaseCPU):
|
||||
def require_caches(cls):
|
||||
return True
|
||||
|
||||
def addPMU(self, pmu=None):
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def support_take_over(cls):
|
||||
return True
|
||||
system = Param.System(Parent.any, "system object")
|
||||
|
||||
icache_port = RequestPort("Instruction Port")
|
||||
dcache_port = RequestPort("Data Port")
|
||||
instTraceFile = Param.String("", "Instruction trace file")
|
||||
dataTraceFile = Param.String("", "Data dependency trace file")
|
||||
sizeStoreBuffer = Param.Unsigned(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013 - 2016 ARM Limited
|
||||
* Copyright (c) 2013 - 2016, 2023 Arm Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -39,6 +39,7 @@
|
||||
|
||||
#include "base/compiler.hh"
|
||||
#include "sim/sim_exit.hh"
|
||||
#include "sim/system.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
@@ -47,7 +48,8 @@ namespace gem5
|
||||
int TraceCPU::numTraceCPUs = 0;
|
||||
|
||||
TraceCPU::TraceCPU(const TraceCPUParams ¶ms)
|
||||
: BaseCPU(params),
|
||||
: ClockedObject(params),
|
||||
cacheLineSize(params.system->cacheLineSize()),
|
||||
icachePort(this),
|
||||
dcachePort(this),
|
||||
instRequestorID(params.system->getRequestorId(this, "inst")),
|
||||
@@ -93,14 +95,6 @@ TraceCPU::updateNumOps(uint64_t rob_num)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TraceCPU::takeOverFrom(BaseCPU *oldCPU)
|
||||
{
|
||||
// Unbind the ports of the old CPU and bind the ports of the TraceCPU.
|
||||
getInstPort().takeOverFrom(&oldCPU->getInstPort());
|
||||
getDataPort().takeOverFrom(&oldCPU->getDataPort());
|
||||
}
|
||||
|
||||
void
|
||||
TraceCPU::init()
|
||||
{
|
||||
@@ -109,7 +103,7 @@ TraceCPU::init()
|
||||
DPRINTF(TraceCPUData, "Data memory request trace file is \"%s\".\n",
|
||||
dataTraceFile);
|
||||
|
||||
BaseCPU::init();
|
||||
ClockedObject::init();
|
||||
|
||||
// Get the send tick of the first instruction read request
|
||||
Tick first_icache_tick = icacheGen.init();
|
||||
@@ -176,7 +170,7 @@ TraceCPU::schedDcacheNext()
|
||||
DPRINTF(TraceCPUData, "DcacheGen event.\n");
|
||||
|
||||
// Update stat for numCycles
|
||||
baseStats.numCycles = clockEdge() / clockPeriod();
|
||||
traceStats.numCycles = clockEdge() / clockPeriod();
|
||||
|
||||
dcacheGen.execute();
|
||||
if (dcacheGen.isExecComplete()) {
|
||||
@@ -216,7 +210,7 @@ TraceCPU::checkAndSchedExitEvent()
|
||||
ADD_STAT(cpi, statistics::units::Rate<
|
||||
statistics::units::Cycle, statistics::units::Count>::get(),
|
||||
"Cycles per micro-op used as a proxy for CPI",
|
||||
trace->baseStats.numCycles / numOps)
|
||||
trace->traceStats.numCycles / numOps)
|
||||
{
|
||||
cpi.precision(6);
|
||||
}
|
||||
@@ -591,7 +585,7 @@ TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
|
||||
// stat counting this is useful to keep a check on how frequently this
|
||||
// happens. If required the code could be revised to mimic splitting such
|
||||
// a request into two.
|
||||
unsigned blk_size = owner.cacheLineSize();
|
||||
unsigned blk_size = owner.cacheLineSize;
|
||||
Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
|
||||
if (!(blk_offset + node_ptr->size <= blk_size)) {
|
||||
node_ptr->size = blk_size - blk_offset;
|
||||
@@ -1152,6 +1146,20 @@ TraceCPU::schedDcacheNextEvent(Tick when)
|
||||
|
||||
}
|
||||
|
||||
/**
 * Look up one of this object's ports by name.
 *
 * "dcache_port" resolves to the port returned by getDataPort() and
 * "icache_port" to the one returned by getInstPort(). Any other name is
 * forwarded, together with idx, to ClockedObject::getPort().
 *
 * @param if_name name of the requested port
 * @param idx port index, only passed on to ClockedObject::getPort()
 * @return reference to the matching port
 */
Port &
TraceCPU::getPort(const std::string &if_name, PortID idx)
{
    if (if_name == "dcache_port") {
        return getDataPort();
    }

    if (if_name == "icache_port") {
        return getInstPort();
    }

    // Not one of the two cache ports: let the parent class resolve it.
    return ClockedObject::getPort(if_name, idx);
}
|
||||
|
||||
bool
|
||||
TraceCPU::IcachePort::recvTimingResp(PacketPtr pkt)
|
||||
{
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013 - 2016 ARM Limited
|
||||
* Copyright (c) 2013 - 2016, 2023 Arm Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -45,13 +45,16 @@
|
||||
#include <unordered_map>
|
||||
|
||||
#include "base/statistics.hh"
|
||||
#include "cpu/base.hh"
|
||||
#include "debug/TraceCPUData.hh"
|
||||
#include "debug/TraceCPUInst.hh"
|
||||
#include "mem/packet.hh"
|
||||
#include "mem/port.hh"
|
||||
#include "mem/request.hh"
|
||||
#include "params/TraceCPU.hh"
|
||||
#include "proto/inst_dep_record.pb.h"
|
||||
#include "proto/packet.pb.h"
|
||||
#include "proto/protoio.hh"
|
||||
#include "sim/clocked_object.hh"
|
||||
#include "sim/sim_events.hh"
|
||||
|
||||
namespace gem5
|
||||
@@ -66,8 +69,7 @@ namespace gem5
|
||||
* simulation compared to the detailed cpu model and good correlation when the
|
||||
* same trace is used for playback on different memory sub-systems.
|
||||
*
|
||||
* The TraceCPU inherits from BaseCPU so some virtual methods need to be
|
||||
* defined. It has two port subclasses inherited from RequestPort for
|
||||
* The TraceCPU has two port subclasses inherited from RequestPort for
|
||||
* instruction and data ports. It issues the memory requests deducing the
|
||||
* timing from the trace and without performing real execution of micro-ops. As
|
||||
* soon as the last dependency for an instruction is complete, its
|
||||
@@ -139,7 +141,7 @@ namespace gem5
|
||||
* exit.
|
||||
*/
|
||||
|
||||
class TraceCPU : public BaseCPU
|
||||
class TraceCPU : public ClockedObject
|
||||
{
|
||||
|
||||
public:
|
||||
@@ -147,15 +149,6 @@ class TraceCPU : public BaseCPU
|
||||
|
||||
void init();
|
||||
|
||||
/**
|
||||
* This is a pure virtual function in BaseCPU. As we don't know how many
|
||||
* insts are in the trace but only know how many micro-ops there are, we
|
||||
* cannot count this stat.
|
||||
*
|
||||
* @return 0
|
||||
*/
|
||||
Counter totalInsts() const { return 0; }
|
||||
|
||||
/**
|
||||
* Return totalOps as the number of committed micro-ops plus the
|
||||
* speculatively issued loads that are modelled in the TraceCPU replay.
|
||||
@@ -170,17 +163,6 @@ class TraceCPU : public BaseCPU
|
||||
*/
|
||||
void updateNumOps(uint64_t rob_num);
|
||||
|
||||
/* Pure virtual function in BaseCPU. Do nothing. */
|
||||
void wakeup(ThreadID tid=0) { return; }
|
||||
|
||||
/*
|
||||
* When resuming from checkpoint in FS mode, the TraceCPU takes over from
|
||||
* the old cpu. This function overrides the takeOverFrom() function in the
|
||||
* BaseCPU. It unbinds the ports of the old CPU and binds the ports of the
|
||||
* TraceCPU.
|
||||
*/
|
||||
void takeOverFrom(BaseCPU *oldCPU);
|
||||
|
||||
/**
|
||||
* When instruction cache port receives a retry, schedule event
|
||||
* icacheNextEvent.
|
||||
@@ -303,6 +285,9 @@ class TraceCPU : public BaseCPU
|
||||
TraceCPU* owner;
|
||||
};
|
||||
|
||||
/** Cache the cache line size that we get from the system */
|
||||
const unsigned int cacheLineSize;
|
||||
|
||||
/** Port to connect to L1 instruction cache. */
|
||||
IcachePort icachePort;
|
||||
|
||||
@@ -1112,6 +1097,8 @@ class TraceCPU : public BaseCPU
|
||||
|
||||
/** Stat for number of simulated micro-ops. */
|
||||
statistics::Scalar numOps;
|
||||
/** Number of CPU cycles simulated */
|
||||
statistics::Scalar numCycles;
|
||||
/** Stat for the CPI. This is really cycles per
|
||||
* micro-op and not inst. */
|
||||
statistics::Formula cpi;
|
||||
@@ -1125,6 +1112,18 @@ class TraceCPU : public BaseCPU
|
||||
/** Used to get a reference to the dcache port. */
|
||||
Port &getDataPort() { return dcachePort; }
|
||||
|
||||
/**
|
||||
* Get a port on this TraceCPU. The TraceCPU has a data and an
|
||||
* instruction port, and this method uses getDataPort and
|
||||
* getInstPort to resolve the two ports by name.
|
||||
*
|
||||
* @param if_name the port name
|
||||
* @param idx ignored index
|
||||
*
|
||||
* @return a reference to the port with the given name
|
||||
*/
|
||||
Port &getPort(const std::string &if_name,
|
||||
PortID idx=InvalidPortID) override;
|
||||
};
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
Reference in New Issue
Block a user